revlog: improve the robustness of the splitting process...
marmoute
r51242:87f0155d stable
changelog.py
@@ -1,632 +1,632 b''
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 from .i18n import _
10 10 from .node import (
11 11 bin,
12 12 hex,
13 13 )
14 14 from .thirdparty import attr
15 15
16 16 from . import (
17 17 encoding,
18 18 error,
19 19 metadata,
20 20 pycompat,
21 21 revlog,
22 22 )
23 23 from .utils import (
24 24 dateutil,
25 25 stringutil,
26 26 )
27 27 from .revlogutils import (
28 28 constants as revlog_constants,
29 29 flagutil,
30 30 )
31 31
32 32 _defaultextra = {b'branch': b'default'}
33 33
34 34
35 35 def _string_escape(text):
36 36 """
37 37 >>> from .pycompat import bytechr as chr
38 38 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
39 39 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
40 40 >>> s
41 41 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
42 42 >>> res = _string_escape(s)
43 43 >>> s == _string_unescape(res)
44 44 True
45 45 """
46 46 # subset of the string_escape codec
47 47 text = (
48 48 text.replace(b'\\', b'\\\\')
49 49 .replace(b'\n', b'\\n')
50 50 .replace(b'\r', b'\\r')
51 51 )
52 52 return text.replace(b'\0', b'\\0')
53 53
54 54
55 55 def _string_unescape(text):
56 56 if b'\\0' in text:
57 57 # fix up \0 without getting into trouble with \\0
58 58 text = text.replace(b'\\\\', b'\\\\\n')
59 59 text = text.replace(b'\\0', b'\0')
60 60 text = text.replace(b'\n', b'')
61 61 return stringutil.unescapestr(text)
62 62
63 63
64 64 def decodeextra(text):
65 65 """
66 66 >>> from .pycompat import bytechr as chr
67 67 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
68 68 ... ).items())
69 69 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
70 70 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
71 71 ... b'baz': chr(92) + chr(0) + b'2'})
72 72 ... ).items())
73 73 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
74 74 """
75 75 extra = _defaultextra.copy()
76 76 for l in text.split(b'\0'):
77 77 if l:
78 78 k, v = _string_unescape(l).split(b':', 1)
79 79 extra[k] = v
80 80 return extra
81 81
82 82
83 83 def encodeextra(d):
84 84 # keys must be sorted to produce a deterministic changelog entry
85 85 items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
86 86 return b"\0".join(items)
87 87
88 88
89 89 def stripdesc(desc):
90 90 """strip trailing whitespace and leading and trailing empty lines"""
91 91 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
92 92
93 93
94 94 class appender:
95 95 """the changelog index must be updated last on disk, so we use this class
96 96 to delay writes to it"""
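# Illustrative example (the sizes are made up): with a 100-byte index
# already on disk and 20 more bytes written through this appender,
#   end()     -> 120, the combined virtual size
#   seek(50)  -> later reads are served from the real file
#   seek(110) -> later reads are served from the in-memory buffer (10 bytes in)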
97 97
98 98 def __init__(self, vfs, name, mode, buf):
99 99 self.data = buf
100 100 fp = vfs(name, mode)
101 101 self.fp = fp
102 102 self.offset = fp.tell()
103 103 self.size = vfs.fstat(fp).st_size
104 104 self._end = self.size
105 105
106 106 def end(self):
107 107 return self._end
108 108
109 109 def tell(self):
110 110 return self.offset
111 111
112 112 def flush(self):
113 113 pass
114 114
115 115 @property
116 116 def closed(self):
117 117 return self.fp.closed
118 118
119 119 def close(self):
120 120 self.fp.close()
121 121
122 122 def seek(self, offset, whence=0):
123 123 '''virtual file offset spans real file and data'''
124 124 if whence == 0:
125 125 self.offset = offset
126 126 elif whence == 1:
127 127 self.offset += offset
128 128 elif whence == 2:
129 129 self.offset = self.end() + offset
130 130 if self.offset < self.size:
131 131 self.fp.seek(self.offset)
132 132
133 133 def read(self, count=-1):
134 134 '''only trick here is reads that span real file and data'''
135 135 ret = b""
136 136 if self.offset < self.size:
137 137 s = self.fp.read(count)
138 138 ret = s
139 139 self.offset += len(s)
140 140 if count > 0:
141 141 count -= len(s)
142 142 if count != 0:
143 143 doff = self.offset - self.size
144 144 self.data.insert(0, b"".join(self.data))
145 145 del self.data[1:]
146 146 s = self.data[0][doff : doff + count]
147 147 self.offset += len(s)
148 148 ret += s
149 149 return ret
150 150
151 151 def write(self, s):
152 152 self.data.append(bytes(s))
153 153 self.offset += len(s)
154 154 self._end += len(s)
155 155
156 156 def __enter__(self):
157 157 self.fp.__enter__()
158 158 return self
159 159
160 160 def __exit__(self, *args):
161 161 return self.fp.__exit__(*args)
162 162
163 163
164 164 class _divertopener:
165 165 def __init__(self, opener, target):
166 166 self._opener = opener
167 167 self._target = target
168 168
169 169 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
170 170 if name != self._target:
171 171 return self._opener(name, mode, **kwargs)
172 172 return self._opener(name + b".a", mode, **kwargs)
173 173
174 174 def __getattr__(self, attr):
175 175 return getattr(self._opener, attr)
176 176
177 177
178 178 def _delayopener(opener, target, buf):
179 179 """build an opener that stores chunks in 'buf' instead of 'target'"""
180 180
181 181 def _delay(name, mode=b'r', checkambig=False, **kwargs):
182 182 if name != target:
183 183 return opener(name, mode, **kwargs)
184 184 assert not kwargs
185 185 return appender(opener, name, mode, buf)
186 186
187 187 return _delay
188 188
189 189
190 190 @attr.s
191 191 class _changelogrevision:
192 192 # Extensions might modify _defaultextra, so let the constructor below pass
193 193 # it in
194 194 extra = attr.ib()
195 195 manifest = attr.ib()
196 196 user = attr.ib(default=b'')
197 197 date = attr.ib(default=(0, 0))
198 198 files = attr.ib(default=attr.Factory(list))
199 199 filesadded = attr.ib(default=None)
200 200 filesremoved = attr.ib(default=None)
201 201 p1copies = attr.ib(default=None)
202 202 p2copies = attr.ib(default=None)
203 203 description = attr.ib(default=b'')
204 204 branchinfo = attr.ib(default=(_defaultextra[b'branch'], False))
205 205
206 206
207 207 class changelogrevision:
208 208 """Holds results of a parsed changelog revision.
209 209
210 210 Changelog revisions consist of multiple pieces of data, including
211 211 the manifest node, user, and date. This object exposes a view into
212 212 the parsed object.
213 213 """
214 214
215 215 __slots__ = (
216 216 '_offsets',
217 217 '_text',
218 218 '_sidedata',
219 219 '_cpsd',
220 220 '_changes',
221 221 )
222 222
223 223 def __new__(cls, cl, text, sidedata, cpsd):
224 224 if not text:
225 225 return _changelogrevision(extra=_defaultextra, manifest=cl.nullid)
226 226
227 227 self = super(changelogrevision, cls).__new__(cls)
228 228 # We could return here and implement the following as an __init__.
229 229 # But doing it here is equivalent and saves an extra function call.
230 230
231 231 # format used:
232 232 # nodeid\n : manifest node in ascii
233 233 # user\n : user, no \n or \r allowed
234 234 # time tz extra\n : date (time is int or float, timezone is int)
235 235 # : extra is metadata, encoded and separated by '\0'
236 236 # : older versions ignore it
237 237 # files\n\n : files modified by the cset, no \n or \r allowed
238 238 # (.*) : comment (free text, ideally utf-8)
239 239 #
240 240 # changelog v0 doesn't use extra
241 241
242 242 nl1 = text.index(b'\n')
243 243 nl2 = text.index(b'\n', nl1 + 1)
244 244 nl3 = text.index(b'\n', nl2 + 1)
245 245
246 246 # The list of files may be empty, in which case nl3 is the first of
247 247 # the double newline that precedes the description.
248 248 if text[nl3 + 1 : nl3 + 2] == b'\n':
249 249 doublenl = nl3
250 250 else:
251 251 doublenl = text.index(b'\n\n', nl3 + 1)
252 252
253 253 self._offsets = (nl1, nl2, nl3, doublenl)
254 254 self._text = text
255 255 self._sidedata = sidedata
256 256 self._cpsd = cpsd
257 257 self._changes = None
258 258
259 259 return self
260 260
261 261 @property
262 262 def manifest(self):
263 263 return bin(self._text[0 : self._offsets[0]])
264 264
265 265 @property
266 266 def user(self):
267 267 off = self._offsets
268 268 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
269 269
270 270 @property
271 271 def _rawdate(self):
272 272 off = self._offsets
273 273 dateextra = self._text[off[1] + 1 : off[2]]
274 274 return dateextra.split(b' ', 2)[0:2]
275 275
276 276 @property
277 277 def _rawextra(self):
278 278 off = self._offsets
279 279 dateextra = self._text[off[1] + 1 : off[2]]
280 280 fields = dateextra.split(b' ', 2)
281 281 if len(fields) != 3:
282 282 return None
283 283
284 284 return fields[2]
285 285
286 286 @property
287 287 def date(self):
288 288 raw = self._rawdate
289 289 time = float(raw[0])
290 290 # Various tools did silly things with the timezone.
291 291 try:
292 292 timezone = int(raw[1])
293 293 except ValueError:
294 294 timezone = 0
295 295
296 296 return time, timezone
297 297
298 298 @property
299 299 def extra(self):
300 300 raw = self._rawextra
301 301 if raw is None:
302 302 return _defaultextra
303 303
304 304 return decodeextra(raw)
305 305
306 306 @property
307 307 def changes(self):
308 308 if self._changes is not None:
309 309 return self._changes
310 310 if self._cpsd:
311 311 changes = metadata.decode_files_sidedata(self._sidedata)
312 312 else:
313 313 changes = metadata.ChangingFiles(
314 314 touched=self.files or (),
315 315 added=self.filesadded or (),
316 316 removed=self.filesremoved or (),
317 317 p1_copies=self.p1copies or {},
318 318 p2_copies=self.p2copies or {},
319 319 )
320 320 self._changes = changes
321 321 return changes
322 322
323 323 @property
324 324 def files(self):
325 325 if self._cpsd:
326 326 return sorted(self.changes.touched)
327 327 off = self._offsets
328 328 if off[2] == off[3]:
329 329 return []
330 330
331 331 return self._text[off[2] + 1 : off[3]].split(b'\n')
332 332
333 333 @property
334 334 def filesadded(self):
335 335 if self._cpsd:
336 336 return self.changes.added
337 337 else:
338 338 rawindices = self.extra.get(b'filesadded')
339 339 if rawindices is None:
340 340 return None
341 341 return metadata.decodefileindices(self.files, rawindices)
342 342
343 343 @property
344 344 def filesremoved(self):
345 345 if self._cpsd:
346 346 return self.changes.removed
347 347 else:
348 348 rawindices = self.extra.get(b'filesremoved')
349 349 if rawindices is None:
350 350 return None
351 351 return metadata.decodefileindices(self.files, rawindices)
352 352
353 353 @property
354 354 def p1copies(self):
355 355 if self._cpsd:
356 356 return self.changes.copied_from_p1
357 357 else:
358 358 rawcopies = self.extra.get(b'p1copies')
359 359 if rawcopies is None:
360 360 return None
361 361 return metadata.decodecopies(self.files, rawcopies)
362 362
363 363 @property
364 364 def p2copies(self):
365 365 if self._cpsd:
366 366 return self.changes.copied_from_p2
367 367 else:
368 368 rawcopies = self.extra.get(b'p2copies')
369 369 if rawcopies is None:
370 370 return None
371 371 return metadata.decodecopies(self.files, rawcopies)
372 372
373 373 @property
374 374 def description(self):
375 375 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
376 376
377 377 @property
378 378 def branchinfo(self):
379 379 extra = self.extra
380 380 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
381 381
382 382
383 383 class changelog(revlog.revlog):
384 384 def __init__(self, opener, trypending=False, concurrencychecker=None):
385 385 """Load a changelog revlog using an opener.
386 386
387 387 If ``trypending`` is true, we attempt to load the index from a
388 388 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
389 389 The ``00changelog.i.a`` file contains index (and possibly inline
390 390 revision) data for a transaction that hasn't been finalized yet.
391 391 It exists in a separate file to facilitate readers (such as
392 392 hook processes) accessing data before a transaction is finalized.
393 393
394 394 ``concurrencychecker`` will be passed to the revlog init function, see
395 395 the documentation there.
396 396 """
397 397 revlog.revlog.__init__(
398 398 self,
399 399 opener,
400 400 target=(revlog_constants.KIND_CHANGELOG, None),
401 401 radix=b'00changelog',
402 402 checkambig=True,
403 403 mmaplargeindex=True,
404 404 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
405 405 concurrencychecker=concurrencychecker,
406 406 trypending=trypending,
407 407 )
408 408
409 409 if self._initempty and (self._format_version == revlog.REVLOGV1):
410 410 # changelogs don't benefit from generaldelta.
411 411
412 412 self._format_flags &= ~revlog.FLAG_GENERALDELTA
413 413 self._generaldelta = False
414 414
415 415 # Delta chains for changelogs tend to be very small because entries
416 416 # tend to be small and don't delta well with each other. So disable delta
417 417 # chains.
418 418 self._storedeltachains = False
419 419
420 420 self._realopener = opener
421 421 self._delayed = False
422 422 self._delaybuf = None
423 423 self._divert = False
424 424 self._filteredrevs = frozenset()
425 425 self._filteredrevs_hashcache = {}
426 426 self._copiesstorage = opener.options.get(b'copies-storage')
427 427
428 428 @property
429 429 def filteredrevs(self):
430 430 return self._filteredrevs
431 431
432 432 @filteredrevs.setter
433 433 def filteredrevs(self, val):
434 434 # Ensure all updates go through this function
435 435 assert isinstance(val, frozenset)
436 436 self._filteredrevs = val
437 437 self._filteredrevs_hashcache = {}
438 438
439 439 def _write_docket(self, tr):
440 440 if not self._delayed:
441 441 super(changelog, self)._write_docket(tr)
442 442
443 443 def delayupdate(self, tr):
444 444 """delay visibility of index updates to other readers"""
445 445 if self._docket is None and not self._delayed:
446 446 if len(self) == 0:
447 447 self._divert = True
448 448 if self._realopener.exists(self._indexfile + b'.a'):
449 449 self._realopener.unlink(self._indexfile + b'.a')
450 450 self.opener = _divertopener(self._realopener, self._indexfile)
451 451 else:
452 452 self._delaybuf = []
453 453 self.opener = _delayopener(
454 454 self._realopener, self._indexfile, self._delaybuf
455 455 )
456 456 self._segmentfile.opener = self.opener
457 457 self._segmentfile_sidedata.opener = self.opener
458 458 self._delayed = True
459 459 tr.addpending(b'cl-%i' % id(self), self._writepending)
460 460 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
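# In other words (for the non-docket case): while the transaction is open,
# index writes either go to a temporary "<index>.a" file (when the
# changelog started out empty) or into an in-memory buffer;
# _writepending() exposes that pending data to hooks via "<index>.a", and
# _finalize() folds it back into the real index once the transaction ends.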
461 461
462 462 def _finalize(self, tr):
463 463 """finalize index updates"""
464 464 self._delayed = False
465 465 self.opener = self._realopener
466 466 self._segmentfile.opener = self.opener
467 467 self._segmentfile_sidedata.opener = self.opener
468 468 # move redirected index data back into place
469 469 if self._docket is not None:
470 470 self._write_docket(tr)
471 471 elif self._divert:
472 472 assert not self._delaybuf
473 473 tmpname = self._indexfile + b".a"
474 474 nfile = self.opener.open(tmpname)
475 475 nfile.close()
476 476 self.opener.rename(tmpname, self._indexfile, checkambig=True)
477 477 elif self._delaybuf:
478 478 fp = self.opener(self._indexfile, b'a', checkambig=True)
479 479 fp.write(b"".join(self._delaybuf))
480 480 fp.close()
481 481 self._delaybuf = None
482 482 self._divert = False
483 483 # split when we're done
484 self._enforceinlinesize(tr)
484 self._enforceinlinesize(tr, side_write=False)
485 485
486 486 def _writepending(self, tr):
487 487 """create a file containing the unfinalized state for
488 488 pretxnchangegroup"""
489 489 if self._docket:
490 490 return self._docket.write(tr, pending=True)
491 491 if self._delaybuf:
492 492 # make a temporary copy of the index
493 493 fp1 = self._realopener(self._indexfile)
494 494 pendingfilename = self._indexfile + b".a"
495 495 # register as a temp file to ensure cleanup on failure
496 496 tr.registertmp(pendingfilename)
497 497 # write existing data
498 498 fp2 = self._realopener(pendingfilename, b"w")
499 499 fp2.write(fp1.read())
500 500 # add pending data
501 501 fp2.write(b"".join(self._delaybuf))
502 502 fp2.close()
503 503 # switch modes so finalize can simply rename
504 504 self._delaybuf = None
505 505 self._divert = True
506 506 self.opener = _divertopener(self._realopener, self._indexfile)
507 507 self._segmentfile.opener = self.opener
508 508 self._segmentfile_sidedata.opener = self.opener
509 509
510 510 if self._divert:
511 511 return True
512 512
513 513 return False
514 514
515 def _enforceinlinesize(self, tr):
515 def _enforceinlinesize(self, tr, side_write=True):
516 516 if not self._delayed:
517 revlog.revlog._enforceinlinesize(self, tr)
517 revlog.revlog._enforceinlinesize(self, tr, side_write=side_write)
518 518
519 519 def read(self, nodeorrev):
520 520 """Obtain data from a parsed changelog revision.
521 521
522 522 Returns a 6-tuple of:
523 523
524 524 - manifest node in binary
525 525 - author/user as a localstr
526 526 - date as a 2-tuple of (time, timezone)
527 527 - list of files
528 528 - commit message as a localstr
529 529 - dict of extra metadata
530 530
531 531 Unless you need to access all fields, consider calling
532 532 ``changelogrevision`` instead, as it is faster for partial object
533 533 access.
534 534 """
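# For example, with ``cl`` a changelog instance,
# ``cl.changelogrevision(rev).description`` decodes only the description,
# while ``cl.read(rev)`` materializes all six fields listed above.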
535 535 d = self._revisiondata(nodeorrev)
536 536 sidedata = self.sidedata(nodeorrev)
537 537 copy_sd = self._copiesstorage == b'changeset-sidedata'
538 538 c = changelogrevision(self, d, sidedata, copy_sd)
539 539 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
540 540
541 541 def changelogrevision(self, nodeorrev):
542 542 """Obtain a ``changelogrevision`` for a node or revision."""
543 543 text = self._revisiondata(nodeorrev)
544 544 sidedata = self.sidedata(nodeorrev)
545 545 return changelogrevision(
546 546 self, text, sidedata, self._copiesstorage == b'changeset-sidedata'
547 547 )
548 548
549 549 def readfiles(self, nodeorrev):
550 550 """
551 551 short version of read that only returns the files modified by the cset
552 552 """
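# The first three lines of an entry are the manifest node, the user and
# the date; everything after them, up to the blank line, is the file
# list, hence the ``l[3:]`` below.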
553 553 text = self.revision(nodeorrev)
554 554 if not text:
555 555 return []
556 556 last = text.index(b"\n\n")
557 557 l = text[:last].split(b'\n')
558 558 return l[3:]
559 559
560 560 def add(
561 561 self,
562 562 manifest,
563 563 files,
564 564 desc,
565 565 transaction,
566 566 p1,
567 567 p2,
568 568 user,
569 569 date=None,
570 570 extra=None,
571 571 ):
572 572 # Convert to UTF-8 encoded bytestrings as the very first
573 573 # thing: calling any method on a localstr object will turn it
574 574 # into a str object and the cached UTF-8 string is thus lost.
575 575 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
576 576
577 577 user = user.strip()
578 578 # An empty username or a username with a "\n" will make the
579 579 # revision text contain two "\n\n" sequences -> corrupt
580 580 # repository since read cannot unpack the revision.
581 581 if not user:
582 582 raise error.StorageError(_(b"empty username"))
583 583 if b"\n" in user:
584 584 raise error.StorageError(
585 585 _(b"username %r contains a newline") % pycompat.bytestr(user)
586 586 )
587 587
588 588 desc = stripdesc(desc)
589 589
590 590 if date:
591 591 parseddate = b"%d %d" % dateutil.parsedate(date)
592 592 else:
593 593 parseddate = b"%d %d" % dateutil.makedate()
594 594 if extra:
595 595 branch = extra.get(b"branch")
596 596 if branch in (b"default", b""):
597 597 del extra[b"branch"]
598 598 elif branch in (b".", b"null", b"tip"):
599 599 raise error.StorageError(
600 600 _(b'the name \'%s\' is reserved') % branch
601 601 )
602 602 sortedfiles = sorted(files.touched)
603 603 flags = 0
604 604 sidedata = None
605 605 if self._copiesstorage == b'changeset-sidedata':
606 606 if files.has_copies_info:
607 607 flags |= flagutil.REVIDX_HASCOPIESINFO
608 608 sidedata = metadata.encode_files_sidedata(files)
609 609
610 610 if extra:
611 611 extra = encodeextra(extra)
612 612 parseddate = b"%s %s" % (parseddate, extra)
613 613 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
614 614 text = b"\n".join(l)
615 615 rev = self.addrevision(
616 616 text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
617 617 )
618 618 return self.node(rev)
619 619
620 620 def branchinfo(self, rev):
621 621 """return the branch name and open/close state of a revision
622 622
623 623 This function exists because creating a changectx object
624 624 just to access this is costly."""
625 625 return self.changelogrevision(rev).branchinfo
626 626
627 627 def _nodeduplicatecallback(self, transaction, rev):
628 628 # keep track of revisions that got "re-added", e.g. unbundle of a known rev.
629 629 #
630 630 # We track them in a list to preserve their order from the source bundle
631 631 duplicates = transaction.changes.setdefault(b'revduplicates', [])
632 632 duplicates.append(rev)
filelog.py
@@ -1,303 +1,304 b''
1 1 # filelog.py - file history class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 from .i18n import _
10 10 from .node import nullrev
11 11 from . import (
12 12 error,
13 13 revlog,
14 14 )
15 15 from .interfaces import (
16 16 repository,
17 17 util as interfaceutil,
18 18 )
19 19 from .utils import storageutil
20 20 from .revlogutils import (
21 21 constants as revlog_constants,
22 22 rewrite,
23 23 )
24 24
25 25
26 26 @interfaceutil.implementer(repository.ifilestorage)
27 27 class filelog:
28 def __init__(self, opener, path):
28 def __init__(self, opener, path, try_split=False):
29 29 self._revlog = revlog.revlog(
30 30 opener,
31 31 # XXX should use the unencoded path
32 32 target=(revlog_constants.KIND_FILELOG, path),
33 33 radix=b'/'.join((b'data', path)),
34 34 censorable=True,
35 35 canonical_parent_order=False, # see comment in revlog.py
36 try_split=try_split,
36 37 )
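# ``try_split`` is passed straight through to the revlog above; the
# splitting logic that this flag ultimately drives lives in revlog.py,
# not in this class.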
37 38 # Full name of the user visible file, relative to the repository root.
38 39 # Used by LFS.
39 40 self._revlog.filename = path
40 41 self.nullid = self._revlog.nullid
41 42 opts = opener.options
42 43 self._fix_issue6528 = opts.get(b'issue6528.fix-incoming', True)
43 44
44 45 def __len__(self):
45 46 return len(self._revlog)
46 47
47 48 def __iter__(self):
48 49 return self._revlog.__iter__()
49 50
50 51 def hasnode(self, node):
51 52 if node in (self.nullid, nullrev):
52 53 return False
53 54
54 55 try:
55 56 self._revlog.rev(node)
56 57 return True
57 58 except (TypeError, ValueError, IndexError, error.LookupError):
58 59 return False
59 60
60 61 def revs(self, start=0, stop=None):
61 62 return self._revlog.revs(start=start, stop=stop)
62 63
63 64 def parents(self, node):
64 65 return self._revlog.parents(node)
65 66
66 67 def parentrevs(self, rev):
67 68 return self._revlog.parentrevs(rev)
68 69
69 70 def rev(self, node):
70 71 return self._revlog.rev(node)
71 72
72 73 def node(self, rev):
73 74 return self._revlog.node(rev)
74 75
75 76 def lookup(self, node):
76 77 return storageutil.fileidlookup(
77 78 self._revlog, node, self._revlog.display_id
78 79 )
79 80
80 81 def linkrev(self, rev):
81 82 return self._revlog.linkrev(rev)
82 83
83 84 def commonancestorsheads(self, node1, node2):
84 85 return self._revlog.commonancestorsheads(node1, node2)
85 86
86 87 # Used by dagop.blockdescendants().
87 88 def descendants(self, revs):
88 89 return self._revlog.descendants(revs)
89 90
90 91 def heads(self, start=None, stop=None):
91 92 return self._revlog.heads(start, stop)
92 93
93 94 # Used by hgweb, children extension.
94 95 def children(self, node):
95 96 return self._revlog.children(node)
96 97
97 98 def iscensored(self, rev):
98 99 return self._revlog.iscensored(rev)
99 100
100 101 def revision(self, node, _df=None):
101 102 return self._revlog.revision(node, _df=_df)
102 103
103 104 def rawdata(self, node, _df=None):
104 105 return self._revlog.rawdata(node, _df=_df)
105 106
106 107 def emitrevisions(
107 108 self,
108 109 nodes,
109 110 nodesorder=None,
110 111 revisiondata=False,
111 112 assumehaveparentrevisions=False,
112 113 deltamode=repository.CG_DELTAMODE_STD,
113 114 sidedata_helpers=None,
114 115 debug_info=None,
115 116 ):
116 117 return self._revlog.emitrevisions(
117 118 nodes,
118 119 nodesorder=nodesorder,
119 120 revisiondata=revisiondata,
120 121 assumehaveparentrevisions=assumehaveparentrevisions,
121 122 deltamode=deltamode,
122 123 sidedata_helpers=sidedata_helpers,
123 124 debug_info=debug_info,
124 125 )
125 126
126 127 def addrevision(
127 128 self,
128 129 revisiondata,
129 130 transaction,
130 131 linkrev,
131 132 p1,
132 133 p2,
133 134 node=None,
134 135 flags=revlog.REVIDX_DEFAULT_FLAGS,
135 136 cachedelta=None,
136 137 ):
137 138 return self._revlog.addrevision(
138 139 revisiondata,
139 140 transaction,
140 141 linkrev,
141 142 p1,
142 143 p2,
143 144 node=node,
144 145 flags=flags,
145 146 cachedelta=cachedelta,
146 147 )
147 148
148 149 def addgroup(
149 150 self,
150 151 deltas,
151 152 linkmapper,
152 153 transaction,
153 154 addrevisioncb=None,
154 155 duplicaterevisioncb=None,
155 156 maybemissingparents=False,
156 157 debug_info=None,
157 158 delta_base_reuse_policy=None,
158 159 ):
159 160 if maybemissingparents:
160 161 raise error.Abort(
161 162 _(
162 163 b'revlog storage does not support missing '
163 164 b'parents write mode'
164 165 )
165 166 )
166 167
167 168 with self._revlog._writing(transaction):
168 169
169 170 if self._fix_issue6528:
170 171 deltas = rewrite.filter_delta_issue6528(self._revlog, deltas)
171 172
172 173 return self._revlog.addgroup(
173 174 deltas,
174 175 linkmapper,
175 176 transaction,
176 177 addrevisioncb=addrevisioncb,
177 178 duplicaterevisioncb=duplicaterevisioncb,
178 179 debug_info=debug_info,
179 180 delta_base_reuse_policy=delta_base_reuse_policy,
180 181 )
181 182
182 183 def getstrippoint(self, minlink):
183 184 return self._revlog.getstrippoint(minlink)
184 185
185 186 def strip(self, minlink, transaction):
186 187 return self._revlog.strip(minlink, transaction)
187 188
188 189 def censorrevision(self, tr, node, tombstone=b''):
189 190 return self._revlog.censorrevision(tr, node, tombstone=tombstone)
190 191
191 192 def files(self):
192 193 return self._revlog.files()
193 194
194 195 def read(self, node):
195 196 return storageutil.filtermetadata(self.revision(node))
196 197
197 198 def add(self, text, meta, transaction, link, p1=None, p2=None):
198 199 if meta or text.startswith(b'\1\n'):
199 200 text = storageutil.packmeta(meta, text)
200 201 rev = self.addrevision(text, transaction, link, p1, p2)
201 202 return self.node(rev)
202 203
203 204 def renamed(self, node):
204 205 return storageutil.filerevisioncopied(self, node)
205 206
206 207 def size(self, rev):
207 208 """return the size of a given revision"""
208 209
209 210 # for revisions with renames, we have to go the slow way
210 211 node = self.node(rev)
211 212 if self.iscensored(rev):
212 213 return 0
213 214 if self.renamed(node):
214 215 return len(self.read(node))
215 216
216 217 # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
217 218 # XXX See also basefilectx.cmp.
218 219 return self._revlog.size(rev)
219 220
220 221 def cmp(self, node, text):
221 222 """compare text with a given file revision
222 223
223 224 returns True if text is different than what is stored.
224 225 """
225 226 return not storageutil.filedataequivalent(self, node, text)
226 227
227 228 def verifyintegrity(self, state):
228 229 return self._revlog.verifyintegrity(state)
229 230
230 231 def storageinfo(
231 232 self,
232 233 exclusivefiles=False,
233 234 sharedfiles=False,
234 235 revisionscount=False,
235 236 trackedsize=False,
236 237 storedsize=False,
237 238 ):
238 239 return self._revlog.storageinfo(
239 240 exclusivefiles=exclusivefiles,
240 241 sharedfiles=sharedfiles,
241 242 revisionscount=revisionscount,
242 243 trackedsize=trackedsize,
243 244 storedsize=storedsize,
244 245 )
245 246
246 247 # Used by repo upgrade.
247 248 def clone(self, tr, destrevlog, **kwargs):
248 249 if not isinstance(destrevlog, filelog):
249 250 msg = b'expected filelog to clone(), not %r'
250 251 msg %= destrevlog
251 252 raise error.ProgrammingError(msg)
252 253
253 254 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
254 255
255 256
256 257 class narrowfilelog(filelog):
257 258 """Filelog variation to be used with narrow stores."""
258 259
259 def __init__(self, opener, path, narrowmatch):
260 super(narrowfilelog, self).__init__(opener, path)
260 def __init__(self, opener, path, narrowmatch, try_split=False):
261 super(narrowfilelog, self).__init__(opener, path, try_split=try_split)
261 262 self._narrowmatch = narrowmatch
262 263
263 264 def renamed(self, node):
264 265 res = super(narrowfilelog, self).renamed(node)
265 266
266 267 # Renames that come from outside the narrowspec are problematic
267 268 # because we may lack the base text for the rename. This can result
268 269 # in code attempting to walk the ancestry or compute a diff
269 270 # encountering a missing revision. We address this by silently
270 271 # removing rename metadata if the source file is outside the
271 272 # narrow spec.
272 273 #
273 274 # A better solution would be to see if the base revision is available,
274 275 # rather than assuming it isn't.
275 276 #
276 277 # An even better solution would be to teach all consumers of rename
277 278 # metadata that the base revision may not be available.
278 279 #
279 280 # TODO consider better ways of doing this.
280 281 if res and not self._narrowmatch(res[0]):
281 282 return None
282 283
283 284 return res
284 285
285 286 def size(self, rev):
286 287 # Because we have a custom renamed() that may lie, we need to call
287 288 # the base renamed() to report accurate results.
288 289 node = self.node(rev)
289 290 if super(narrowfilelog, self).renamed(node):
290 291 return len(self.read(node))
291 292 else:
292 293 return super(narrowfilelog, self).size(rev)
293 294
294 295 def cmp(self, node, text):
295 296 # We don't call `super` because narrow parents can be buggy in case of an
296 297 # ambiguous dirstate. Always take the slow path until there is a better
297 298 # fix, see issue6150.
298 299
299 300 # Censored files compare against the empty file.
300 301 if self.iscensored(self.rev(node)):
301 302 return text != b''
302 303
303 304 return self.read(node) != text
localrepo.py
@@ -1,3984 +1,3995 b''
1 1 # localrepo.py - read/write repository class for mercurial
2 2 # coding: utf-8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9
10 10 import functools
11 11 import os
12 12 import random
13 13 import re
14 14 import sys
15 15 import time
16 16 import weakref
17 17
18 18 from concurrent import futures
19 19 from typing import (
20 20 Optional,
21 21 )
22 22
23 23 from .i18n import _
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullrev,
28 28 sha1nodeconstants,
29 29 short,
30 30 )
31 31 from .pycompat import (
32 32 delattr,
33 33 getattr,
34 34 )
35 35 from . import (
36 36 bookmarks,
37 37 branchmap,
38 38 bundle2,
39 39 bundlecaches,
40 40 changegroup,
41 41 color,
42 42 commit,
43 43 context,
44 44 dirstate,
45 45 discovery,
46 46 encoding,
47 47 error,
48 48 exchange,
49 49 extensions,
50 50 filelog,
51 51 hook,
52 52 lock as lockmod,
53 53 match as matchmod,
54 54 mergestate as mergestatemod,
55 55 mergeutil,
56 56 namespaces,
57 57 narrowspec,
58 58 obsolete,
59 59 pathutil,
60 60 phases,
61 61 pushkey,
62 62 pycompat,
63 63 rcutil,
64 64 repoview,
65 65 requirements as requirementsmod,
66 66 revlog,
67 67 revset,
68 68 revsetlang,
69 69 scmutil,
70 70 sparse,
71 71 store as storemod,
72 72 subrepoutil,
73 73 tags as tagsmod,
74 74 transaction,
75 75 txnutil,
76 76 util,
77 77 vfs as vfsmod,
78 78 wireprototypes,
79 79 )
80 80
81 81 from .interfaces import (
82 82 repository,
83 83 util as interfaceutil,
84 84 )
85 85
86 86 from .utils import (
87 87 hashutil,
88 88 procutil,
89 89 stringutil,
90 90 urlutil,
91 91 )
92 92
93 93 from .revlogutils import (
94 94 concurrency_checker as revlogchecker,
95 95 constants as revlogconst,
96 96 sidedata as sidedatamod,
97 97 )
98 98
99 99 release = lockmod.release
100 100 urlerr = util.urlerr
101 101 urlreq = util.urlreq
102 102
103 103 RE_SKIP_DIRSTATE_ROLLBACK = re.compile(
104 104 b"^((dirstate|narrowspec.dirstate).*|branch$)"
105 105 )
106 106
107 107 # set of (path, vfs-location) tuples. vfs-location is:
108 108 # - 'plain' for vfs relative paths
109 109 # - '' for svfs relative paths
110 110 _cachedfiles = set()
111 111
112 112
113 113 class _basefilecache(scmutil.filecache):
114 114 """All filecache usage on repo are done for logic that should be unfiltered"""
115 115
116 116 def __get__(self, repo, type=None):
117 117 if repo is None:
118 118 return self
119 119 # proxy to unfiltered __dict__ since filtered repo has no entry
120 120 unfi = repo.unfiltered()
121 121 try:
122 122 return unfi.__dict__[self.sname]
123 123 except KeyError:
124 124 pass
125 125 return super(_basefilecache, self).__get__(unfi, type)
126 126
127 127 def set(self, repo, value):
128 128 return super(_basefilecache, self).set(repo.unfiltered(), value)
129 129
130 130
131 131 class repofilecache(_basefilecache):
132 132 """filecache for files in .hg but outside of .hg/store"""
133 133
134 134 def __init__(self, *paths):
135 135 super(repofilecache, self).__init__(*paths)
136 136 for path in paths:
137 137 _cachedfiles.add((path, b'plain'))
138 138
139 139 def join(self, obj, fname):
140 140 return obj.vfs.join(fname)
141 141
142 142
143 143 class storecache(_basefilecache):
144 144 """filecache for files in the store"""
145 145
146 146 def __init__(self, *paths):
147 147 super(storecache, self).__init__(*paths)
148 148 for path in paths:
149 149 _cachedfiles.add((path, b''))
150 150
151 151 def join(self, obj, fname):
152 152 return obj.sjoin(fname)
153 153
154 154
155 155 class changelogcache(storecache):
156 156 """filecache for the changelog"""
157 157
158 158 def __init__(self):
159 159 super(changelogcache, self).__init__()
160 160 _cachedfiles.add((b'00changelog.i', b''))
161 161 _cachedfiles.add((b'00changelog.n', b''))
162 162
163 163 def tracked_paths(self, obj):
164 164 paths = [self.join(obj, b'00changelog.i')]
165 165 if obj.store.opener.options.get(b'persistent-nodemap', False):
166 166 paths.append(self.join(obj, b'00changelog.n'))
167 167 return paths
168 168
169 169
170 170 class manifestlogcache(storecache):
171 171 """filecache for the manifestlog"""
172 172
173 173 def __init__(self):
174 174 super(manifestlogcache, self).__init__()
175 175 _cachedfiles.add((b'00manifest.i', b''))
176 176 _cachedfiles.add((b'00manifest.n', b''))
177 177
178 178 def tracked_paths(self, obj):
179 179 paths = [self.join(obj, b'00manifest.i')]
180 180 if obj.store.opener.options.get(b'persistent-nodemap', False):
181 181 paths.append(self.join(obj, b'00manifest.n'))
182 182 return paths
183 183
184 184
185 185 class mixedrepostorecache(_basefilecache):
186 186 """filecache for a mix files in .hg/store and outside"""
187 187
188 188 def __init__(self, *pathsandlocations):
189 189 # scmutil.filecache only uses the path for passing back into our
190 190 # join(), so we can safely pass a list of paths and locations
191 191 super(mixedrepostorecache, self).__init__(*pathsandlocations)
192 192 _cachedfiles.update(pathsandlocations)
193 193
194 194 def join(self, obj, fnameandlocation):
195 195 fname, location = fnameandlocation
196 196 if location == b'plain':
197 197 return obj.vfs.join(fname)
198 198 else:
199 199 if location != b'':
200 200 raise error.ProgrammingError(
201 201 b'unexpected location: %s' % location
202 202 )
203 203 return obj.sjoin(fname)
204 204
205 205
206 206 def isfilecached(repo, name):
207 207 """check if a repo has already cached "name" filecache-ed property
208 208
209 209 This returns (cachedobj-or-None, iscached) tuple.
210 210 """
211 211 cacheentry = repo.unfiltered()._filecache.get(name, None)
212 212 if not cacheentry:
213 213 return None, False
214 214 return cacheentry.obj, True
215 215
216 216
217 217 class unfilteredpropertycache(util.propertycache):
218 218 """propertycache that apply to unfiltered repo only"""
219 219
220 220 def __get__(self, repo, type=None):
221 221 unfi = repo.unfiltered()
222 222 if unfi is repo:
223 223 return super(unfilteredpropertycache, self).__get__(unfi)
224 224 return getattr(unfi, self.name)
225 225
226 226
227 227 class filteredpropertycache(util.propertycache):
228 228 """propertycache that must take filtering in account"""
229 229
230 230 def cachevalue(self, obj, value):
231 231 object.__setattr__(obj, self.name, value)
232 232
233 233
234 234 def hasunfilteredcache(repo, name):
235 235 """check if a repo has an unfilteredpropertycache value for <name>"""
236 236 return name in vars(repo.unfiltered())
237 237
238 238
239 239 def unfilteredmethod(orig):
240 240 """decorate method that always need to be run on unfiltered version"""
241 241
242 242 @functools.wraps(orig)
243 243 def wrapper(repo, *args, **kwargs):
244 244 return orig(repo.unfiltered(), *args, **kwargs)
245 245
246 246 return wrapper
247 247
248 248
249 249 moderncaps = {
250 250 b'lookup',
251 251 b'branchmap',
252 252 b'pushkey',
253 253 b'known',
254 254 b'getbundle',
255 255 b'unbundle',
256 256 }
257 257 legacycaps = moderncaps.union({b'changegroupsubset'})
258 258
259 259
260 260 @interfaceutil.implementer(repository.ipeercommandexecutor)
261 261 class localcommandexecutor:
262 262 def __init__(self, peer):
263 263 self._peer = peer
264 264 self._sent = False
265 265 self._closed = False
266 266
267 267 def __enter__(self):
268 268 return self
269 269
270 270 def __exit__(self, exctype, excvalue, exctb):
271 271 self.close()
272 272
273 273 def callcommand(self, command, args):
274 274 if self._sent:
275 275 raise error.ProgrammingError(
276 276 b'callcommand() cannot be used after sendcommands()'
277 277 )
278 278
279 279 if self._closed:
280 280 raise error.ProgrammingError(
281 281 b'callcommand() cannot be used after close()'
282 282 )
283 283
284 284 # We don't need to support anything fancy. Just call the named
285 285 # method on the peer and return a resolved future.
286 286 fn = getattr(self._peer, pycompat.sysstr(command))
287 287
288 288 f = futures.Future()
289 289
290 290 try:
291 291 result = fn(**pycompat.strkwargs(args))
292 292 except Exception:
293 293 pycompat.future_set_exception_info(f, sys.exc_info()[1:])
294 294 else:
295 295 f.set_result(result)
296 296
297 297 return f
298 298
299 299 def sendcommands(self):
300 300 self._sent = True
301 301
302 302 def close(self):
303 303 self._closed = True
304 304
305 305
306 306 @interfaceutil.implementer(repository.ipeercommands)
307 307 class localpeer(repository.peer):
308 308 '''peer for a local repo; reflects only the most recent API'''
309 309
310 310 def __init__(self, repo, caps=None, path=None):
311 311 super(localpeer, self).__init__(repo.ui, path=path)
312 312
313 313 if caps is None:
314 314 caps = moderncaps.copy()
315 315 self._repo = repo.filtered(b'served')
316 316
317 317 if repo._wanted_sidedata:
318 318 formatted = bundle2.format_remote_wanted_sidedata(repo)
319 319 caps.add(b'exp-wanted-sidedata=' + formatted)
320 320
321 321 self._caps = repo._restrictcapabilities(caps)
322 322
323 323 # Begin of _basepeer interface.
324 324
325 325 def url(self):
326 326 return self._repo.url()
327 327
328 328 def local(self):
329 329 return self._repo
330 330
331 331 def canpush(self):
332 332 return True
333 333
334 334 def close(self):
335 335 self._repo.close()
336 336
337 337 # End of _basepeer interface.
338 338
339 339 # Begin of _basewirecommands interface.
340 340
341 341 def branchmap(self):
342 342 return self._repo.branchmap()
343 343
344 344 def capabilities(self):
345 345 return self._caps
346 346
347 347 def clonebundles(self):
348 348 return self._repo.tryread(bundlecaches.CB_MANIFEST_FILE)
349 349
350 350 def debugwireargs(self, one, two, three=None, four=None, five=None):
351 351 """Used to test argument passing over the wire"""
352 352 return b"%s %s %s %s %s" % (
353 353 one,
354 354 two,
355 355 pycompat.bytestr(three),
356 356 pycompat.bytestr(four),
357 357 pycompat.bytestr(five),
358 358 )
359 359
360 360 def getbundle(
361 361 self,
362 362 source,
363 363 heads=None,
364 364 common=None,
365 365 bundlecaps=None,
366 366 remote_sidedata=None,
367 367 **kwargs
368 368 ):
369 369 chunks = exchange.getbundlechunks(
370 370 self._repo,
371 371 source,
372 372 heads=heads,
373 373 common=common,
374 374 bundlecaps=bundlecaps,
375 375 remote_sidedata=remote_sidedata,
376 376 **kwargs
377 377 )[1]
378 378 cb = util.chunkbuffer(chunks)
379 379
380 380 if exchange.bundle2requested(bundlecaps):
381 381 # When requesting a bundle2, getbundle returns a stream to make the
382 382 # wire level function happier. We need to build a proper object
383 383 # from it in local peer.
384 384 return bundle2.getunbundler(self.ui, cb)
385 385 else:
386 386 return changegroup.getunbundler(b'01', cb, None)
387 387
388 388 def heads(self):
389 389 return self._repo.heads()
390 390
391 391 def known(self, nodes):
392 392 return self._repo.known(nodes)
393 393
394 394 def listkeys(self, namespace):
395 395 return self._repo.listkeys(namespace)
396 396
397 397 def lookup(self, key):
398 398 return self._repo.lookup(key)
399 399
400 400 def pushkey(self, namespace, key, old, new):
401 401 return self._repo.pushkey(namespace, key, old, new)
402 402
403 403 def stream_out(self):
404 404 raise error.Abort(_(b'cannot perform stream clone against local peer'))
405 405
406 406 def unbundle(self, bundle, heads, url):
407 407 """apply a bundle on a repo
408 408
409 409 This function handles the repo locking itself."""
410 410 try:
411 411 try:
412 412 bundle = exchange.readbundle(self.ui, bundle, None)
413 413 ret = exchange.unbundle(self._repo, bundle, heads, b'push', url)
414 414 if util.safehasattr(ret, b'getchunks'):
415 415 # This is a bundle20 object, turn it into an unbundler.
416 416 # This little dance should be dropped eventually when the
417 417 # API is finally improved.
418 418 stream = util.chunkbuffer(ret.getchunks())
419 419 ret = bundle2.getunbundler(self.ui, stream)
420 420 return ret
421 421 except Exception as exc:
422 422 # If the exception contains output salvaged from a bundle2
423 423 # reply, we need to make sure it is printed before continuing
424 424 # to fail. So we build a bundle2 with such output and consume
425 425 # it directly.
426 426 #
427 427 # This is not very elegant but allows a "simple" solution for
428 428 # issue4594
429 429 output = getattr(exc, '_bundle2salvagedoutput', ())
430 430 if output:
431 431 bundler = bundle2.bundle20(self._repo.ui)
432 432 for out in output:
433 433 bundler.addpart(out)
434 434 stream = util.chunkbuffer(bundler.getchunks())
435 435 b = bundle2.getunbundler(self.ui, stream)
436 436 bundle2.processbundle(self._repo, b)
437 437 raise
438 438 except error.PushRaced as exc:
439 439 raise error.ResponseError(
440 440 _(b'push failed:'), stringutil.forcebytestr(exc)
441 441 )
442 442
443 443 # End of _basewirecommands interface.
444 444
445 445 # Begin of peer interface.
446 446
447 447 def commandexecutor(self):
448 448 return localcommandexecutor(self)
449 449
450 450 # End of peer interface.
451 451
452 452
453 453 @interfaceutil.implementer(repository.ipeerlegacycommands)
454 454 class locallegacypeer(localpeer):
455 455 """peer extension which implements legacy methods too; used for tests with
456 456 restricted capabilities"""
457 457
458 458 def __init__(self, repo, path=None):
459 459 super(locallegacypeer, self).__init__(repo, caps=legacycaps, path=path)
460 460
461 461 # Begin of baselegacywirecommands interface.
462 462
463 463 def between(self, pairs):
464 464 return self._repo.between(pairs)
465 465
466 466 def branches(self, nodes):
467 467 return self._repo.branches(nodes)
468 468
469 469 def changegroup(self, nodes, source):
470 470 outgoing = discovery.outgoing(
471 471 self._repo, missingroots=nodes, ancestorsof=self._repo.heads()
472 472 )
473 473 return changegroup.makechangegroup(self._repo, outgoing, b'01', source)
474 474
475 475 def changegroupsubset(self, bases, heads, source):
476 476 outgoing = discovery.outgoing(
477 477 self._repo, missingroots=bases, ancestorsof=heads
478 478 )
479 479 return changegroup.makechangegroup(self._repo, outgoing, b'01', source)
480 480
481 481 # End of baselegacywirecommands interface.
482 482
483 483
484 484 # Functions receiving (ui, features) that extensions can register to impact
485 485 # the ability to load repositories with custom requirements. Only
486 486 # functions defined in loaded extensions are called.
487 487 #
488 488 # The function receives a set of requirement strings that the repository
489 489 # is capable of opening. Functions will typically add elements to the
490 490 # set to reflect that the extension knows how to handle those requirements.
491 491 featuresetupfuncs = set()
492 492
493 493
494 494 def _getsharedvfs(hgvfs, requirements):
495 495 """returns the vfs object pointing to root of shared source
496 496 repo for a shared repository
497 497
498 498 hgvfs is vfs pointing at .hg/ of current repo (shared one)
499 499 requirements is a set of requirements of current repo (shared one)
500 500 """
501 501 # The ``shared`` or ``relshared`` requirements indicate the
502 502 # store lives in the path contained in the ``.hg/sharedpath`` file.
503 503 # This is an absolute path for ``shared`` and relative to
504 504 # ``.hg/`` for ``relshared``.
505 505 sharedpath = hgvfs.read(b'sharedpath').rstrip(b'\n')
506 506 if requirementsmod.RELATIVE_SHARED_REQUIREMENT in requirements:
507 507 sharedpath = util.normpath(hgvfs.join(sharedpath))
508 508
509 509 sharedvfs = vfsmod.vfs(sharedpath, realpath=True)
510 510
511 511 if not sharedvfs.exists():
512 512 raise error.RepoError(
513 513 _(b'.hg/sharedpath points to nonexistent directory %s')
514 514 % sharedvfs.base
515 515 )
516 516 return sharedvfs
517 517
518 518
519 519 def _readrequires(vfs, allowmissing):
520 520 """reads the require file present at root of this vfs
521 521 and return a set of requirements
522 522
523 523 If allowmissing is True, we suppress FileNotFoundError if raised"""
524 524 # requires file contains a newline-delimited list of
525 525 # features/capabilities the opener (us) must have in order to use
526 526 # the repository. This file was introduced in Mercurial 0.9.2,
527 527 # which means very old repositories may not have one. We assume
528 528 # a missing file translates to no requirements.
529 529 read = vfs.tryread if allowmissing else vfs.read
530 530 return set(read(b'requires').splitlines())
531 531
532 532
533 533 def makelocalrepository(baseui, path: bytes, intents=None):
534 534 """Create a local repository object.
535 535
536 536 Given arguments needed to construct a local repository, this function
537 537 performs various early repository loading functionality (such as
538 538 reading the ``.hg/requires`` and ``.hg/hgrc`` files), validates that
539 539 the repository can be opened, derives a type suitable for representing
540 540 that repository, and returns an instance of it.
541 541
542 542 The returned object conforms to the ``repository.completelocalrepository``
543 543 interface.
544 544
545 545 The repository type is derived by calling a series of factory functions
546 546 for each aspect/interface of the final repository. These are defined by
547 547 ``REPO_INTERFACES``.
548 548
549 549 Each factory function is called to produce a type implementing a specific
550 550 interface. The cumulative list of returned types will be combined into a
551 551 new type and that type will be instantiated to represent the local
552 552 repository.
553 553
554 554 The factory functions each receive various state that may be consulted
555 555 as part of deriving a type.
556 556
557 557 Extensions should wrap these factory functions to customize repository type
558 558 creation. Note that an extension's wrapped function may be called even if
559 559 that extension is not loaded for the repo being constructed. Extensions
560 560 should check if their ``__name__`` appears in the
561 561 ``extensionmodulenames`` set passed to the factory function and no-op if
562 562 not.
563 563 """
564 564 ui = baseui.copy()
565 565 # Prevent copying repo configuration.
566 566 ui.copy = baseui.copy
567 567
568 568 # Working directory VFS rooted at repository root.
569 569 wdirvfs = vfsmod.vfs(path, expandpath=True, realpath=True)
570 570
571 571 # Main VFS for .hg/ directory.
572 572 hgpath = wdirvfs.join(b'.hg')
573 573 hgvfs = vfsmod.vfs(hgpath, cacheaudited=True)
574 574 # Whether this repository is shared one or not
575 575 shared = False
576 576 # If this repository is shared, vfs pointing to shared repo
577 577 sharedvfs = None
578 578
579 579 # The .hg/ path should exist and should be a directory. All other
580 580 # cases are errors.
581 581 if not hgvfs.isdir():
582 582 try:
583 583 hgvfs.stat()
584 584 except FileNotFoundError:
585 585 pass
586 586 except ValueError as e:
587 587 # Can be raised on Python 3.8 when path is invalid.
588 588 raise error.Abort(
589 589 _(b'invalid path %s: %s') % (path, stringutil.forcebytestr(e))
590 590 )
591 591
592 592 raise error.RepoError(_(b'repository %s not found') % path)
593 593
594 594 requirements = _readrequires(hgvfs, True)
595 595 shared = (
596 596 requirementsmod.SHARED_REQUIREMENT in requirements
597 597 or requirementsmod.RELATIVE_SHARED_REQUIREMENT in requirements
598 598 )
599 599 storevfs = None
600 600 if shared:
601 601 # This is a shared repo
602 602 sharedvfs = _getsharedvfs(hgvfs, requirements)
603 603 storevfs = vfsmod.vfs(sharedvfs.join(b'store'))
604 604 else:
605 605 storevfs = vfsmod.vfs(hgvfs.join(b'store'))
606 606
607 607 # if .hg/requires contains the sharesafe requirement, it means
608 608 # there exists a `.hg/store/requires` too and we should read it
609 609 # NOTE: presence of SHARESAFE_REQUIREMENT imply that store requirement
610 610 # is present. We never write SHARESAFE_REQUIREMENT for a repo if store
611 611 # is not present, refer checkrequirementscompat() for that
612 612 #
613 613 # However, if SHARESAFE_REQUIREMENT is not present, it means that the
614 614 # repository was shared the old way. We check the share source .hg/requires
615 615 # for SHARESAFE_REQUIREMENT to detect whether the current repository needs
616 616 # to be reshared
617 617 hint = _(b"see `hg help config.format.use-share-safe` for more information")
618 618 if requirementsmod.SHARESAFE_REQUIREMENT in requirements:
619 619 if (
620 620 shared
621 621 and requirementsmod.SHARESAFE_REQUIREMENT
622 622 not in _readrequires(sharedvfs, True)
623 623 ):
624 624 mismatch_warn = ui.configbool(
625 625 b'share', b'safe-mismatch.source-not-safe.warn'
626 626 )
627 627 mismatch_config = ui.config(
628 628 b'share', b'safe-mismatch.source-not-safe'
629 629 )
630 630 mismatch_verbose_upgrade = ui.configbool(
631 631 b'share', b'safe-mismatch.source-not-safe:verbose-upgrade'
632 632 )
633 633 if mismatch_config in (
634 634 b'downgrade-allow',
635 635 b'allow',
636 636 b'downgrade-abort',
637 637 ):
638 638 # prevent cyclic import localrepo -> upgrade -> localrepo
639 639 from . import upgrade
640 640
641 641 upgrade.downgrade_share_to_non_safe(
642 642 ui,
643 643 hgvfs,
644 644 sharedvfs,
645 645 requirements,
646 646 mismatch_config,
647 647 mismatch_warn,
648 648 mismatch_verbose_upgrade,
649 649 )
650 650 elif mismatch_config == b'abort':
651 651 raise error.Abort(
652 652 _(b"share source does not support share-safe requirement"),
653 653 hint=hint,
654 654 )
655 655 else:
656 656 raise error.Abort(
657 657 _(
658 658 b"share-safe mismatch with source.\nUnrecognized"
659 659 b" value '%s' of `share.safe-mismatch.source-not-safe`"
660 660 b" set."
661 661 )
662 662 % mismatch_config,
663 663 hint=hint,
664 664 )
665 665 else:
666 666 requirements |= _readrequires(storevfs, False)
667 667 elif shared:
668 668 sourcerequires = _readrequires(sharedvfs, False)
669 669 if requirementsmod.SHARESAFE_REQUIREMENT in sourcerequires:
670 670 mismatch_config = ui.config(b'share', b'safe-mismatch.source-safe')
671 671 mismatch_warn = ui.configbool(
672 672 b'share', b'safe-mismatch.source-safe.warn'
673 673 )
674 674 mismatch_verbose_upgrade = ui.configbool(
675 675 b'share', b'safe-mismatch.source-safe:verbose-upgrade'
676 676 )
677 677 if mismatch_config in (
678 678 b'upgrade-allow',
679 679 b'allow',
680 680 b'upgrade-abort',
681 681 ):
682 682 # prevent cyclic import localrepo -> upgrade -> localrepo
683 683 from . import upgrade
684 684
685 685 upgrade.upgrade_share_to_safe(
686 686 ui,
687 687 hgvfs,
688 688 storevfs,
689 689 requirements,
690 690 mismatch_config,
691 691 mismatch_warn,
692 692 mismatch_verbose_upgrade,
693 693 )
694 694 elif mismatch_config == b'abort':
695 695 raise error.Abort(
696 696 _(
697 697 b'version mismatch: source uses share-safe'
698 698 b' functionality while the current share does not'
699 699 ),
700 700 hint=hint,
701 701 )
702 702 else:
703 703 raise error.Abort(
704 704 _(
705 705 b"share-safe mismatch with source.\nUnrecognized"
706 706 b" value '%s' of `share.safe-mismatch.source-safe` set."
707 707 )
708 708 % mismatch_config,
709 709 hint=hint,
710 710 )
711 711
712 712 # The .hg/hgrc file may load extensions or contain config options
713 713 # that influence repository construction. Attempt to load it and
714 714 # process any new extensions that it may have pulled in.
715 715 if loadhgrc(ui, wdirvfs, hgvfs, requirements, sharedvfs):
716 716 afterhgrcload(ui, wdirvfs, hgvfs, requirements)
717 717 extensions.loadall(ui)
718 718 extensions.populateui(ui)
719 719
720 720 # Set of module names of extensions loaded for this repository.
721 721 extensionmodulenames = {m.__name__ for n, m in extensions.extensions(ui)}
722 722
723 723 supportedrequirements = gathersupportedrequirements(ui)
724 724
725 725 # We first validate the requirements are known.
726 726 ensurerequirementsrecognized(requirements, supportedrequirements)
727 727
728 728 # Then we validate that the known set is reasonable to use together.
729 729 ensurerequirementscompatible(ui, requirements)
730 730
731 731 # TODO there are unhandled edge cases related to opening repositories with
732 732 # shared storage. If storage is shared, we should also test for requirements
733 733 # compatibility in the pointed-to repo. This entails loading the .hg/hgrc in
734 734 # that repo, as that repo may load extensions needed to open it. This is a
735 735 # bit complicated because we don't want the other hgrc to overwrite settings
736 736 # in this hgrc.
737 737 #
738 738 # This bug is somewhat mitigated by the fact that we copy the .hg/requires
739 739 # file when sharing repos. But if a requirement is added after the share is
740 740 # performed, thereby introducing a new requirement for the opener, we may
741 741 # will not see that and could encounter a run-time error interacting with
742 742 # that shared store since it has an unknown-to-us requirement.
743 743
744 744 # At this point, we know we should be capable of opening the repository.
745 745 # Now get on with doing that.
746 746
747 747 features = set()
748 748
749 749 # The "store" part of the repository holds versioned data. How it is
750 750 # accessed is determined by various requirements. If `shared` or
751 751 # `relshared` requirements are present, this indicates current repository
752 752 # is a share and store exists in path mentioned in `.hg/sharedpath`
753 753 if shared:
754 754 storebasepath = sharedvfs.base
755 755 cachepath = sharedvfs.join(b'cache')
756 756 features.add(repository.REPO_FEATURE_SHARED_STORAGE)
757 757 else:
758 758 storebasepath = hgvfs.base
759 759 cachepath = hgvfs.join(b'cache')
760 760 wcachepath = hgvfs.join(b'wcache')
761 761
762 762 # The store has changed over time and the exact layout is dictated by
763 763 # requirements. The store interface abstracts differences across all
764 764 # of them.
765 765 store = makestore(
766 766 requirements,
767 767 storebasepath,
768 768 lambda base: vfsmod.vfs(base, cacheaudited=True),
769 769 )
770 770 hgvfs.createmode = store.createmode
771 771
772 772 storevfs = store.vfs
773 773 storevfs.options = resolvestorevfsoptions(ui, requirements, features)
774 774
775 775 if (
776 776 requirementsmod.REVLOGV2_REQUIREMENT in requirements
777 777 or requirementsmod.CHANGELOGV2_REQUIREMENT in requirements
778 778 ):
779 779 features.add(repository.REPO_FEATURE_SIDE_DATA)
780 780 # the revlogv2 docket introduced a race condition that we need to fix
781 781 features.discard(repository.REPO_FEATURE_STREAM_CLONE)
782 782
783 783 # The cache vfs is used to manage cache files.
784 784 cachevfs = vfsmod.vfs(cachepath, cacheaudited=True)
785 785 cachevfs.createmode = store.createmode
786 786 # The cache vfs is used to manage cache files related to the working copy
787 787 wcachevfs = vfsmod.vfs(wcachepath, cacheaudited=True)
788 788 wcachevfs.createmode = store.createmode
789 789
790 790 # Now resolve the type for the repository object. We do this by repeatedly
791 791 # calling a factory function to produce types for specific aspects of the
792 792 # repo's operation. The aggregate returned types are used as base classes
793 793 # for a dynamically-derived type, which will represent our new repository.
794 794
795 795 bases = []
796 796 extrastate = {}
797 797
798 798 for iface, fn in REPO_INTERFACES:
799 799 # We pass all potentially useful state to give extensions tons of
800 800 # flexibility.
801 801 typ = fn()(
802 802 ui=ui,
803 803 intents=intents,
804 804 requirements=requirements,
805 805 features=features,
806 806 wdirvfs=wdirvfs,
807 807 hgvfs=hgvfs,
808 808 store=store,
809 809 storevfs=storevfs,
810 810 storeoptions=storevfs.options,
811 811 cachevfs=cachevfs,
812 812 wcachevfs=wcachevfs,
813 813 extensionmodulenames=extensionmodulenames,
814 814 extrastate=extrastate,
815 815 baseclasses=bases,
816 816 )
817 817
818 818 if not isinstance(typ, type):
819 819 raise error.ProgrammingError(
820 820 b'unable to construct type for %s' % iface
821 821 )
822 822
823 823 bases.append(typ)
824 824
825 825 # type() allows you to use characters in type names that wouldn't be
826 826 # recognized as Python symbols in source code. We abuse that to add
827 827 # rich information about our constructed repo.
828 828 name = pycompat.sysstr(
829 829 b'derivedrepo:%s<%s>' % (wdirvfs.base, b','.join(sorted(requirements)))
830 830 )
831 831
832 832 cls = type(name, tuple(bases), {})
833 833
834 834 return cls(
835 835 baseui=baseui,
836 836 ui=ui,
837 837 origroot=path,
838 838 wdirvfs=wdirvfs,
839 839 hgvfs=hgvfs,
840 840 requirements=requirements,
841 841 supportedrequirements=supportedrequirements,
842 842 sharedpath=storebasepath,
843 843 store=store,
844 844 cachevfs=cachevfs,
845 845 wcachevfs=wcachevfs,
846 846 features=features,
847 847 intents=intents,
848 848 )
849 849
850 850
851 851 def loadhgrc(
852 852 ui,
853 853 wdirvfs: vfsmod.vfs,
854 854 hgvfs: vfsmod.vfs,
855 855 requirements,
856 856 sharedvfs: Optional[vfsmod.vfs] = None,
857 857 ):
858 858 """Load hgrc files/content into a ui instance.
859 859
860 860 This is called during repository opening to load any additional
861 861 config files or settings relevant to the current repository.
862 862
863 863 Returns a bool indicating whether any additional configs were loaded.
864 864
865 865 Extensions should monkeypatch this function to modify how per-repo
866 866 configs are loaded. For example, an extension may wish to pull in
867 867 configs from alternate files or sources.
868 868
869 869 sharedvfs is a vfs object pointing to the source repo if the current one
870 870 is a shared one
871 871 """
872 872 if not rcutil.use_repo_hgrc():
873 873 return False
874 874
875 875 ret = False
876 876 # first load config from the shared source if we have to
877 877 if requirementsmod.SHARESAFE_REQUIREMENT in requirements and sharedvfs:
878 878 try:
879 879 ui.readconfig(sharedvfs.join(b'hgrc'), root=sharedvfs.base)
880 880 ret = True
881 881 except IOError:
882 882 pass
883 883
884 884 try:
885 885 ui.readconfig(hgvfs.join(b'hgrc'), root=wdirvfs.base)
886 886 ret = True
887 887 except IOError:
888 888 pass
889 889
890 890 try:
891 891 ui.readconfig(hgvfs.join(b'hgrc-not-shared'), root=wdirvfs.base)
892 892 ret = True
893 893 except IOError:
894 894 pass
895 895
896 896 return ret
897 897
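# Illustrative sketch of the monkeypatching mentioned in the docstring above.
# This is assumed extension code, not part of this module; the extra file name
# ``hgrc-extra`` is hypothetical.
#
#     from mercurial import extensions, localrepo
#
#     def _wrapped_loadhgrc(orig, ui, wdirvfs, hgvfs, requirements, sharedvfs=None):
#         ret = orig(ui, wdirvfs, hgvfs, requirements, sharedvfs)
#         try:
#             ui.readconfig(hgvfs.join(b'hgrc-extra'), root=wdirvfs.base)
#             ret = True
#         except IOError:
#             pass
#         return ret
#
#     def extsetup(ui):
#         extensions.wrapfunction(localrepo, 'loadhgrc', _wrapped_loadhgrc)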
898 898
899 899 def afterhgrcload(ui, wdirvfs, hgvfs, requirements):
900 900 """Perform additional actions after .hg/hgrc is loaded.
901 901
902 902 This function is called during repository loading immediately after
903 903 the .hg/hgrc file is loaded and before per-repo extensions are loaded.
904 904
905 905 The function can be used to validate configs, automatically add
906 906 options (including extensions) based on requirements, etc.
907 907 """
908 908
909 909 # Map of requirements to list of extensions to load automatically when
910 910 # requirement is present.
911 911 autoextensions = {
912 912 b'git': [b'git'],
913 913 b'largefiles': [b'largefiles'],
914 914 b'lfs': [b'lfs'],
915 915 }
916 916
917 917 for requirement, names in sorted(autoextensions.items()):
918 918 if requirement not in requirements:
919 919 continue
920 920
921 921 for name in names:
922 922 if not ui.hasconfig(b'extensions', name):
923 923 ui.setconfig(b'extensions', name, b'', source=b'autoload')
924 924
925 925
926 926 def gathersupportedrequirements(ui):
927 927 """Determine the complete set of recognized requirements."""
928 928 # Start with all requirements supported by this file.
929 929 supported = set(localrepository._basesupported)
930 930
931 931 # Execute ``featuresetupfuncs`` entries if they belong to an extension
932 932 # relevant to this ui instance.
933 933 modules = {m.__name__ for n, m in extensions.extensions(ui)}
934 934
935 935 for fn in featuresetupfuncs:
936 936 if fn.__module__ in modules:
937 937 fn(ui, supported)
938 938
939 939 # Add derived requirements from registered compression engines.
940 940 for name in util.compengines:
941 941 engine = util.compengines[name]
942 942 if engine.available() and engine.revlogheader():
943 943 supported.add(b'exp-compression-%s' % name)
944 944 if engine.name() == b'zstd':
945 945 supported.add(requirementsmod.REVLOG_COMPRESSION_ZSTD)
946 946
947 947 return supported
948 948
949 949
950 950 def ensurerequirementsrecognized(requirements, supported):
951 951 """Validate that a set of local requirements is recognized.
952 952
953 953 Receives a set of requirements. Raises an ``error.RepoError`` if there
954 954 exists any requirement in that set that currently loaded code doesn't
955 955 recognize.
956 956
957 957 Returns ``None``; it only raises when a requirement is unrecognized.
958 958 """
959 959 missing = set()
960 960
961 961 for requirement in requirements:
962 962 if requirement in supported:
963 963 continue
964 964
965 965 if not requirement or not requirement[0:1].isalnum():
966 966 raise error.RequirementError(_(b'.hg/requires file is corrupt'))
967 967
968 968 missing.add(requirement)
969 969
970 970 if missing:
971 971 raise error.RequirementError(
972 972 _(b'repository requires features unknown to this Mercurial: %s')
973 973 % b' '.join(sorted(missing)),
974 974 hint=_(
975 975 b'see https://mercurial-scm.org/wiki/MissingRequirement '
976 976 b'for more information'
977 977 ),
978 978 )
979 979
980 980
981 981 def ensurerequirementscompatible(ui, requirements):
982 982 """Validates that a set of recognized requirements is mutually compatible.
983 983
984 984 Some requirements may not be compatible with others or require
985 985 config options that aren't enabled. This function is called during
986 986 repository opening to ensure that the set of requirements needed
987 987 to open a repository is sane and compatible with config options.
988 988
989 989 Extensions can monkeypatch this function to perform additional
990 990 checking.
991 991
992 992 ``error.RepoError`` should be raised on failure.
993 993 """
994 994 if (
995 995 requirementsmod.SPARSE_REQUIREMENT in requirements
996 996 and not sparse.enabled
997 997 ):
998 998 raise error.RepoError(
999 999 _(
1000 1000 b'repository is using sparse feature but '
1001 1001 b'sparse is not enabled; enable the '
1002 1002 b'"sparse" extensions to access'
1003 1003 )
1004 1004 )
1005 1005
1006 1006
1007 1007 def makestore(requirements, path, vfstype):
1008 1008 """Construct a storage object for a repository."""
1009 1009 if requirementsmod.STORE_REQUIREMENT in requirements:
1010 1010 if requirementsmod.FNCACHE_REQUIREMENT in requirements:
1011 1011 dotencode = requirementsmod.DOTENCODE_REQUIREMENT in requirements
1012 1012 return storemod.fncachestore(path, vfstype, dotencode)
1013 1013
1014 1014 return storemod.encodedstore(path, vfstype)
1015 1015
1016 1016 return storemod.basicstore(path, vfstype)
1017 1017
1018 1018
1019 1019 def resolvestorevfsoptions(ui, requirements, features):
1020 1020 """Resolve the options to pass to the store vfs opener.
1021 1021
1022 1022 The returned dict is used to influence behavior of the storage layer.
1023 1023 """
1024 1024 options = {}
1025 1025
1026 1026 if requirementsmod.TREEMANIFEST_REQUIREMENT in requirements:
1027 1027 options[b'treemanifest'] = True
1028 1028
1029 1029 # experimental config: format.manifestcachesize
1030 1030 manifestcachesize = ui.configint(b'format', b'manifestcachesize')
1031 1031 if manifestcachesize is not None:
1032 1032 options[b'manifestcachesize'] = manifestcachesize
1033 1033
1034 1034 # In the absence of another requirement superseding a revlog-related
1035 1035 # requirement, we have to assume the repo is using revlog version 0.
1036 1036 # This revlog format is super old and we don't bother trying to parse
1037 1037 # opener options for it because those options wouldn't do anything
1038 1038 # meaningful on such old repos.
1039 1039 if (
1040 1040 requirementsmod.REVLOGV1_REQUIREMENT in requirements
1041 1041 or requirementsmod.REVLOGV2_REQUIREMENT in requirements
1042 1042 ):
1043 1043 options.update(resolverevlogstorevfsoptions(ui, requirements, features))
1044 1044 else: # explicitly mark repo as using revlogv0
1045 1045 options[b'revlogv0'] = True
1046 1046
1047 1047 if requirementsmod.COPIESSDC_REQUIREMENT in requirements:
1048 1048 options[b'copies-storage'] = b'changeset-sidedata'
1049 1049 else:
1050 1050 writecopiesto = ui.config(b'experimental', b'copies.write-to')
1051 1051 copiesextramode = (b'changeset-only', b'compatibility')
1052 1052 if writecopiesto in copiesextramode:
1053 1053 options[b'copies-storage'] = b'extra'
1054 1054
1055 1055 return options
1056 1056
1057 1057
1058 1058 def resolverevlogstorevfsoptions(ui, requirements, features):
1059 1059 """Resolve opener options specific to revlogs."""
1060 1060
1061 1061 options = {}
1062 1062 options[b'flagprocessors'] = {}
1063 1063
1064 1064 if requirementsmod.REVLOGV1_REQUIREMENT in requirements:
1065 1065 options[b'revlogv1'] = True
1066 1066 if requirementsmod.REVLOGV2_REQUIREMENT in requirements:
1067 1067 options[b'revlogv2'] = True
1068 1068 if requirementsmod.CHANGELOGV2_REQUIREMENT in requirements:
1069 1069 options[b'changelogv2'] = True
1070 1070 cmp_rank = ui.configbool(b'experimental', b'changelog-v2.compute-rank')
1071 1071 options[b'changelogv2.compute-rank'] = cmp_rank
1072 1072
1073 1073 if requirementsmod.GENERALDELTA_REQUIREMENT in requirements:
1074 1074 options[b'generaldelta'] = True
1075 1075
1076 1076 # experimental config: format.chunkcachesize
1077 1077 chunkcachesize = ui.configint(b'format', b'chunkcachesize')
1078 1078 if chunkcachesize is not None:
1079 1079 options[b'chunkcachesize'] = chunkcachesize
1080 1080
1081 1081 deltabothparents = ui.configbool(
1082 1082 b'storage', b'revlog.optimize-delta-parent-choice'
1083 1083 )
1084 1084 options[b'deltabothparents'] = deltabothparents
1085 1085 dps_cgds = ui.configint(
1086 1086 b'storage',
1087 1087 b'revlog.delta-parent-search.candidate-group-chunk-size',
1088 1088 )
1089 1089 options[b'delta-parent-search.candidate-group-chunk-size'] = dps_cgds
1090 1090 options[b'debug-delta'] = ui.configbool(b'debug', b'revlog.debug-delta')
1091 1091
1092 1092 issue6528 = ui.configbool(b'storage', b'revlog.issue6528.fix-incoming')
1093 1093 options[b'issue6528.fix-incoming'] = issue6528
1094 1094
1095 1095 lazydelta = ui.configbool(b'storage', b'revlog.reuse-external-delta')
1096 1096 lazydeltabase = False
1097 1097 if lazydelta:
1098 1098 lazydeltabase = ui.configbool(
1099 1099 b'storage', b'revlog.reuse-external-delta-parent'
1100 1100 )
1101 1101 if lazydeltabase is None:
1102 1102 lazydeltabase = not scmutil.gddeltaconfig(ui)
1103 1103 options[b'lazydelta'] = lazydelta
1104 1104 options[b'lazydeltabase'] = lazydeltabase
1105 1105
1106 1106 chainspan = ui.configbytes(b'experimental', b'maxdeltachainspan')
1107 1107 if 0 <= chainspan:
1108 1108 options[b'maxdeltachainspan'] = chainspan
1109 1109
1110 1110 mmapindexthreshold = ui.configbytes(b'experimental', b'mmapindexthreshold')
1111 1111 if mmapindexthreshold is not None:
1112 1112 options[b'mmapindexthreshold'] = mmapindexthreshold
1113 1113
1114 1114 withsparseread = ui.configbool(b'experimental', b'sparse-read')
1115 1115 srdensitythres = float(
1116 1116 ui.config(b'experimental', b'sparse-read.density-threshold')
1117 1117 )
1118 1118 srmingapsize = ui.configbytes(b'experimental', b'sparse-read.min-gap-size')
1119 1119 options[b'with-sparse-read'] = withsparseread
1120 1120 options[b'sparse-read-density-threshold'] = srdensitythres
1121 1121 options[b'sparse-read-min-gap-size'] = srmingapsize
1122 1122
1123 1123 sparserevlog = requirementsmod.SPARSEREVLOG_REQUIREMENT in requirements
1124 1124 options[b'sparse-revlog'] = sparserevlog
1125 1125 if sparserevlog:
1126 1126 options[b'generaldelta'] = True
1127 1127
1128 1128 maxchainlen = None
1129 1129 if sparserevlog:
1130 1130 maxchainlen = revlogconst.SPARSE_REVLOG_MAX_CHAIN_LENGTH
1131 1131 # experimental config: format.maxchainlen
1132 1132 maxchainlen = ui.configint(b'format', b'maxchainlen', maxchainlen)
1133 1133 if maxchainlen is not None:
1134 1134 options[b'maxchainlen'] = maxchainlen
1135 1135
1136 1136 for r in requirements:
1137 1137 # we allow multiple compression engine requirements to co-exist because
1138 1138 # strictly speaking, revlog seems to support mixed compression styles.
1139 1139 #
1140 1140 # The compression used for new entries will be "the last one"
1141 1141 prefix = r.startswith
1142 1142 if prefix(b'revlog-compression-') or prefix(b'exp-compression-'):
1143 1143 options[b'compengine'] = r.split(b'-', 2)[2]
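# For example (no new behaviour, just the split above spelled out):
#
#     r = b'revlog-compression-zstd'
#     r.split(b'-', 2)[2]  # -> b'zstd', stored as options[b'compengine']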
1144 1144
1145 1145 options[b'zlib.level'] = ui.configint(b'storage', b'revlog.zlib.level')
1146 1146 if options[b'zlib.level'] is not None:
1147 1147 if not (0 <= options[b'zlib.level'] <= 9):
1148 1148 msg = _(b'invalid value for `storage.revlog.zlib.level` config: %d')
1149 1149 raise error.Abort(msg % options[b'zlib.level'])
1150 1150 options[b'zstd.level'] = ui.configint(b'storage', b'revlog.zstd.level')
1151 1151 if options[b'zstd.level'] is not None:
1152 1152 if not (0 <= options[b'zstd.level'] <= 22):
1153 1153 msg = _(b'invalid value for `storage.revlog.zstd.level` config: %d')
1154 1154 raise error.Abort(msg % options[b'zstd.level'])
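# For reference, the corresponding configuration looks like the following
# (the levels shown are hypothetical values inside the ranges checked above):
#
#     [storage]
#     revlog.zlib.level = 6
#     revlog.zstd.level = 3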
1155 1155
1156 1156 if requirementsmod.NARROW_REQUIREMENT in requirements:
1157 1157 options[b'enableellipsis'] = True
1158 1158
1159 1159 if ui.configbool(b'experimental', b'rust.index'):
1160 1160 options[b'rust.index'] = True
1161 1161 if requirementsmod.NODEMAP_REQUIREMENT in requirements:
1162 1162 slow_path = ui.config(
1163 1163 b'storage', b'revlog.persistent-nodemap.slow-path'
1164 1164 )
1165 1165 if slow_path not in (b'allow', b'warn', b'abort'):
1166 1166 default = ui.config_default(
1167 1167 b'storage', b'revlog.persistent-nodemap.slow-path'
1168 1168 )
1169 1169 msg = _(
1170 1170 b'unknown value for config '
1171 1171 b'"storage.revlog.persistent-nodemap.slow-path": "%s"\n'
1172 1172 )
1173 1173 ui.warn(msg % slow_path)
1174 1174 if not ui.quiet:
1175 1175 ui.warn(_(b'falling back to default value: %s\n') % default)
1176 1176 slow_path = default
1177 1177
1178 1178 msg = _(
1179 1179 b"accessing `persistent-nodemap` repository without associated "
1180 1180 b"fast implementation."
1181 1181 )
1182 1182 hint = _(
1183 1183 b"check `hg help config.format.use-persistent-nodemap` "
1184 1184 b"for details"
1185 1185 )
1186 1186 if not revlog.HAS_FAST_PERSISTENT_NODEMAP:
1187 1187 if slow_path == b'warn':
1188 1188 msg = b"warning: " + msg + b'\n'
1189 1189 ui.warn(msg)
1190 1190 if not ui.quiet:
1191 1191 hint = b'(' + hint + b')\n'
1192 1192 ui.warn(hint)
1193 1193 if slow_path == b'abort':
1194 1194 raise error.Abort(msg, hint=hint)
1195 1195 options[b'persistent-nodemap'] = True
1196 1196 if requirementsmod.DIRSTATE_V2_REQUIREMENT in requirements:
1197 1197 slow_path = ui.config(b'storage', b'dirstate-v2.slow-path')
1198 1198 if slow_path not in (b'allow', b'warn', b'abort'):
1199 1199 default = ui.config_default(b'storage', b'dirstate-v2.slow-path')
1200 1200 msg = _(b'unknown value for config "dirstate-v2.slow-path": "%s"\n')
1201 1201 ui.warn(msg % slow_path)
1202 1202 if not ui.quiet:
1203 1203 ui.warn(_(b'falling back to default value: %s\n') % default)
1204 1204 slow_path = default
1205 1205
1206 1206 msg = _(
1207 1207 b"accessing `dirstate-v2` repository without associated "
1208 1208 b"fast implementation."
1209 1209 )
1210 1210 hint = _(
1211 1211 b"check `hg help config.format.use-dirstate-v2` " b"for details"
1212 1212 )
1213 1213 if not dirstate.HAS_FAST_DIRSTATE_V2:
1214 1214 if slow_path == b'warn':
1215 1215 msg = b"warning: " + msg + b'\n'
1216 1216 ui.warn(msg)
1217 1217 if not ui.quiet:
1218 1218 hint = b'(' + hint + b')\n'
1219 1219 ui.warn(hint)
1220 1220 if slow_path == b'abort':
1221 1221 raise error.Abort(msg, hint=hint)
1222 1222 if ui.configbool(b'storage', b'revlog.persistent-nodemap.mmap'):
1223 1223 options[b'persistent-nodemap.mmap'] = True
1224 1224 if ui.configbool(b'devel', b'persistent-nodemap'):
1225 1225 options[b'devel-force-nodemap'] = True
1226 1226
1227 1227 return options
1228 1228
1229 1229
1230 1230 def makemain(**kwargs):
1231 1231 """Produce a type conforming to ``ilocalrepositorymain``."""
1232 1232 return localrepository
1233 1233
1234 1234
1235 1235 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1236 1236 class revlogfilestorage:
1237 1237 """File storage when using revlogs."""
1238 1238
1239 1239 def file(self, path):
1240 1240 if path.startswith(b'/'):
1241 1241 path = path[1:]
1242 1242
1243 return filelog.filelog(self.svfs, path)
1243 try_split = (
1244 self.currenttransaction() is not None
1245 or txnutil.mayhavepending(self.root)
1246 )
1247
1248 return filelog.filelog(self.svfs, path, try_split=try_split)
1244 1249
1245 1250
1246 1251 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1247 1252 class revlognarrowfilestorage:
1248 1253 """File storage when using revlogs and narrow files."""
1249 1254
1250 1255 def file(self, path):
1251 1256 if path.startswith(b'/'):
1252 1257 path = path[1:]
1253 1258
1254 return filelog.narrowfilelog(self.svfs, path, self._storenarrowmatch)
1259 try_split = (
1260 self.currenttransaction() is not None
1261 or txnutil.mayhavepending(self.root)
1262 )
1263 return filelog.narrowfilelog(
1264 self.svfs, path, self._storenarrowmatch, try_split=try_split
1265 )
1255 1266
1256 1267
1257 1268 def makefilestorage(requirements, features, **kwargs):
1258 1269 """Produce a type conforming to ``ilocalrepositoryfilestorage``."""
1259 1270 features.add(repository.REPO_FEATURE_REVLOG_FILE_STORAGE)
1260 1271 features.add(repository.REPO_FEATURE_STREAM_CLONE)
1261 1272
1262 1273 if requirementsmod.NARROW_REQUIREMENT in requirements:
1263 1274 return revlognarrowfilestorage
1264 1275 else:
1265 1276 return revlogfilestorage
1266 1277
1267 1278
1268 1279 # List of repository interfaces and factory functions for them. Each
1269 1280 # will be called in order during ``makelocalrepository()`` to iteratively
1270 1281 # derive the final type for a local repository instance. We capture the
1271 1282 # function as a lambda so we don't hold a reference and the module-level
1272 1283 # functions can be wrapped.
1273 1284 REPO_INTERFACES = [
1274 1285 (repository.ilocalrepositorymain, lambda: makemain),
1275 1286 (repository.ilocalrepositoryfilestorage, lambda: makefilestorage),
1276 1287 ]
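# Illustrative sketch (assumed extension code, not part of this module): since
# the factories are re-resolved through the lambdas above, wrapping the
# module-level function is enough to influence the derived repository type.
#
#     def _wrapped_makemain(orig, **kwargs):
#         cls = orig(**kwargs)
#         return cls  # or a subclass adding extra behaviour
#
#     extensions.wrapfunction(localrepo, 'makemain', _wrapped_makemain)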
1277 1288
1278 1289
1279 1290 @interfaceutil.implementer(repository.ilocalrepositorymain)
1280 1291 class localrepository:
1281 1292 """Main class for representing local repositories.
1282 1293
1283 1294 All local repositories are instances of this class.
1284 1295
1285 1296 Constructed on its own, instances of this class are not usable as
1286 1297 repository objects. To obtain a usable repository object, call
1287 1298 ``hg.repository()``, ``localrepo.instance()``, or
1288 1299 ``localrepo.makelocalrepository()``. The latter is the lowest-level.
1289 1300 ``instance()`` adds support for creating new repositories.
1290 1301 ``hg.repository()`` adds more extension integration, including calling
1291 1302 ``reposetup()``. Generally speaking, ``hg.repository()`` should be
1292 1303 used.
1293 1304 """
1294 1305
1295 1306 _basesupported = {
1296 1307 requirementsmod.ARCHIVED_PHASE_REQUIREMENT,
1297 1308 requirementsmod.BOOKMARKS_IN_STORE_REQUIREMENT,
1298 1309 requirementsmod.CHANGELOGV2_REQUIREMENT,
1299 1310 requirementsmod.COPIESSDC_REQUIREMENT,
1300 1311 requirementsmod.DIRSTATE_TRACKED_HINT_V1,
1301 1312 requirementsmod.DIRSTATE_V2_REQUIREMENT,
1302 1313 requirementsmod.DOTENCODE_REQUIREMENT,
1303 1314 requirementsmod.FNCACHE_REQUIREMENT,
1304 1315 requirementsmod.GENERALDELTA_REQUIREMENT,
1305 1316 requirementsmod.INTERNAL_PHASE_REQUIREMENT,
1306 1317 requirementsmod.NODEMAP_REQUIREMENT,
1307 1318 requirementsmod.RELATIVE_SHARED_REQUIREMENT,
1308 1319 requirementsmod.REVLOGV1_REQUIREMENT,
1309 1320 requirementsmod.REVLOGV2_REQUIREMENT,
1310 1321 requirementsmod.SHARED_REQUIREMENT,
1311 1322 requirementsmod.SHARESAFE_REQUIREMENT,
1312 1323 requirementsmod.SPARSE_REQUIREMENT,
1313 1324 requirementsmod.SPARSEREVLOG_REQUIREMENT,
1314 1325 requirementsmod.STORE_REQUIREMENT,
1315 1326 requirementsmod.TREEMANIFEST_REQUIREMENT,
1316 1327 }
1317 1328
1318 1329 # list of prefixes for files which can be written without 'wlock'
1319 1330 # Extensions should extend this list when needed
1320 1331 _wlockfreeprefix = {
1321 1332 # We might consider requiring 'wlock' for the next
1322 1333 # two, but pretty much all the existing code assume
1323 1334 # wlock is not needed so we keep them excluded for
1324 1335 # now.
1325 1336 b'hgrc',
1326 1337 b'requires',
1327 1338 # XXX cache is a complicated business; someone
1328 1339 # should investigate this in depth at some point
1329 1340 b'cache/',
1330 1341 # XXX bisect was still a bit too messy at the time
1331 1342 # this changeset was introduced. Someone should fix
1332 1343 # the remaining bit and drop this line
1333 1344 b'bisect.state',
1334 1345 }
1335 1346
1336 1347 def __init__(
1337 1348 self,
1338 1349 baseui,
1339 1350 ui,
1340 1351 origroot: bytes,
1341 1352 wdirvfs: vfsmod.vfs,
1342 1353 hgvfs: vfsmod.vfs,
1343 1354 requirements,
1344 1355 supportedrequirements,
1345 1356 sharedpath: bytes,
1346 1357 store,
1347 1358 cachevfs: vfsmod.vfs,
1348 1359 wcachevfs: vfsmod.vfs,
1349 1360 features,
1350 1361 intents=None,
1351 1362 ):
1352 1363 """Create a new local repository instance.
1353 1364
1354 1365 Most callers should use ``hg.repository()``, ``localrepo.instance()``,
1355 1366 or ``localrepo.makelocalrepository()`` for obtaining a new repository
1356 1367 object.
1357 1368
1358 1369 Arguments:
1359 1370
1360 1371 baseui
1361 1372 ``ui.ui`` instance that ``ui`` argument was based off of.
1362 1373
1363 1374 ui
1364 1375 ``ui.ui`` instance for use by the repository.
1365 1376
1366 1377 origroot
1367 1378 ``bytes`` path to working directory root of this repository.
1368 1379
1369 1380 wdirvfs
1370 1381 ``vfs.vfs`` rooted at the working directory.
1371 1382
1372 1383 hgvfs
1373 1384 ``vfs.vfs`` rooted at .hg/
1374 1385
1375 1386 requirements
1376 1387 ``set`` of bytestrings representing repository opening requirements.
1377 1388
1378 1389 supportedrequirements
1379 1390 ``set`` of bytestrings representing repository requirements that we
1380 1391 know how to open. May be a superset of ``requirements``.
1381 1392
1382 1393 sharedpath
1383 1394 ``bytes`` defining the path to the storage base directory. Points to a
1384 1395 ``.hg/`` directory somewhere.
1385 1396
1386 1397 store
1387 1398 ``store.basicstore`` (or derived) instance providing access to
1388 1399 versioned storage.
1389 1400
1390 1401 cachevfs
1391 1402 ``vfs.vfs`` used for cache files.
1392 1403
1393 1404 wcachevfs
1394 1405 ``vfs.vfs`` used for cache files related to the working copy.
1395 1406
1396 1407 features
1397 1408 ``set`` of bytestrings defining features/capabilities of this
1398 1409 instance.
1399 1410
1400 1411 intents
1401 1412 ``set`` of system strings indicating what this repo will be used
1402 1413 for.
1403 1414 """
1404 1415 self.baseui = baseui
1405 1416 self.ui = ui
1406 1417 self.origroot = origroot
1407 1418 # vfs rooted at working directory.
1408 1419 self.wvfs = wdirvfs
1409 1420 self.root = wdirvfs.base
1410 1421 # vfs rooted at .hg/. Used to access most non-store paths.
1411 1422 self.vfs = hgvfs
1412 1423 self.path = hgvfs.base
1413 1424 self.requirements = requirements
1414 1425 self.nodeconstants = sha1nodeconstants
1415 1426 self.nullid = self.nodeconstants.nullid
1416 1427 self.supported = supportedrequirements
1417 1428 self.sharedpath = sharedpath
1418 1429 self.store = store
1419 1430 self.cachevfs = cachevfs
1420 1431 self.wcachevfs = wcachevfs
1421 1432 self.features = features
1422 1433
1423 1434 self.filtername = None
1424 1435
1425 1436 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
1426 1437 b'devel', b'check-locks'
1427 1438 ):
1428 1439 self.vfs.audit = self._getvfsward(self.vfs.audit)
1429 1440 # A list of callbacks to shape the phase if no data were found.
1430 1441 # Callbacks are in the form: func(repo, roots) --> processed root.
1431 1442 # This list is to be filled by extensions during repo setup
1432 1443 self._phasedefaults = []
1433 1444
1434 1445 color.setup(self.ui)
1435 1446
1436 1447 self.spath = self.store.path
1437 1448 self.svfs = self.store.vfs
1438 1449 self.sjoin = self.store.join
1439 1450 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
1440 1451 b'devel', b'check-locks'
1441 1452 ):
1442 1453 if util.safehasattr(self.svfs, b'vfs'): # this is filtervfs
1443 1454 self.svfs.vfs.audit = self._getsvfsward(self.svfs.vfs.audit)
1444 1455 else: # standard vfs
1445 1456 self.svfs.audit = self._getsvfsward(self.svfs.audit)
1446 1457
1447 1458 self._dirstatevalidatewarned = False
1448 1459
1449 1460 self._branchcaches = branchmap.BranchMapCache()
1450 1461 self._revbranchcache = None
1451 1462 self._filterpats = {}
1452 1463 self._datafilters = {}
1453 1464 self._transref = self._lockref = self._wlockref = None
1454 1465
1455 1466 # A cache for various files under .hg/ that tracks file changes,
1456 1467 # (used by the filecache decorator)
1457 1468 #
1458 1469 # Maps a property name to its util.filecacheentry
1459 1470 self._filecache = {}
1460 1471
1461 1472 # hold sets of revisions to be filtered
1462 1473 # should be cleared when something might have changed the filter value:
1463 1474 # - new changesets,
1464 1475 # - phase change,
1465 1476 # - new obsolescence marker,
1466 1477 # - working directory parent change,
1467 1478 # - bookmark changes
1468 1479 self.filteredrevcache = {}
1469 1480
1470 1481 self._dirstate = None
1471 1482 # post-dirstate-status hooks
1472 1483 self._postdsstatus = []
1473 1484
1474 1485 self._pending_narrow_pats = None
1475 1486 self._pending_narrow_pats_dirstate = None
1476 1487
1477 1488 # generic mapping between names and nodes
1478 1489 self.names = namespaces.namespaces()
1479 1490
1480 1491 # Key to signature value.
1481 1492 self._sparsesignaturecache = {}
1482 1493 # Signature to cached matcher instance.
1483 1494 self._sparsematchercache = {}
1484 1495
1485 1496 self._extrafilterid = repoview.extrafilter(ui)
1486 1497
1487 1498 self.filecopiesmode = None
1488 1499 if requirementsmod.COPIESSDC_REQUIREMENT in self.requirements:
1489 1500 self.filecopiesmode = b'changeset-sidedata'
1490 1501
1491 1502 self._wanted_sidedata = set()
1492 1503 self._sidedata_computers = {}
1493 1504 sidedatamod.set_sidedata_spec_for_repo(self)
1494 1505
1495 1506 def _getvfsward(self, origfunc):
1496 1507 """build a ward for self.vfs"""
1497 1508 rref = weakref.ref(self)
1498 1509
1499 1510 def checkvfs(path, mode=None):
1500 1511 ret = origfunc(path, mode=mode)
1501 1512 repo = rref()
1502 1513 if (
1503 1514 repo is None
1504 1515 or not util.safehasattr(repo, b'_wlockref')
1505 1516 or not util.safehasattr(repo, b'_lockref')
1506 1517 ):
1507 1518 return
1508 1519 if mode in (None, b'r', b'rb'):
1509 1520 return
1510 1521 if path.startswith(repo.path):
1511 1522 # truncate name relative to the repository (.hg)
1512 1523 path = path[len(repo.path) + 1 :]
1513 1524 if path.startswith(b'cache/'):
1514 1525 msg = b'accessing cache with vfs instead of cachevfs: "%s"'
1515 1526 repo.ui.develwarn(msg % path, stacklevel=3, config=b"cache-vfs")
1516 1527 # path prefixes covered by 'lock'
1517 1528 vfs_path_prefixes = (
1518 1529 b'journal.',
1519 1530 b'undo.',
1520 1531 b'strip-backup/',
1521 1532 b'cache/',
1522 1533 )
1523 1534 if any(path.startswith(prefix) for prefix in vfs_path_prefixes):
1524 1535 if repo._currentlock(repo._lockref) is None:
1525 1536 repo.ui.develwarn(
1526 1537 b'write with no lock: "%s"' % path,
1527 1538 stacklevel=3,
1528 1539 config=b'check-locks',
1529 1540 )
1530 1541 elif repo._currentlock(repo._wlockref) is None:
1531 1542 # rest of vfs files are covered by 'wlock'
1532 1543 #
1533 1544 # exclude special files
1534 1545 for prefix in self._wlockfreeprefix:
1535 1546 if path.startswith(prefix):
1536 1547 return
1537 1548 repo.ui.develwarn(
1538 1549 b'write with no wlock: "%s"' % path,
1539 1550 stacklevel=3,
1540 1551 config=b'check-locks',
1541 1552 )
1542 1553 return ret
1543 1554
1544 1555 return checkvfs
1545 1556
1546 1557 def _getsvfsward(self, origfunc):
1547 1558 """build a ward for self.svfs"""
1548 1559 rref = weakref.ref(self)
1549 1560
1550 1561 def checksvfs(path, mode=None):
1551 1562 ret = origfunc(path, mode=mode)
1552 1563 repo = rref()
1553 1564 if repo is None or not util.safehasattr(repo, b'_lockref'):
1554 1565 return
1555 1566 if mode in (None, b'r', b'rb'):
1556 1567 return
1557 1568 if path.startswith(repo.sharedpath):
1558 1569 # truncate name relative to the repository (.hg)
1559 1570 path = path[len(repo.sharedpath) + 1 :]
1560 1571 if repo._currentlock(repo._lockref) is None:
1561 1572 repo.ui.develwarn(
1562 1573 b'write with no lock: "%s"' % path, stacklevel=4
1563 1574 )
1564 1575 return ret
1565 1576
1566 1577 return checksvfs
1567 1578
1568 1579 @property
1569 1580 def vfs_map(self):
1570 1581 return {
1571 1582 b'': self.svfs,
1572 1583 b'plain': self.vfs,
1573 1584 b'store': self.svfs,
1574 1585 }
1575 1586
1576 1587 def close(self):
1577 1588 self._writecaches()
1578 1589
1579 1590 def _writecaches(self):
1580 1591 if self._revbranchcache:
1581 1592 self._revbranchcache.write()
1582 1593
1583 1594 def _restrictcapabilities(self, caps):
1584 1595 if self.ui.configbool(b'experimental', b'bundle2-advertise'):
1585 1596 caps = set(caps)
1586 1597 capsblob = bundle2.encodecaps(
1587 1598 bundle2.getrepocaps(self, role=b'client')
1588 1599 )
1589 1600 caps.add(b'bundle2=' + urlreq.quote(capsblob))
1590 1601 if self.ui.configbool(b'experimental', b'narrow'):
1591 1602 caps.add(wireprototypes.NARROWCAP)
1592 1603 return caps
1593 1604
1594 1605 # Don't cache auditor/nofsauditor, or you'll end up with reference cycle:
1595 1606 # self -> auditor -> self._checknested -> self
1596 1607
1597 1608 @property
1598 1609 def auditor(self):
1599 1610 # This is only used by context.workingctx.match in order to
1600 1611 # detect files in subrepos.
1601 1612 return pathutil.pathauditor(self.root, callback=self._checknested)
1602 1613
1603 1614 @property
1604 1615 def nofsauditor(self):
1605 1616 # This is only used by context.basectx.match in order to detect
1606 1617 # files in subrepos.
1607 1618 return pathutil.pathauditor(
1608 1619 self.root, callback=self._checknested, realfs=False, cached=True
1609 1620 )
1610 1621
1611 1622 def _checknested(self, path):
1612 1623 """Determine if path is a legal nested repository."""
1613 1624 if not path.startswith(self.root):
1614 1625 return False
1615 1626 subpath = path[len(self.root) + 1 :]
1616 1627 normsubpath = util.pconvert(subpath)
1617 1628
1618 1629 # XXX: Checking against the current working copy is wrong in
1619 1630 # the sense that it can reject things like
1620 1631 #
1621 1632 # $ hg cat -r 10 sub/x.txt
1622 1633 #
1623 1634 # if sub/ is no longer a subrepository in the working copy
1624 1635 # parent revision.
1625 1636 #
1626 1637 # However, it can of course also allow things that would have
1627 1638 # been rejected before, such as the above cat command if sub/
1628 1639 # is a subrepository now, but was a normal directory before.
1629 1640 # The old path auditor would have rejected by mistake since it
1630 1641 # panics when it sees sub/.hg/.
1631 1642 #
1632 1643 # All in all, checking against the working copy seems sensible
1633 1644 # since we want to prevent access to nested repositories on
1634 1645 # the filesystem *now*.
1635 1646 ctx = self[None]
1636 1647 parts = util.splitpath(subpath)
1637 1648 while parts:
1638 1649 prefix = b'/'.join(parts)
1639 1650 if prefix in ctx.substate:
1640 1651 if prefix == normsubpath:
1641 1652 return True
1642 1653 else:
1643 1654 sub = ctx.sub(prefix)
1644 1655 return sub.checknested(subpath[len(prefix) + 1 :])
1645 1656 else:
1646 1657 parts.pop()
1647 1658 return False
1648 1659
1649 1660 def peer(self, path=None):
1650 1661 return localpeer(self, path=path) # not cached to avoid reference cycle
1651 1662
1652 1663 def unfiltered(self):
1653 1664 """Return unfiltered version of the repository
1654 1665
1655 1666 Intended to be overwritten by filtered repo."""
1656 1667 return self
1657 1668
1658 1669 def filtered(self, name, visibilityexceptions=None):
1659 1670 """Return a filtered version of a repository
1660 1671
1661 1672 The `name` parameter is the identifier of the requested view. This
1662 1673 will return a repoview object set "exactly" to the specified view.
1663 1674
1664 1675 This function does not apply recursive filtering to a repository. For
1665 1676 example calling `repo.filtered("served")` will return a repoview using
1666 1677 the "served" view, regardless of the initial view used by `repo`.
1667 1678
1668 1679 In other words, there is always only one level of `repoview` "filtering".
1669 1680 """
1670 1681 if self._extrafilterid is not None and b'%' not in name:
1671 1682 name = name + b'%' + self._extrafilterid
1672 1683
1673 1684 cls = repoview.newtype(self.unfiltered().__class__)
1674 1685 return cls(self, name, visibilityexceptions)
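# Illustrative usage (assumed caller code, not part of this module):
#
#     served = repo.filtered(b'served')  # view without hidden changesets
#     unfi = repo.unfiltered()           # no filtering at all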
1675 1686
1676 1687 @mixedrepostorecache(
1677 1688 (b'bookmarks', b'plain'),
1678 1689 (b'bookmarks.current', b'plain'),
1679 1690 (b'bookmarks', b''),
1680 1691 (b'00changelog.i', b''),
1681 1692 )
1682 1693 def _bookmarks(self):
1683 1694 # Since the multiple files involved in the transaction cannot be
1684 1695 # written atomically (with current repository format), there is a race
1685 1696 # condition here.
1686 1697 #
1687 1698 # 1) changelog content A is read
1688 1699 # 2) outside transaction update changelog to content B
1689 1700 # 3) outside transaction update bookmark file referring to content B
1690 1701 # 4) bookmarks file content is read and filtered against changelog-A
1691 1702 #
1692 1703 # When this happens, bookmarks against nodes missing from A are dropped.
1693 1704 #
1694 1705 # Having this happen during a read is not great, but it becomes worse
1695 1706 # when this happens during a write because the bookmarks to the "unknown"
1696 1707 # nodes will be dropped for good. However, writes happen within locks.
1697 1708 # This locking makes it possible to have a race free consistent read.
1698 1709 # For this purpose data read from disk before locking are
1699 1710 # "invalidated" right after the locks are taken. These invalidations are
1700 1711 # "light": the `filecache` mechanism keeps the data in memory and will
1701 1712 # reuse them if the underlying files did not change. Not parsing the
1702 1713 # same data multiple times helps performance.
1703 1714 #
1704 1715 # Unfortunately in the case described above, the files tracked by the
1705 1716 # bookmarks file cache might not have changed, but the in-memory
1706 1717 # content is still "wrong" because we used an older changelog content
1707 1718 # to process the on-disk data. So after locking, the changelog would be
1708 1719 # refreshed but `_bookmarks` would be preserved.
1709 1720 # Adding `00changelog.i` to the list of tracked files is not
1710 1721 # enough, because at the time we build the content for `_bookmarks` in
1711 1722 # (4), the changelog file has already diverged from the content used
1712 1723 # for loading `changelog` in (1)
1713 1724 #
1714 1725 # To prevent the issue, we force the changelog to be explicitly
1715 1726 # reloaded while computing `_bookmarks`. The data race can still happen
1716 1727 # without the lock (with a narrower window), but it would no longer go
1717 1728 # undetected during the lock time refresh.
1718 1729 #
1719 1730 # The new schedule is as follows:
1720 1731 #
1721 1732 # 1) filecache logic detect that `_bookmarks` needs to be computed
1722 1733 # 2) cachestat for `bookmarks` and `changelog` are captured (for book)
1723 1734 # 3) We force `changelog` filecache to be tested
1724 1735 # 4) cachestat for `changelog` are captured (for changelog)
1725 1736 # 5) `_bookmarks` is computed and cached
1726 1737 #
1727 1738 # The step in (3) ensures we have a changelog at least as recent as the
1728 1739 # cache stat computed in (1). As a result, at locking time:
1729 1740 # * if the changelog did not change since (1) -> we can reuse the data
1730 1741 # * otherwise -> the bookmarks get refreshed.
1731 1742 self._refreshchangelog()
1732 1743 return bookmarks.bmstore(self)
1733 1744
1734 1745 def _refreshchangelog(self):
1735 1746 """make sure the in memory changelog match the on-disk one"""
1736 1747 if 'changelog' in vars(self) and self.currenttransaction() is None:
1737 1748 del self.changelog
1738 1749
1739 1750 @property
1740 1751 def _activebookmark(self):
1741 1752 return self._bookmarks.active
1742 1753
1743 1754 # _phasesets depend on changelog. what we need is to call
1744 1755 # _phasecache.invalidate() if '00changelog.i' was changed, but it
1745 1756 # can't be easily expressed in filecache mechanism.
1746 1757 @storecache(b'phaseroots', b'00changelog.i')
1747 1758 def _phasecache(self):
1748 1759 return phases.phasecache(self, self._phasedefaults)
1749 1760
1750 1761 @storecache(b'obsstore')
1751 1762 def obsstore(self):
1752 1763 return obsolete.makestore(self.ui, self)
1753 1764
1754 1765 @changelogcache()
1755 1766 def changelog(repo):
1756 1767 # load dirstate before changelog to avoid race see issue6303
1757 1768 repo.dirstate.prefetch_parents()
1758 1769 return repo.store.changelog(
1759 1770 txnutil.mayhavepending(repo.root),
1760 1771 concurrencychecker=revlogchecker.get_checker(repo.ui, b'changelog'),
1761 1772 )
1762 1773
1763 1774 @manifestlogcache()
1764 1775 def manifestlog(self):
1765 1776 return self.store.manifestlog(self, self._storenarrowmatch)
1766 1777
1767 1778 @unfilteredpropertycache
1768 1779 def dirstate(self):
1769 1780 if self._dirstate is None:
1770 1781 self._dirstate = self._makedirstate()
1771 1782 else:
1772 1783 self._dirstate.refresh()
1773 1784 return self._dirstate
1774 1785
1775 1786 def _makedirstate(self):
1776 1787 """Extension point for wrapping the dirstate per-repo."""
1777 1788 sparsematchfn = None
1778 1789 if sparse.use_sparse(self):
1779 1790 sparsematchfn = lambda: sparse.matcher(self)
1780 1791 v2_req = requirementsmod.DIRSTATE_V2_REQUIREMENT
1781 1792 th = requirementsmod.DIRSTATE_TRACKED_HINT_V1
1782 1793 use_dirstate_v2 = v2_req in self.requirements
1783 1794 use_tracked_hint = th in self.requirements
1784 1795
1785 1796 return dirstate.dirstate(
1786 1797 self.vfs,
1787 1798 self.ui,
1788 1799 self.root,
1789 1800 self._dirstatevalidate,
1790 1801 sparsematchfn,
1791 1802 self.nodeconstants,
1792 1803 use_dirstate_v2,
1793 1804 use_tracked_hint=use_tracked_hint,
1794 1805 )
1795 1806
1796 1807 def _dirstatevalidate(self, node):
1797 1808 try:
1798 1809 self.changelog.rev(node)
1799 1810 return node
1800 1811 except error.LookupError:
1801 1812 if not self._dirstatevalidatewarned:
1802 1813 self._dirstatevalidatewarned = True
1803 1814 self.ui.warn(
1804 1815 _(b"warning: ignoring unknown working parent %s!\n")
1805 1816 % short(node)
1806 1817 )
1807 1818 return self.nullid
1808 1819
1809 1820 @storecache(narrowspec.FILENAME)
1810 1821 def narrowpats(self):
1811 1822 """matcher patterns for this repository's narrowspec
1812 1823
1813 1824 A tuple of (includes, excludes).
1814 1825 """
1815 1826 # the narrow management should probably move into its own object
1816 1827 val = self._pending_narrow_pats
1817 1828 if val is None:
1818 1829 val = narrowspec.load(self)
1819 1830 return val
1820 1831
1821 1832 @storecache(narrowspec.FILENAME)
1822 1833 def _storenarrowmatch(self):
1823 1834 if requirementsmod.NARROW_REQUIREMENT not in self.requirements:
1824 1835 return matchmod.always()
1825 1836 include, exclude = self.narrowpats
1826 1837 return narrowspec.match(self.root, include=include, exclude=exclude)
1827 1838
1828 1839 @storecache(narrowspec.FILENAME)
1829 1840 def _narrowmatch(self):
1830 1841 if requirementsmod.NARROW_REQUIREMENT not in self.requirements:
1831 1842 return matchmod.always()
1832 1843 narrowspec.checkworkingcopynarrowspec(self)
1833 1844 include, exclude = self.narrowpats
1834 1845 return narrowspec.match(self.root, include=include, exclude=exclude)
1835 1846
1836 1847 def narrowmatch(self, match=None, includeexact=False):
1837 1848 """matcher corresponding the the repo's narrowspec
1838 1849
1839 1850 If `match` is given, then that will be intersected with the narrow
1840 1851 matcher.
1841 1852
1842 1853 If `includeexact` is True, then any exact matches from `match` will
1843 1854 be included even if they're outside the narrowspec.
1844 1855 """
1845 1856 if match:
1846 1857 if includeexact and not self._narrowmatch.always():
1847 1858 # do not exclude explicitly-specified paths so that they can
1848 1859 # be warned later on
1849 1860 em = matchmod.exact(match.files())
1850 1861 nm = matchmod.unionmatcher([self._narrowmatch, em])
1851 1862 return matchmod.intersectmatchers(match, nm)
1852 1863 return matchmod.intersectmatchers(match, self._narrowmatch)
1853 1864 return self._narrowmatch
1854 1865
1855 1866 def setnarrowpats(self, newincludes, newexcludes):
1856 1867 narrowspec.save(self, newincludes, newexcludes)
1857 1868 self.invalidate(clearfilecache=True)
1858 1869
1859 1870 @unfilteredpropertycache
1860 1871 def _quick_access_changeid_null(self):
1861 1872 return {
1862 1873 b'null': (nullrev, self.nodeconstants.nullid),
1863 1874 nullrev: (nullrev, self.nodeconstants.nullid),
1864 1875 self.nullid: (nullrev, self.nullid),
1865 1876 }
1866 1877
1867 1878 @unfilteredpropertycache
1868 1879 def _quick_access_changeid_wc(self):
1869 1880 # also fast path access to the working copy parents
1870 1881 # however, only do it for filters that ensure the wc is visible.
1871 1882 quick = self._quick_access_changeid_null.copy()
1872 1883 cl = self.unfiltered().changelog
1873 1884 for node in self.dirstate.parents():
1874 1885 if node == self.nullid:
1875 1886 continue
1876 1887 rev = cl.index.get_rev(node)
1877 1888 if rev is None:
1878 1889 # unknown working copy parent case:
1879 1890 #
1880 1891 # skip the fast path and let higher code deal with it
1881 1892 continue
1882 1893 pair = (rev, node)
1883 1894 quick[rev] = pair
1884 1895 quick[node] = pair
1885 1896 # also add the parents of the parents
1886 1897 for r in cl.parentrevs(rev):
1887 1898 if r == nullrev:
1888 1899 continue
1889 1900 n = cl.node(r)
1890 1901 pair = (r, n)
1891 1902 quick[r] = pair
1892 1903 quick[n] = pair
1893 1904 p1node = self.dirstate.p1()
1894 1905 if p1node != self.nullid:
1895 1906 quick[b'.'] = quick[p1node]
1896 1907 return quick
1897 1908
1898 1909 @unfilteredmethod
1899 1910 def _quick_access_changeid_invalidate(self):
1900 1911 if '_quick_access_changeid_wc' in vars(self):
1901 1912 del self.__dict__['_quick_access_changeid_wc']
1902 1913
1903 1914 @property
1904 1915 def _quick_access_changeid(self):
1905 1916 """an helper dictionnary for __getitem__ calls
1906 1917
1907 1918 This contains a list of symbols we can recognise right away without
1908 1919 further processing.
1909 1920 """
1910 1921 if self.filtername in repoview.filter_has_wc:
1911 1922 return self._quick_access_changeid_wc
1912 1923 return self._quick_access_changeid_null
1913 1924
1914 1925 def __getitem__(self, changeid):
1915 1926 # dealing with special cases
1916 1927 if changeid is None:
1917 1928 return context.workingctx(self)
1918 1929 if isinstance(changeid, context.basectx):
1919 1930 return changeid
1920 1931
1921 1932 # dealing with multiple revisions
1922 1933 if isinstance(changeid, slice):
1923 1934 # wdirrev isn't contiguous so the slice shouldn't include it
1924 1935 return [
1925 1936 self[i]
1926 1937 for i in range(*changeid.indices(len(self)))
1927 1938 if i not in self.changelog.filteredrevs
1928 1939 ]
1929 1940
1930 1941 # dealing with some special values
1931 1942 quick_access = self._quick_access_changeid.get(changeid)
1932 1943 if quick_access is not None:
1933 1944 rev, node = quick_access
1934 1945 return context.changectx(self, rev, node, maybe_filtered=False)
1935 1946 if changeid == b'tip':
1936 1947 node = self.changelog.tip()
1937 1948 rev = self.changelog.rev(node)
1938 1949 return context.changectx(self, rev, node)
1939 1950
1940 1951 # dealing with arbitrary values
1941 1952 try:
1942 1953 if isinstance(changeid, int):
1943 1954 node = self.changelog.node(changeid)
1944 1955 rev = changeid
1945 1956 elif changeid == b'.':
1946 1957 # this is a hack to delay/avoid loading obsmarkers
1947 1958 # when we know that '.' won't be hidden
1948 1959 node = self.dirstate.p1()
1949 1960 rev = self.unfiltered().changelog.rev(node)
1950 1961 elif len(changeid) == self.nodeconstants.nodelen:
1951 1962 try:
1952 1963 node = changeid
1953 1964 rev = self.changelog.rev(changeid)
1954 1965 except error.FilteredLookupError:
1955 1966 changeid = hex(changeid) # for the error message
1956 1967 raise
1957 1968 except LookupError:
1958 1969 # check if it might have come from damaged dirstate
1959 1970 #
1960 1971 # XXX we could avoid the unfiltered if we had a recognizable
1961 1972 # exception for filtered changeset access
1962 1973 if (
1963 1974 self.local()
1964 1975 and changeid in self.unfiltered().dirstate.parents()
1965 1976 ):
1966 1977 msg = _(b"working directory has unknown parent '%s'!")
1967 1978 raise error.Abort(msg % short(changeid))
1968 1979 changeid = hex(changeid) # for the error message
1969 1980 raise
1970 1981
1971 1982 elif len(changeid) == 2 * self.nodeconstants.nodelen:
1972 1983 node = bin(changeid)
1973 1984 rev = self.changelog.rev(node)
1974 1985 else:
1975 1986 raise error.ProgrammingError(
1976 1987 b"unsupported changeid '%s' of type %s"
1977 1988 % (changeid, pycompat.bytestr(type(changeid)))
1978 1989 )
1979 1990
1980 1991 return context.changectx(self, rev, node)
1981 1992
1982 1993 except (error.FilteredIndexError, error.FilteredLookupError):
1983 1994 raise error.FilteredRepoLookupError(
1984 1995 _(b"filtered revision '%s'") % pycompat.bytestr(changeid)
1985 1996 )
1986 1997 except (IndexError, LookupError):
1987 1998 raise error.RepoLookupError(
1988 1999 _(b"unknown revision '%s'") % pycompat.bytestr(changeid)
1989 2000 )
1990 2001 except error.WdirUnsupported:
1991 2002 return context.workingctx(self)
1992 2003
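# Illustrative lookups handled by __getitem__ above (assumed caller code, not
# part of this module):
#
#     wctx = repo[None]    # working directory context
#     ctx = repo[b'tip']   # symbolic name handled explicitly above
#     ctx = repo[0]        # integer revision
#     ctx = repo[b'.']     # first working directory parent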
1993 2004 def __contains__(self, changeid):
1994 2005 """True if the given changeid exists"""
1995 2006 try:
1996 2007 self[changeid]
1997 2008 return True
1998 2009 except error.RepoLookupError:
1999 2010 return False
2000 2011
2001 2012 def __nonzero__(self):
2002 2013 return True
2003 2014
2004 2015 __bool__ = __nonzero__
2005 2016
2006 2017 def __len__(self):
2007 2018 # no need to pay the cost of repoview.changelog
2008 2019 unfi = self.unfiltered()
2009 2020 return len(unfi.changelog)
2010 2021
2011 2022 def __iter__(self):
2012 2023 return iter(self.changelog)
2013 2024
2014 2025 def revs(self, expr: bytes, *args):
2015 2026 """Find revisions matching a revset.
2016 2027
2017 2028 The revset is specified as a string ``expr`` that may contain
2018 2029 %-formatting to escape certain types. See ``revsetlang.formatspec``.
2019 2030
2020 2031 Revset aliases from the configuration are not expanded. To expand
2021 2032 user aliases, consider calling ``scmutil.revrange()`` or
2022 2033 ``repo.anyrevs([expr], user=True)``.
2023 2034
2024 2035 Returns a smartset.abstractsmartset, which is a list-like interface
2025 2036 that contains integer revisions.
2026 2037 """
2027 2038 tree = revsetlang.spectree(expr, *args)
2028 2039 return revset.makematcher(tree)(self)
2029 2040
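# Illustrative usage (assumed caller code; ``somerev`` is a hypothetical
# integer revision): the %-escapes are resolved by revsetlang.formatspec.
#
#     for rev in repo.revs(b'ancestors(%d) and not public()', somerev):
#         ctx = repo[rev]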
2030 2041 def set(self, expr: bytes, *args):
2031 2042 """Find revisions matching a revset and emit changectx instances.
2032 2043
2033 2044 This is a convenience wrapper around ``revs()`` that iterates the
2034 2045 result and is a generator of changectx instances.
2035 2046
2036 2047 Revset aliases from the configuration are not expanded. To expand
2037 2048 user aliases, consider calling ``scmutil.revrange()``.
2038 2049 """
2039 2050 for r in self.revs(expr, *args):
2040 2051 yield self[r]
2041 2052
2042 2053 def anyrevs(self, specs: bytes, user=False, localalias=None):
2043 2054 """Find revisions matching one of the given revsets.
2044 2055
2045 2056 Revset aliases from the configuration are not expanded by default. To
2046 2057 expand user aliases, specify ``user=True``. To provide some local
2047 2058 definitions overriding user aliases, set ``localalias`` to
2048 2059 ``{name: definitionstring}``.
2049 2060 """
2050 2061 if specs == [b'null']:
2051 2062 return revset.baseset([nullrev])
2052 2063 if specs == [b'.']:
2053 2064 quick_data = self._quick_access_changeid.get(b'.')
2054 2065 if quick_data is not None:
2055 2066 return revset.baseset([quick_data[0]])
2056 2067 if user:
2057 2068 m = revset.matchany(
2058 2069 self.ui,
2059 2070 specs,
2060 2071 lookup=revset.lookupfn(self),
2061 2072 localalias=localalias,
2062 2073 )
2063 2074 else:
2064 2075 m = revset.matchany(None, specs, localalias=localalias)
2065 2076 return m(self)
2066 2077
2067 2078 def url(self) -> bytes:
2068 2079 return b'file:' + self.root
2069 2080
2070 2081 def hook(self, name, throw=False, **args):
2071 2082 """Call a hook, passing this repo instance.
2072 2083
2073 2084 This is a convenience method to aid invoking hooks. Extensions likely
2074 2085 won't call this unless they have registered a custom hook or are
2075 2086 replacing code that is expected to call a hook.
2076 2087 """
2077 2088 return hook.hook(self.ui, self, name, throw, **args)
2078 2089
2079 2090 @filteredpropertycache
2080 2091 def _tagscache(self):
2081 2092 """Returns a tagscache object that contains various tags related
2082 2093 caches."""
2083 2094
2084 2095 # This simplifies its cache management by having one decorated
2085 2096 # function (this one) and the rest simply fetch things from it.
2086 2097 class tagscache:
2087 2098 def __init__(self):
2088 2099 # These two define the set of tags for this repository. tags
2089 2100 # maps tag name to node; tagtypes maps tag name to 'global' or
2090 2101 # 'local'. (Global tags are defined by .hgtags across all
2091 2102 # heads, and local tags are defined in .hg/localtags.)
2092 2103 # They constitute the in-memory cache of tags.
2093 2104 self.tags = self.tagtypes = None
2094 2105
2095 2106 self.nodetagscache = self.tagslist = None
2096 2107
2097 2108 cache = tagscache()
2098 2109 cache.tags, cache.tagtypes = self._findtags()
2099 2110
2100 2111 return cache
2101 2112
2102 2113 def tags(self):
2103 2114 '''return a mapping of tag to node'''
2104 2115 t = {}
2105 2116 if self.changelog.filteredrevs:
2106 2117 tags, tt = self._findtags()
2107 2118 else:
2108 2119 tags = self._tagscache.tags
2109 2120 rev = self.changelog.rev
2110 2121 for k, v in tags.items():
2111 2122 try:
2112 2123 # ignore tags to unknown nodes
2113 2124 rev(v)
2114 2125 t[k] = v
2115 2126 except (error.LookupError, ValueError):
2116 2127 pass
2117 2128 return t
2118 2129
2119 2130 def _findtags(self):
2120 2131 """Do the hard work of finding tags. Return a pair of dicts
2121 2132 (tags, tagtypes) where tags maps tag name to node, and tagtypes
2122 2133 maps tag name to a string like \'global\' or \'local\'.
2123 2134 Subclasses or extensions are free to add their own tags, but
2124 2135 should be aware that the returned dicts will be retained for the
2125 2136 duration of the localrepo object."""
2126 2137
2127 2138 # XXX what tagtype should subclasses/extensions use? Currently
2128 2139 # mq and bookmarks add tags, but do not set the tagtype at all.
2129 2140 # Should each extension invent its own tag type? Should there
2130 2141 # be one tagtype for all such "virtual" tags? Or is the status
2131 2142 # quo fine?
2132 2143
2133 2144 # map tag name to (node, hist)
2134 2145 alltags = tagsmod.findglobaltags(self.ui, self)
2135 2146 # map tag name to tag type
2136 2147 tagtypes = {tag: b'global' for tag in alltags}
2137 2148
2138 2149 tagsmod.readlocaltags(self.ui, self, alltags, tagtypes)
2139 2150
2140 2151 # Build the return dicts. Have to re-encode tag names because
2141 2152 # the tags module always uses UTF-8 (in order not to lose info
2142 2153 # writing to the cache), but the rest of Mercurial wants them in
2143 2154 # local encoding.
2144 2155 tags = {}
2145 2156 for name, (node, hist) in alltags.items():
2146 2157 if node != self.nullid:
2147 2158 tags[encoding.tolocal(name)] = node
2148 2159 tags[b'tip'] = self.changelog.tip()
2149 2160 tagtypes = {
2150 2161 encoding.tolocal(name): value for (name, value) in tagtypes.items()
2151 2162 }
2152 2163 return (tags, tagtypes)
2153 2164
2154 2165 def tagtype(self, tagname):
2155 2166 """
2156 2167 return the type of the given tag. result can be:
2157 2168
2158 2169 'local' : a local tag
2159 2170 'global' : a global tag
2160 2171 None : tag does not exist
2161 2172 """
2162 2173
2163 2174 return self._tagscache.tagtypes.get(tagname)
2164 2175
2165 2176 def tagslist(self):
2166 2177 '''return a list of tags ordered by revision'''
2167 2178 if not self._tagscache.tagslist:
2168 2179 l = []
2169 2180 for t, n in self.tags().items():
2170 2181 l.append((self.changelog.rev(n), t, n))
2171 2182 self._tagscache.tagslist = [(t, n) for r, t, n in sorted(l)]
2172 2183
2173 2184 return self._tagscache.tagslist
2174 2185
2175 2186 def nodetags(self, node):
2176 2187 '''return the tags associated with a node'''
2177 2188 if not self._tagscache.nodetagscache:
2178 2189 nodetagscache = {}
2179 2190 for t, n in self._tagscache.tags.items():
2180 2191 nodetagscache.setdefault(n, []).append(t)
2181 2192 for tags in nodetagscache.values():
2182 2193 tags.sort()
2183 2194 self._tagscache.nodetagscache = nodetagscache
2184 2195 return self._tagscache.nodetagscache.get(node, [])
2185 2196
2186 2197 def nodebookmarks(self, node):
2187 2198 """return the list of bookmarks pointing to the specified node"""
2188 2199 return self._bookmarks.names(node)
2189 2200
2190 2201 def branchmap(self):
2191 2202 """returns a dictionary {branch: [branchheads]} with branchheads
2192 2203 ordered by increasing revision number"""
2193 2204 return self._branchcaches[self]
2194 2205
2195 2206 @unfilteredmethod
2196 2207 def revbranchcache(self):
2197 2208 if not self._revbranchcache:
2198 2209 self._revbranchcache = branchmap.revbranchcache(self.unfiltered())
2199 2210 return self._revbranchcache
2200 2211
2201 2212 def register_changeset(self, rev, changelogrevision):
2202 2213 self.revbranchcache().setdata(rev, changelogrevision)
2203 2214
2204 2215 def branchtip(self, branch, ignoremissing=False):
2205 2216 """return the tip node for a given branch
2206 2217
2207 2218 If ignoremissing is True, then this method will not raise an error.
2208 2219 This is helpful for callers that only expect None for a missing branch
2209 2220 (e.g. namespace).
2210 2221
2211 2222 """
2212 2223 try:
2213 2224 return self.branchmap().branchtip(branch)
2214 2225 except KeyError:
2215 2226 if not ignoremissing:
2216 2227 raise error.RepoLookupError(_(b"unknown branch '%s'") % branch)
2217 2228 else:
2218 2229 pass
2219 2230
2220 2231 def lookup(self, key):
2221 2232 node = scmutil.revsymbol(self, key).node()
2222 2233 if node is None:
2223 2234 raise error.RepoLookupError(_(b"unknown revision '%s'") % key)
2224 2235 return node
2225 2236
2226 2237 def lookupbranch(self, key):
2227 2238 if self.branchmap().hasbranch(key):
2228 2239 return key
2229 2240
2230 2241 return scmutil.revsymbol(self, key).branch()
2231 2242
2232 2243 def known(self, nodes):
2233 2244 cl = self.changelog
2234 2245 get_rev = cl.index.get_rev
2235 2246 filtered = cl.filteredrevs
2236 2247 result = []
2237 2248 for n in nodes:
2238 2249 r = get_rev(n)
2239 2250 resp = not (r is None or r in filtered)
2240 2251 result.append(resp)
2241 2252 return result
2242 2253
2243 2254 def local(self):
2244 2255 return self
2245 2256
2246 2257 def publishing(self):
2247 2258 # it's safe (and desirable) to trust the publish flag unconditionally
2248 2259 # so that we don't finalize changes shared between users via ssh or nfs
2249 2260 return self.ui.configbool(b'phases', b'publish', untrusted=True)
2250 2261
2251 2262 def cancopy(self):
2252 2263 # so statichttprepo's override of local() works
2253 2264 if not self.local():
2254 2265 return False
2255 2266 if not self.publishing():
2256 2267 return True
2257 2268 # if publishing we can't copy if there is filtered content
2258 2269 return not self.filtered(b'visible').changelog.filteredrevs
2259 2270
2260 2271 def shared(self):
2261 2272 '''the type of shared repository (None if not shared)'''
2262 2273 if self.sharedpath != self.path:
2263 2274 return b'store'
2264 2275 return None
2265 2276
2266 2277 def wjoin(self, f: bytes, *insidef: bytes) -> bytes:
2267 2278 return self.vfs.reljoin(self.root, f, *insidef)
2268 2279
2269 2280 def setparents(self, p1, p2=None):
2270 2281 if p2 is None:
2271 2282 p2 = self.nullid
2272 2283 self[None].setparents(p1, p2)
2273 2284 self._quick_access_changeid_invalidate()
2274 2285
2275 2286 def filectx(self, path: bytes, changeid=None, fileid=None, changectx=None):
2276 2287 """changeid must be a changeset revision, if specified.
2277 2288 fileid can be a file revision or node."""
2278 2289 return context.filectx(
2279 2290 self, path, changeid, fileid, changectx=changectx
2280 2291 )
2281 2292
2282 2293 def getcwd(self) -> bytes:
2283 2294 return self.dirstate.getcwd()
2284 2295
2285 2296 def pathto(self, f: bytes, cwd: Optional[bytes] = None) -> bytes:
2286 2297 return self.dirstate.pathto(f, cwd)
2287 2298
2288 2299 def _loadfilter(self, filter):
2289 2300 if filter not in self._filterpats:
2290 2301 l = []
2291 2302 for pat, cmd in self.ui.configitems(filter):
2292 2303 if cmd == b'!':
2293 2304 continue
2294 2305 mf = matchmod.match(self.root, b'', [pat])
2295 2306 fn = None
2296 2307 params = cmd
2297 2308 for name, filterfn in self._datafilters.items():
2298 2309 if cmd.startswith(name):
2299 2310 fn = filterfn
2300 2311 params = cmd[len(name) :].lstrip()
2301 2312 break
2302 2313 if not fn:
2303 2314 fn = lambda s, c, **kwargs: procutil.filter(s, c)
2304 2315 fn.__name__ = 'commandfilter'
2305 2316 # Wrap old filters not supporting keyword arguments
2306 2317 if not pycompat.getargspec(fn)[2]:
2307 2318 oldfn = fn
2308 2319 fn = lambda s, c, oldfn=oldfn, **kwargs: oldfn(s, c)
2309 2320 fn.__name__ = 'compat-' + oldfn.__name__
2310 2321 l.append((mf, fn, params))
2311 2322 self._filterpats[filter] = l
2312 2323 return self._filterpats[filter]
2313 2324
2314 2325 def _filter(self, filterpats, filename, data):
2315 2326 for mf, fn, cmd in filterpats:
2316 2327 if mf(filename):
2317 2328 self.ui.debug(
2318 2329 b"filtering %s through %s\n"
2319 2330 % (filename, cmd or pycompat.sysbytes(fn.__name__))
2320 2331 )
2321 2332 data = fn(data, cmd, ui=self.ui, repo=self, filename=filename)
2322 2333 break
2323 2334
2324 2335 return data
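# Illustrative sketch only (the config values below are examples, not
# defaults shipped with Mercurial): _loadfilter() reads sections such
# as::
#
#   [encode]
#   *.gz = pipe: gunzip
#
#   [decode]
#   *.gz = gzip
#
# Each entry becomes a (matcher, filterfn, params) triple; _filter()
# runs the first matching filter over the file data, falling back to
# procutil.filter() (a shell pipe) when no registered data filter name
# prefixes the command.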
2325 2336
2326 2337 @unfilteredpropertycache
2327 2338 def _encodefilterpats(self):
2328 2339 return self._loadfilter(b'encode')
2329 2340
2330 2341 @unfilteredpropertycache
2331 2342 def _decodefilterpats(self):
2332 2343 return self._loadfilter(b'decode')
2333 2344
2334 2345 def adddatafilter(self, name, filter):
2335 2346 self._datafilters[name] = filter
2336 2347
2337 2348 def wread(self, filename: bytes) -> bytes:
2338 2349 if self.wvfs.islink(filename):
2339 2350 data = self.wvfs.readlink(filename)
2340 2351 else:
2341 2352 data = self.wvfs.read(filename)
2342 2353 return self._filter(self._encodefilterpats, filename, data)
2343 2354
2344 2355 def wwrite(
2345 2356 self,
2346 2357 filename: bytes,
2347 2358 data: bytes,
2348 2359 flags: bytes,
2349 2360 backgroundclose=False,
2350 2361 **kwargs
2351 2362 ) -> int:
2352 2363 """write ``data`` into ``filename`` in the working directory
2353 2364
2354 2365 This returns the length of the written (maybe decoded) data.
2355 2366 """
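# A minimal usage sketch (file names and contents are hypothetical):
#
#   repo.wwrite(b'notes.txt', b'hello\n', b'')     # regular file
#   repo.wwrite(b'run.sh', b'#!/bin/sh\n', b'x')   # executable file
#   repo.wwrite(b'link', b'notes.txt', b'l')       # symlink to notes.txt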
2356 2367 data = self._filter(self._decodefilterpats, filename, data)
2357 2368 if b'l' in flags:
2358 2369 self.wvfs.symlink(data, filename)
2359 2370 else:
2360 2371 self.wvfs.write(
2361 2372 filename, data, backgroundclose=backgroundclose, **kwargs
2362 2373 )
2363 2374 if b'x' in flags:
2364 2375 self.wvfs.setflags(filename, False, True)
2365 2376 else:
2366 2377 self.wvfs.setflags(filename, False, False)
2367 2378 return len(data)
2368 2379
2369 2380 def wwritedata(self, filename: bytes, data: bytes) -> bytes:
2370 2381 return self._filter(self._decodefilterpats, filename, data)
2371 2382
2372 2383 def currenttransaction(self):
2373 2384 """return the current transaction or None if non exists"""
2374 2385 if self._transref:
2375 2386 tr = self._transref()
2376 2387 else:
2377 2388 tr = None
2378 2389
2379 2390 if tr and tr.running():
2380 2391 return tr
2381 2392 return None
2382 2393
2383 2394 def transaction(self, desc, report=None):
2384 2395 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
2385 2396 b'devel', b'check-locks'
2386 2397 ):
2387 2398 if self._currentlock(self._lockref) is None:
2388 2399 raise error.ProgrammingError(b'transaction requires locking')
2389 2400 tr = self.currenttransaction()
2390 2401 if tr is not None:
2391 2402 return tr.nest(name=desc)
2392 2403
2393 2404 # abort here if the journal already exists
2394 2405 if self.svfs.exists(b"journal"):
2395 2406 raise error.RepoError(
2396 2407 _(b"abandoned transaction found"),
2397 2408 hint=_(b"run 'hg recover' to clean up transaction"),
2398 2409 )
2399 2410
2400 2411 # At that point your dirstate should be clean:
2401 2412 #
2402 2413 # - If you don't have the wlock, why would you still have a dirty
2403 2414 #   dirstate?
2404 2415 #
2405 2416 # - If you hold the wlock, you should not be opening a transaction in
2406 2417 #   the middle of a `dirstate.changing_*` block. The transaction needs to
2407 2418 # be open before that and wrap the change-context.
2408 2419 #
2409 2420 # - If you are not within a `dirstate.changing_*` context, why is our
2410 2421 # dirstate dirty?
2411 2422 if self.dirstate._dirty:
2412 2423 m = "cannot open a transaction with a dirty dirstate"
2413 2424 raise error.ProgrammingError(m)
2414 2425
2415 2426 idbase = b"%.40f#%f" % (random.random(), time.time())
2416 2427 ha = hex(hashutil.sha1(idbase).digest())
2417 2428 txnid = b'TXN:' + ha
2418 2429 self.hook(b'pretxnopen', throw=True, txnname=desc, txnid=txnid)
2419 2430
2420 2431 self._writejournal(desc)
2421 2432 if report:
2422 2433 rp = report
2423 2434 else:
2424 2435 rp = self.ui.warn
2425 2436 vfsmap = self.vfs_map
2426 2437 # we must avoid cyclic reference between repo and transaction.
2427 2438 reporef = weakref.ref(self)
2428 2439 # Code to track tag movement
2429 2440 #
2430 2441 # Since tags are all handled as file content, it is actually quite hard
2431 2442 # to track these movements from a code perspective. So we fall back to
2432 2443 # tracking at the repository level. One could envision tracking changes
2433 2444 # to the '.hgtags' file through changegroup application, but that fails to
2434 2445 # cope with cases where a transaction exposes new heads without a
2435 2446 # changegroup being involved (eg: phase movement).
2436 2447 #
2437 2448 # For now, we gate the feature behind a flag since it likely comes
2438 2449 # with performance impacts. The current code runs more often than needed
2439 2450 # and does not use caches as much as it could. The current focus is on
2440 2451 # the behavior of the feature, so we disable it by default. The flag
2441 2452 # will be removed when we are happy with the performance impact.
2442 2453 #
2443 2454 # Once this feature is no longer experimental move the following
2444 2455 # documentation to the appropriate help section:
2445 2456 #
2446 2457 # The ``HG_TAG_MOVED`` variable will be set if the transaction touched
2447 2458 # tags (new or changed or deleted tags). In addition the details of
2448 2459 # these changes are made available in a file at:
2449 2460 # ``REPOROOT/.hg/changes/tags.changes``.
2450 2461 # Make sure you check for HG_TAG_MOVED before reading that file as it
2451 2462 # might exist from a previous transaction even if no tags were touched
2452 2463 # in this one. Changes are recorded in a line-based format::
2453 2464 #
2454 2465 # <action> <hex-node> <tag-name>\n
2455 2466 #
2456 2467 # Actions are defined as follows:
2457 2468 # "-R": tag is removed,
2458 2469 # "+A": tag is added,
2459 2470 # "-M": tag is moved (old value),
2460 2471 # "+M": tag is moved (new value),
2461 2472 tracktags = lambda x: None
2462 2473 # experimental config: experimental.hook-track-tags
2463 2474 shouldtracktags = self.ui.configbool(
2464 2475 b'experimental', b'hook-track-tags'
2465 2476 )
2466 2477 if desc != b'strip' and shouldtracktags:
2467 2478 oldheads = self.changelog.headrevs()
2468 2479
2469 2480 def tracktags(tr2):
2470 2481 repo = reporef()
2471 2482 assert repo is not None # help pytype
2472 2483 oldfnodes = tagsmod.fnoderevs(repo.ui, repo, oldheads)
2473 2484 newheads = repo.changelog.headrevs()
2474 2485 newfnodes = tagsmod.fnoderevs(repo.ui, repo, newheads)
2475 2486 # notes: we compare lists here.
2476 2487 # As we do it only once, building a set would not be cheaper
2477 2488 changes = tagsmod.difftags(repo.ui, repo, oldfnodes, newfnodes)
2478 2489 if changes:
2479 2490 tr2.hookargs[b'tag_moved'] = b'1'
2480 2491 with repo.vfs(
2481 2492 b'changes/tags.changes', b'w', atomictemp=True
2482 2493 ) as changesfile:
2483 2494 # note: we do not register the file with the transaction
2484 2495 # because we need it to still exist when the transaction
2485 2496 # is closed (for txnclose hooks)
2486 2497 tagsmod.writediff(changesfile, changes)
2487 2498
2488 2499 def validate(tr2):
2489 2500 """will run pre-closing hooks"""
2490 2501 # XXX the transaction API is a bit lacking here so we take a hacky
2491 2502 # path for now
2492 2503 #
2493 2504 # We cannot add this as a "pending" hook since the 'tr.hookargs'
2494 2505 # dict is copied before these run. In addition we need the data
2495 2506 # available to in-memory hooks too.
2496 2507 #
2497 2508 # Moreover, we also need to make sure this runs before txnclose
2498 2509 # hooks and there is no "pending" mechanism that would execute
2499 2510 # logic only if hooks are about to run.
2500 2511 #
2501 2512 # Fixing this limitation of the transaction is also needed to track
2502 2513 # other families of changes (bookmarks, phases, obsolescence).
2503 2514 #
2504 2515 # This will have to be fixed before we remove the experimental
2505 2516 # gating.
2506 2517 tracktags(tr2)
2507 2518 repo = reporef()
2508 2519 assert repo is not None # help pytype
2509 2520
2510 2521 singleheadopt = (b'experimental', b'single-head-per-branch')
2511 2522 singlehead = repo.ui.configbool(*singleheadopt)
2512 2523 if singlehead:
2513 2524 singleheadsub = repo.ui.configsuboptions(*singleheadopt)[1]
2514 2525 accountclosed = singleheadsub.get(
2515 2526 b"account-closed-heads", False
2516 2527 )
2517 2528 if singleheadsub.get(b"public-changes-only", False):
2518 2529 filtername = b"immutable"
2519 2530 else:
2520 2531 filtername = b"visible"
2521 2532 scmutil.enforcesinglehead(
2522 2533 repo, tr2, desc, accountclosed, filtername
2523 2534 )
2524 2535 if hook.hashook(repo.ui, b'pretxnclose-bookmark'):
2525 2536 for name, (old, new) in sorted(
2526 2537 tr.changes[b'bookmarks'].items()
2527 2538 ):
2528 2539 args = tr.hookargs.copy()
2529 2540 args.update(bookmarks.preparehookargs(name, old, new))
2530 2541 repo.hook(
2531 2542 b'pretxnclose-bookmark',
2532 2543 throw=True,
2533 2544 **pycompat.strkwargs(args)
2534 2545 )
2535 2546 if hook.hashook(repo.ui, b'pretxnclose-phase'):
2536 2547 cl = repo.unfiltered().changelog
2537 2548 for revs, (old, new) in tr.changes[b'phases']:
2538 2549 for rev in revs:
2539 2550 args = tr.hookargs.copy()
2540 2551 node = hex(cl.node(rev))
2541 2552 args.update(phases.preparehookargs(node, old, new))
2542 2553 repo.hook(
2543 2554 b'pretxnclose-phase',
2544 2555 throw=True,
2545 2556 **pycompat.strkwargs(args)
2546 2557 )
2547 2558
2548 2559 repo.hook(
2549 2560 b'pretxnclose', throw=True, **pycompat.strkwargs(tr.hookargs)
2550 2561 )
2551 2562
2552 2563 def releasefn(tr, success):
2553 2564 repo = reporef()
2554 2565 if repo is None:
2555 2566 # If the repo has been GC'd (and this release function is being
2556 2567 # called from transaction.__del__), there's not much we can do,
2557 2568 # so just leave the unfinished transaction there and let the
2558 2569 # user run `hg recover`.
2559 2570 return
2560 2571 if success:
2561 2572 # this should be explicitly invoked here, because
2562 2573 # in-memory changes aren't written out at closing
2563 2574 # transaction, if tr.addfilegenerator (via
2564 2575 # dirstate.write or so) isn't invoked while
2565 2576 # transaction running
2566 2577 repo.dirstate.write(None)
2567 2578 else:
2568 2579 # discard all changes (including ones already written
2569 2580 # out) in this transaction
2570 2581 repo.invalidate(clearfilecache=True)
2571 2582
2572 2583 tr = transaction.transaction(
2573 2584 rp,
2574 2585 self.svfs,
2575 2586 vfsmap,
2576 2587 b"journal",
2577 2588 b"undo",
2578 2589 lambda: None,
2579 2590 self.store.createmode,
2580 2591 validator=validate,
2581 2592 releasefn=releasefn,
2582 2593 checkambigfiles=_cachedfiles,
2583 2594 name=desc,
2584 2595 )
2585 2596 for vfs_id, path in self._journalfiles():
2586 2597 tr.add_journal(vfs_id, path)
2587 2598 tr.changes[b'origrepolen'] = len(self)
2588 2599 tr.changes[b'obsmarkers'] = set()
2589 2600 tr.changes[b'phases'] = []
2590 2601 tr.changes[b'bookmarks'] = {}
2591 2602
2592 2603 tr.hookargs[b'txnid'] = txnid
2593 2604 tr.hookargs[b'txnname'] = desc
2594 2605 tr.hookargs[b'changes'] = tr.changes
2595 2606 # note: writing the fncache only during finalize means that the file is
2596 2607 # outdated when running hooks. As fncache is used for streaming clone,
2597 2608 # this is not expected to break anything that happens during the hooks.
2598 2609 tr.addfinalize(b'flush-fncache', self.store.write)
2599 2610
2600 2611 def txnclosehook(tr2):
2601 2612 """To be run if transaction is successful, will schedule a hook run"""
2602 2613 # Don't reference tr2 in hook() so we don't hold a reference.
2603 2614 # This reduces memory consumption when there are multiple
2604 2615 # transactions per lock. This can likely go away if issue5045
2605 2616 # fixes the function accumulation.
2606 2617 hookargs = tr2.hookargs
2607 2618
2608 2619 def hookfunc(unused_success):
2609 2620 repo = reporef()
2610 2621 assert repo is not None # help pytype
2611 2622
2612 2623 if hook.hashook(repo.ui, b'txnclose-bookmark'):
2613 2624 bmchanges = sorted(tr.changes[b'bookmarks'].items())
2614 2625 for name, (old, new) in bmchanges:
2615 2626 args = tr.hookargs.copy()
2616 2627 args.update(bookmarks.preparehookargs(name, old, new))
2617 2628 repo.hook(
2618 2629 b'txnclose-bookmark',
2619 2630 throw=False,
2620 2631 **pycompat.strkwargs(args)
2621 2632 )
2622 2633
2623 2634 if hook.hashook(repo.ui, b'txnclose-phase'):
2624 2635 cl = repo.unfiltered().changelog
2625 2636 phasemv = sorted(
2626 2637 tr.changes[b'phases'], key=lambda r: r[0][0]
2627 2638 )
2628 2639 for revs, (old, new) in phasemv:
2629 2640 for rev in revs:
2630 2641 args = tr.hookargs.copy()
2631 2642 node = hex(cl.node(rev))
2632 2643 args.update(phases.preparehookargs(node, old, new))
2633 2644 repo.hook(
2634 2645 b'txnclose-phase',
2635 2646 throw=False,
2636 2647 **pycompat.strkwargs(args)
2637 2648 )
2638 2649
2639 2650 repo.hook(
2640 2651 b'txnclose', throw=False, **pycompat.strkwargs(hookargs)
2641 2652 )
2642 2653
2643 2654 repo = reporef()
2644 2655 assert repo is not None # help pytype
2645 2656 repo._afterlock(hookfunc)
2646 2657
2647 2658 tr.addfinalize(b'txnclose-hook', txnclosehook)
2648 2659 # Include a leading "-" to make it happen before the transaction summary
2649 2660 # reports registered via scmutil.registersummarycallback() whose names
2650 2661 # are 00-txnreport etc. That way, the caches will be warm when the
2651 2662 # callbacks run.
2652 2663 tr.addpostclose(b'-warm-cache', self._buildcacheupdater(tr))
2653 2664
2654 2665 def txnaborthook(tr2):
2655 2666 """To be run if transaction is aborted"""
2656 2667 repo = reporef()
2657 2668 assert repo is not None # help pytype
2658 2669 repo.hook(
2659 2670 b'txnabort', throw=False, **pycompat.strkwargs(tr2.hookargs)
2660 2671 )
2661 2672
2662 2673 tr.addabort(b'txnabort-hook', txnaborthook)
2663 2674 # avoid eager cache invalidation. in-memory data should be identical
2664 2675 # to stored data if transaction has no error.
2665 2676 tr.addpostclose(b'refresh-filecachestats', self._refreshfilecachestats)
2666 2677 self._transref = weakref.ref(tr)
2667 2678 scmutil.registersummarycallback(self, tr, desc)
2668 2679 # This only exists to deal with rollback's need to have viable
2669 2680 # parents at the end of the operation. So back up viable parents at the
2670 2681 # time of this operation.
2671 2682 #
2672 2683 # We only do it when the `wlock` is taken, otherwise others might be
2673 2684 # altering the dirstate under us.
2674 2685 #
2675 2686 # This is really not a great way to do this (first, because we cannot
2676 2687 # always do it). More viable alternatives exist:
2677 2688 #
2678 2689 # - backing up only the working copy parents in dedicated files and doing
2679 2690 #   a clean "keep-update" to them on `hg rollback`.
2680 2691 #
2681 2692 # - slightly changing the behavior and applying a logic similar to "hg
2682 2693 #   strip" to pick a working copy destination on `hg rollback`
2683 2694 if self.currentwlock() is not None:
2684 2695 ds = self.dirstate
2685 2696 if not self.vfs.exists(b'branch'):
2686 2697 # force a file to be written if none exists
2687 2698 ds.setbranch(b'default', None)
2688 2699
2689 2700 def backup_dirstate(tr):
2690 2701 for f in ds.all_file_names():
2691 2702 # hardlink backup is okay because `dirstate` is always
2692 2703 # atomically written and possible data files are append-only
2693 2704 # and resistant to trailing data.
2694 2705 tr.addbackup(f, hardlink=True, location=b'plain')
2695 2706
2696 2707 tr.addvalidator(b'dirstate-backup', backup_dirstate)
2697 2708 return tr
2698 2709
2699 2710 def _journalfiles(self):
2700 2711 return (
2701 2712 (self.svfs, b'journal'),
2702 2713 (self.vfs, b'journal.desc'),
2703 2714 )
2704 2715
2705 2716 def undofiles(self):
2706 2717 return [(vfs, undoname(x)) for vfs, x in self._journalfiles()]
2707 2718
2708 2719 @unfilteredmethod
2709 2720 def _writejournal(self, desc):
2710 2721 self.vfs.write(b"journal.desc", b"%d\n%s\n" % (len(self), desc))
2711 2722
2712 2723 def recover(self):
2713 2724 with self.lock():
2714 2725 if self.svfs.exists(b"journal"):
2715 2726 self.ui.status(_(b"rolling back interrupted transaction\n"))
2716 2727 vfsmap = self.vfs_map
2717 2728 transaction.rollback(
2718 2729 self.svfs,
2719 2730 vfsmap,
2720 2731 b"journal",
2721 2732 self.ui.warn,
2722 2733 checkambigfiles=_cachedfiles,
2723 2734 )
2724 2735 self.invalidate()
2725 2736 return True
2726 2737 else:
2727 2738 self.ui.warn(_(b"no interrupted transaction available\n"))
2728 2739 return False
2729 2740
2730 2741 def rollback(self, dryrun=False, force=False):
2731 2742 wlock = lock = None
2732 2743 try:
2733 2744 wlock = self.wlock()
2734 2745 lock = self.lock()
2735 2746 if self.svfs.exists(b"undo"):
2736 2747 return self._rollback(dryrun, force)
2737 2748 else:
2738 2749 self.ui.warn(_(b"no rollback information available\n"))
2739 2750 return 1
2740 2751 finally:
2741 2752 release(lock, wlock)
2742 2753
2743 2754 @unfilteredmethod # Until we get smarter cache management
2744 2755 def _rollback(self, dryrun, force):
2745 2756 ui = self.ui
2746 2757
2747 2758 parents = self.dirstate.parents()
2748 2759 try:
2749 2760 args = self.vfs.read(b'undo.desc').splitlines()
2750 2761 (oldlen, desc, detail) = (int(args[0]), args[1], None)
2751 2762 if len(args) >= 3:
2752 2763 detail = args[2]
2753 2764 oldtip = oldlen - 1
2754 2765
2755 2766 if detail and ui.verbose:
2756 2767 msg = _(
2757 2768 b'repository tip rolled back to revision %d'
2758 2769 b' (undo %s: %s)\n'
2759 2770 ) % (oldtip, desc, detail)
2760 2771 else:
2761 2772 msg = _(
2762 2773 b'repository tip rolled back to revision %d (undo %s)\n'
2763 2774 ) % (oldtip, desc)
2764 2775 parentgone = any(self[p].rev() > oldtip for p in parents)
2765 2776 except IOError:
2766 2777 msg = _(b'rolling back unknown transaction\n')
2767 2778 desc = None
2768 2779 parentgone = True
2769 2780
2770 2781 if not force and self[b'.'] != self[b'tip'] and desc == b'commit':
2771 2782 raise error.Abort(
2772 2783 _(
2773 2784 b'rollback of last commit while not checked out '
2774 2785 b'may lose data'
2775 2786 ),
2776 2787 hint=_(b'use -f to force'),
2777 2788 )
2778 2789
2779 2790 ui.status(msg)
2780 2791 if dryrun:
2781 2792 return 0
2782 2793
2783 2794 self.destroying()
2784 2795 vfsmap = self.vfs_map
2785 2796 skip_journal_pattern = None
2786 2797 if not parentgone:
2787 2798 skip_journal_pattern = RE_SKIP_DIRSTATE_ROLLBACK
2788 2799 transaction.rollback(
2789 2800 self.svfs,
2790 2801 vfsmap,
2791 2802 b'undo',
2792 2803 ui.warn,
2793 2804 checkambigfiles=_cachedfiles,
2794 2805 skip_journal_pattern=skip_journal_pattern,
2795 2806 )
2796 2807 self.invalidate()
2797 2808 self.dirstate.invalidate()
2798 2809
2799 2810 if parentgone:
2800 2811 # replace this with some explicit parent update in the future.
2801 2812 has_node = self.changelog.index.has_node
2802 2813 if not all(has_node(p) for p in self.dirstate._pl):
2803 2814 # There was no dirstate to backup initially, we need to drop
2804 2815 # the existing one.
2805 2816 with self.dirstate.changing_parents(self):
2806 2817 self.dirstate.setparents(self.nullid)
2807 2818 self.dirstate.clear()
2808 2819
2809 2820 parents = tuple([p.rev() for p in self[None].parents()])
2810 2821 if len(parents) > 1:
2811 2822 ui.status(
2812 2823 _(
2813 2824 b'working directory now based on '
2814 2825 b'revisions %d and %d\n'
2815 2826 )
2816 2827 % parents
2817 2828 )
2818 2829 else:
2819 2830 ui.status(
2820 2831 _(b'working directory now based on revision %d\n') % parents
2821 2832 )
2822 2833 mergestatemod.mergestate.clean(self)
2823 2834
2824 2835 # TODO: if we know which new heads may result from this rollback, pass
2825 2836 # them to destroy(), which will prevent the branchhead cache from being
2826 2837 # invalidated.
2827 2838 self.destroyed()
2828 2839 return 0
2829 2840
2830 2841 def _buildcacheupdater(self, newtransaction):
2831 2842 """called during transaction to build the callback updating cache
2832 2843
2833 2844 Lives on the repository to help extensions that might want to augment
2834 2845 this logic. For this purpose, the created transaction is passed to the
2835 2846 method.
2836 2847 """
2837 2848 # we must avoid cyclic reference between repo and transaction.
2838 2849 reporef = weakref.ref(self)
2839 2850
2840 2851 def updater(tr):
2841 2852 repo = reporef()
2842 2853 assert repo is not None # help pytype
2843 2854 repo.updatecaches(tr)
2844 2855
2845 2856 return updater
2846 2857
2847 2858 @unfilteredmethod
2848 2859 def updatecaches(self, tr=None, full=False, caches=None):
2849 2860 """warm appropriate caches
2850 2861
2851 2862 If this function is called after a transaction closed, the transaction
2852 2863 will be available in the 'tr' argument. This can be used to selectively
2853 2864 update caches relevant to the changes in that transaction.
2854 2865
2855 2866 If 'full' is set, make sure all caches the function knows about have
2856 2867 up-to-date data, even the ones usually loaded more lazily.
2857 2868
2858 2869 The `full` argument can take a special "post-clone" value. In this case
2859 2870 the cache warming is done after a clone and some of the slower caches might
2860 2871 be skipped, namely the `.fnodetags` one. This argument is 5.8 specific
2861 2872 as we plan for a cleaner way to deal with this in 5.9.
2862 2873 """
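# Usage sketch (assuming the caller holds the appropriate locks): an
# explicit set of caches can be requested instead of the deprecated
# `full` flag, e.g.::
#
#   repo.updatecaches(caches=repository.CACHES_DEFAULT)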
2863 2874 if tr is not None and tr.hookargs.get(b'source') == b'strip':
2864 2875 # During strip, many caches are invalid but
2865 2876 # a later call to `destroyed` will refresh them.
2866 2877 return
2867 2878
2868 2879 unfi = self.unfiltered()
2869 2880
2870 2881 if full:
2871 2882 msg = (
2872 2883 "`full` argument for `repo.updatecaches` is deprecated\n"
2873 2884 "(use `caches=repository.CACHE_ALL` instead)"
2874 2885 )
2875 2886 self.ui.deprecwarn(msg, b"5.9")
2876 2887 caches = repository.CACHES_ALL
2877 2888 if full == b"post-clone":
2878 2889 caches = repository.CACHES_POST_CLONE
2879 2890 caches = repository.CACHES_ALL
2880 2891 elif caches is None:
2881 2892 caches = repository.CACHES_DEFAULT
2882 2893
2883 2894 if repository.CACHE_BRANCHMAP_SERVED in caches:
2884 2895 if tr is None or tr.changes[b'origrepolen'] < len(self):
2885 2896 # accessing the 'served' branchmap should refresh all the others,
2886 2897 self.ui.debug(b'updating the branch cache\n')
2887 2898 self.filtered(b'served').branchmap()
2888 2899 self.filtered(b'served.hidden').branchmap()
2889 2900 # flush all possibly delayed write.
2890 2901 self._branchcaches.write_delayed(self)
2891 2902
2892 2903 if repository.CACHE_CHANGELOG_CACHE in caches:
2893 2904 self.changelog.update_caches(transaction=tr)
2894 2905
2895 2906 if repository.CACHE_MANIFESTLOG_CACHE in caches:
2896 2907 self.manifestlog.update_caches(transaction=tr)
2897 2908
2898 2909 if repository.CACHE_REV_BRANCH in caches:
2899 2910 rbc = unfi.revbranchcache()
2900 2911 for r in unfi.changelog:
2901 2912 rbc.branchinfo(r)
2902 2913 rbc.write()
2903 2914
2904 2915 if repository.CACHE_FULL_MANIFEST in caches:
2905 2916 # ensure the working copy parents are in the manifestfulltextcache
2906 2917 for ctx in self[b'.'].parents():
2907 2918 ctx.manifest() # accessing the manifest is enough
2908 2919
2909 2920 if repository.CACHE_FILE_NODE_TAGS in caches:
2910 2921 # accessing fnode cache warms the cache
2911 2922 tagsmod.fnoderevs(self.ui, unfi, unfi.changelog.revs())
2912 2923
2913 2924 if repository.CACHE_TAGS_DEFAULT in caches:
2914 2925 # accessing tags warms the cache
2915 2926 self.tags()
2916 2927 if repository.CACHE_TAGS_SERVED in caches:
2917 2928 self.filtered(b'served').tags()
2918 2929
2919 2930 if repository.CACHE_BRANCHMAP_ALL in caches:
2920 2931 # The CACHE_BRANCHMAP_ALL updates lazily-loaded caches immediately,
2921 2932 # so we're forcing a write to cause these caches to be warmed up
2922 2933 # even if they haven't explicitly been requested yet (if they've
2923 2934 # never been used by hg, they won't ever have been written, even if
2924 2935 # they're a subset of another kind of cache that *has* been used).
2925 2936 for filt in repoview.filtertable.keys():
2926 2937 filtered = self.filtered(filt)
2927 2938 filtered.branchmap().write(filtered)
2928 2939
2929 2940 def invalidatecaches(self):
2930 2941 if '_tagscache' in vars(self):
2931 2942 # can't use delattr on proxy
2932 2943 del self.__dict__['_tagscache']
2933 2944
2934 2945 self._branchcaches.clear()
2935 2946 self.invalidatevolatilesets()
2936 2947 self._sparsesignaturecache.clear()
2937 2948
2938 2949 def invalidatevolatilesets(self):
2939 2950 self.filteredrevcache.clear()
2940 2951 obsolete.clearobscaches(self)
2941 2952 self._quick_access_changeid_invalidate()
2942 2953
2943 2954 def invalidatedirstate(self):
2944 2955 """Invalidates the dirstate, causing the next call to dirstate
2945 2956 to check if it was modified since the last time it was read,
2946 2957 rereading it if it has.
2947 2958
2948 2959 This is different from dirstate.invalidate() in that it doesn't always
2949 2960 reread the dirstate. Use dirstate.invalidate() if you want to
2950 2961 explicitly read the dirstate again (i.e. restoring it to a previous
2951 2962 known good state)."""
2952 2963 unfi = self.unfiltered()
2953 2964 if 'dirstate' in unfi.__dict__:
2954 2965 assert not self.dirstate.is_changing_any
2955 2966 del unfi.__dict__['dirstate']
2956 2967
2957 2968 def invalidate(self, clearfilecache=False):
2958 2969 """Invalidates both store and non-store parts other than dirstate
2959 2970
2960 2971 If a transaction is running, invalidation of store is omitted,
2961 2972 because discarding in-memory changes might cause inconsistency
2962 2973 (e.g. incomplete fncache causes unintentional failure, but
2963 2974 redundant one doesn't).
2964 2975 """
2965 2976 unfiltered = self.unfiltered() # all file caches are stored unfiltered
2966 2977 for k in list(self._filecache.keys()):
2967 2978 if (
2968 2979 k == b'changelog'
2969 2980 and self.currenttransaction()
2970 2981 and self.changelog._delayed
2971 2982 ):
2972 2983 # The changelog object may store unwritten revisions. We don't
2973 2984 # want to lose them.
2974 2985 # TODO: Solve the problem instead of working around it.
2975 2986 continue
2976 2987
2977 2988 if clearfilecache:
2978 2989 del self._filecache[k]
2979 2990 try:
2980 2991 delattr(unfiltered, k)
2981 2992 except AttributeError:
2982 2993 pass
2983 2994 self.invalidatecaches()
2984 2995 if not self.currenttransaction():
2985 2996 # TODO: Changing contents of store outside transaction
2986 2997 # causes inconsistency. We should make in-memory store
2987 2998 # changes detectable, and abort if changed.
2988 2999 self.store.invalidatecaches()
2989 3000
2990 3001 def invalidateall(self):
2991 3002 """Fully invalidates both store and non-store parts, causing the
2992 3003 subsequent operation to reread any outside changes."""
2993 3004 # extension should hook this to invalidate its caches
2994 3005 self.invalidate()
2995 3006 self.invalidatedirstate()
2996 3007
2997 3008 @unfilteredmethod
2998 3009 def _refreshfilecachestats(self, tr):
2999 3010 """Reload stats of cached files so that they are flagged as valid"""
3000 3011 for k, ce in self._filecache.items():
3001 3012 k = pycompat.sysstr(k)
3002 3013 if k == 'dirstate' or k not in self.__dict__:
3003 3014 continue
3004 3015 ce.refresh()
3005 3016
3006 3017 def _lock(
3007 3018 self,
3008 3019 vfs,
3009 3020 lockname,
3010 3021 wait,
3011 3022 releasefn,
3012 3023 acquirefn,
3013 3024 desc,
3014 3025 ):
3015 3026 timeout = 0
3016 3027 warntimeout = 0
3017 3028 if wait:
3018 3029 timeout = self.ui.configint(b"ui", b"timeout")
3019 3030 warntimeout = self.ui.configint(b"ui", b"timeout.warn")
3020 3031 # internal config: ui.signal-safe-lock
3021 3032 signalsafe = self.ui.configbool(b'ui', b'signal-safe-lock')
3022 3033
3023 3034 l = lockmod.trylock(
3024 3035 self.ui,
3025 3036 vfs,
3026 3037 lockname,
3027 3038 timeout,
3028 3039 warntimeout,
3029 3040 releasefn=releasefn,
3030 3041 acquirefn=acquirefn,
3031 3042 desc=desc,
3032 3043 signalsafe=signalsafe,
3033 3044 )
3034 3045 return l
3035 3046
3036 3047 def _afterlock(self, callback):
3037 3048 """add a callback to be run when the repository is fully unlocked
3038 3049
3039 3050 The callback will be executed when the outermost lock is released
3040 3051 (with wlock being higher level than 'lock')."""
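# Minimal sketch (the callback below is hypothetical); callbacks
# receive a single success flag::
#
#   def _notify(success):
#       if success:
#           repo.ui.status(b'locks released\n')
#
#   repo._afterlock(_notify)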
3041 3052 for ref in (self._wlockref, self._lockref):
3042 3053 l = ref and ref()
3043 3054 if l and l.held:
3044 3055 l.postrelease.append(callback)
3045 3056 break
3046 3057 else: # no lock has been found.
3047 3058 callback(True)
3048 3059
3049 3060 def lock(self, wait=True):
3050 3061 """Lock the repository store (.hg/store) and return a weak reference
3051 3062 to the lock. Use this before modifying the store (e.g. committing or
3052 3063 stripping). If you are opening a transaction, get a lock as well.
3053 3064
3054 3065 If both 'lock' and 'wlock' must be acquired, ensure you always acquire
3055 3066 'wlock' first to avoid a dead-lock hazard."""
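# Typical usage sketch (the transaction name is hypothetical): take the
# locks in the documented order, then open the transaction::
#
#   with repo.wlock(), repo.lock():
#       with repo.transaction(b'my-change'):
#           ...  # modify the store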
3056 3067 l = self._currentlock(self._lockref)
3057 3068 if l is not None:
3058 3069 l.lock()
3059 3070 return l
3060 3071
3061 3072 l = self._lock(
3062 3073 vfs=self.svfs,
3063 3074 lockname=b"lock",
3064 3075 wait=wait,
3065 3076 releasefn=None,
3066 3077 acquirefn=self.invalidate,
3067 3078 desc=_(b'repository %s') % self.origroot,
3068 3079 )
3069 3080 self._lockref = weakref.ref(l)
3070 3081 return l
3071 3082
3072 3083 def wlock(self, wait=True):
3073 3084 """Lock the non-store parts of the repository (everything under
3074 3085 .hg except .hg/store) and return a weak reference to the lock.
3075 3086
3076 3087 Use this before modifying files in .hg.
3077 3088
3078 3089 If both 'lock' and 'wlock' must be acquired, ensure you always acquire
3079 3090 'wlock' first to avoid a dead-lock hazard."""
3080 3091 l = self._wlockref() if self._wlockref else None
3081 3092 if l is not None and l.held:
3082 3093 l.lock()
3083 3094 return l
3084 3095
3085 3096 # We do not need to check for non-waiting lock acquisition. Such
3086 3097 # acquisition would not cause dead-lock as they would just fail.
3087 3098 if wait and (
3088 3099 self.ui.configbool(b'devel', b'all-warnings')
3089 3100 or self.ui.configbool(b'devel', b'check-locks')
3090 3101 ):
3091 3102 if self._currentlock(self._lockref) is not None:
3092 3103 self.ui.develwarn(b'"wlock" acquired after "lock"')
3093 3104
3094 3105 def unlock():
3095 3106 if self.dirstate.is_changing_any:
3096 3107 msg = b"wlock release in the middle of a changing parents"
3097 3108 self.ui.develwarn(msg)
3098 3109 self.dirstate.invalidate()
3099 3110 else:
3100 3111 if self.dirstate._dirty:
3101 3112 msg = b"dirty dirstate on wlock release"
3102 3113 self.ui.develwarn(msg)
3103 3114 self.dirstate.write(None)
3104 3115
3105 3116 unfi = self.unfiltered()
3106 3117 if 'dirstate' in unfi.__dict__:
3107 3118 del unfi.__dict__['dirstate']
3108 3119
3109 3120 l = self._lock(
3110 3121 self.vfs,
3111 3122 b"wlock",
3112 3123 wait,
3113 3124 unlock,
3114 3125 self.invalidatedirstate,
3115 3126 _(b'working directory of %s') % self.origroot,
3116 3127 )
3117 3128 self._wlockref = weakref.ref(l)
3118 3129 return l
3119 3130
3120 3131 def _currentlock(self, lockref):
3121 3132 """Returns the lock if it's held, or None if it's not."""
3122 3133 if lockref is None:
3123 3134 return None
3124 3135 l = lockref()
3125 3136 if l is None or not l.held:
3126 3137 return None
3127 3138 return l
3128 3139
3129 3140 def currentwlock(self):
3130 3141 """Returns the wlock if it's held, or None if it's not."""
3131 3142 return self._currentlock(self._wlockref)
3132 3143
3133 3144 def checkcommitpatterns(self, wctx, match, status, fail):
3134 3145 """check for commit arguments that aren't committable"""
3135 3146 if match.isexact() or match.prefix():
3136 3147 matched = set(status.modified + status.added + status.removed)
3137 3148
3138 3149 for f in match.files():
3139 3150 f = self.dirstate.normalize(f)
3140 3151 if f == b'.' or f in matched or f in wctx.substate:
3141 3152 continue
3142 3153 if f in status.deleted:
3143 3154 fail(f, _(b'file not found!'))
3144 3155 # Is it a directory that exists or used to exist?
3145 3156 if self.wvfs.isdir(f) or wctx.p1().hasdir(f):
3146 3157 d = f + b'/'
3147 3158 for mf in matched:
3148 3159 if mf.startswith(d):
3149 3160 break
3150 3161 else:
3151 3162 fail(f, _(b"no match under directory!"))
3152 3163 elif f not in self.dirstate:
3153 3164 fail(f, _(b"file not tracked!"))
3154 3165
3155 3166 @unfilteredmethod
3156 3167 def commit(
3157 3168 self,
3158 3169 text=b"",
3159 3170 user=None,
3160 3171 date=None,
3161 3172 match=None,
3162 3173 force=False,
3163 3174 editor=None,
3164 3175 extra=None,
3165 3176 ):
3166 3177 """Add a new revision to current repository.
3167 3178
3168 3179 Revision information is gathered from the working directory,
3169 3180 match can be used to filter the committed files. If editor is
3170 3181 supplied, it is called to get a commit message.
3171 3182 """
3172 3183 if extra is None:
3173 3184 extra = {}
3174 3185
3175 3186 def fail(f, msg):
3176 3187 raise error.InputError(b'%s: %s' % (f, msg))
3177 3188
3178 3189 if not match:
3179 3190 match = matchmod.always()
3180 3191
3181 3192 if not force:
3182 3193 match.bad = fail
3183 3194
3184 3195 # lock() for recent changelog (see issue4368)
3185 3196 with self.wlock(), self.lock():
3186 3197 wctx = self[None]
3187 3198 merge = len(wctx.parents()) > 1
3188 3199
3189 3200 if not force and merge and not match.always():
3190 3201 raise error.Abort(
3191 3202 _(
3192 3203 b'cannot partially commit a merge '
3193 3204 b'(do not specify files or patterns)'
3194 3205 )
3195 3206 )
3196 3207
3197 3208 status = self.status(match=match, clean=force)
3198 3209 if force:
3199 3210 status.modified.extend(
3200 3211 status.clean
3201 3212 ) # mq may commit clean files
3202 3213
3203 3214 # check subrepos
3204 3215 subs, commitsubs, newstate = subrepoutil.precommit(
3205 3216 self.ui, wctx, status, match, force=force
3206 3217 )
3207 3218
3208 3219 # make sure all explicit patterns are matched
3209 3220 if not force:
3210 3221 self.checkcommitpatterns(wctx, match, status, fail)
3211 3222
3212 3223 cctx = context.workingcommitctx(
3213 3224 self, status, text, user, date, extra
3214 3225 )
3215 3226
3216 3227 ms = mergestatemod.mergestate.read(self)
3217 3228 mergeutil.checkunresolved(ms)
3218 3229
3219 3230 # internal config: ui.allowemptycommit
3220 3231 if cctx.isempty() and not self.ui.configbool(
3221 3232 b'ui', b'allowemptycommit'
3222 3233 ):
3223 3234 self.ui.debug(b'nothing to commit, clearing merge state\n')
3224 3235 ms.reset()
3225 3236 return None
3226 3237
3227 3238 if merge and cctx.deleted():
3228 3239 raise error.Abort(_(b"cannot commit merge with missing files"))
3229 3240
3230 3241 if editor:
3231 3242 cctx._text = editor(self, cctx, subs)
3232 3243 edited = text != cctx._text
3233 3244
3234 3245 # Save commit message in case this transaction gets rolled back
3235 3246 # (e.g. by a pretxncommit hook). Leave the content alone on
3236 3247 # the assumption that the user will use the same editor again.
3237 3248 msg_path = self.savecommitmessage(cctx._text)
3238 3249
3239 3250 # commit subs and write new state
3240 3251 if subs:
3241 3252 uipathfn = scmutil.getuipathfn(self)
3242 3253 for s in sorted(commitsubs):
3243 3254 sub = wctx.sub(s)
3244 3255 self.ui.status(
3245 3256 _(b'committing subrepository %s\n')
3246 3257 % uipathfn(subrepoutil.subrelpath(sub))
3247 3258 )
3248 3259 sr = sub.commit(cctx._text, user, date)
3249 3260 newstate[s] = (newstate[s][0], sr)
3250 3261 subrepoutil.writestate(self, newstate)
3251 3262
3252 3263 p1, p2 = self.dirstate.parents()
3253 3264 hookp1, hookp2 = hex(p1), (p2 != self.nullid and hex(p2) or b'')
3254 3265 try:
3255 3266 self.hook(
3256 3267 b"precommit", throw=True, parent1=hookp1, parent2=hookp2
3257 3268 )
3258 3269 with self.transaction(b'commit'):
3259 3270 ret = self.commitctx(cctx, True)
3260 3271 # update bookmarks, dirstate and mergestate
3261 3272 bookmarks.update(self, [p1, p2], ret)
3262 3273 cctx.markcommitted(ret)
3263 3274 ms.reset()
3264 3275 except: # re-raises
3265 3276 if edited:
3266 3277 self.ui.write(
3267 3278 _(b'note: commit message saved in %s\n') % msg_path
3268 3279 )
3269 3280 self.ui.write(
3270 3281 _(
3271 3282 b"note: use 'hg commit --logfile "
3272 3283 b"%s --edit' to reuse it\n"
3273 3284 )
3274 3285 % msg_path
3275 3286 )
3276 3287 raise
3277 3288
3278 3289 def commithook(unused_success):
3279 3290 # hack for commands that use a temporary commit (eg: histedit)
3280 3291 # temporary commit got stripped before hook release
3281 3292 if self.changelog.hasnode(ret):
3282 3293 self.hook(
3283 3294 b"commit", node=hex(ret), parent1=hookp1, parent2=hookp2
3284 3295 )
3285 3296
3286 3297 self._afterlock(commithook)
3287 3298 return ret
3288 3299
3289 3300 @unfilteredmethod
3290 3301 def commitctx(self, ctx, error=False, origctx=None):
3291 3302 return commit.commitctx(self, ctx, error=error, origctx=origctx)
3292 3303
3293 3304 @unfilteredmethod
3294 3305 def destroying(self):
3295 3306 """Inform the repository that nodes are about to be destroyed.
3296 3307 Intended for use by strip and rollback, so there's a common
3297 3308 place for anything that has to be done before destroying history.
3298 3309
3299 3310 This is mostly useful for saving state that is in memory and waiting
3300 3311 to be flushed when the current lock is released. Because a call to
3301 3312 destroyed is imminent, the repo will be invalidated causing those
3302 3313 changes to stay in memory (waiting for the next unlock), or vanish
3303 3314 completely.
3304 3315 """
3305 3316 # When using the same lock to commit and strip, the phasecache is left
3306 3317 # dirty after committing. Then when we strip, the repo is invalidated,
3307 3318 # causing those changes to disappear.
3308 3319 if '_phasecache' in vars(self):
3309 3320 self._phasecache.write()
3310 3321
3311 3322 @unfilteredmethod
3312 3323 def destroyed(self):
3313 3324 """Inform the repository that nodes have been destroyed.
3314 3325 Intended for use by strip and rollback, so there's a common
3315 3326 place for anything that has to be done after destroying history.
3316 3327 """
3317 3328 # When one tries to:
3318 3329 # 1) destroy nodes thus calling this method (e.g. strip)
3319 3330 # 2) use phasecache somewhere (e.g. commit)
3320 3331 #
3321 3332 # then 2) will fail because the phasecache contains nodes that were
3322 3333 # removed. We can either remove phasecache from the filecache,
3323 3334 # causing it to reload next time it is accessed, or simply filter
3324 3335 # the removed nodes now and write the updated cache.
3325 3336 self._phasecache.filterunknown(self)
3326 3337 self._phasecache.write()
3327 3338
3328 3339 # refresh all repository caches
3329 3340 self.updatecaches()
3330 3341
3331 3342 # Ensure the persistent tag cache is updated. Doing it now
3332 3343 # means that the tag cache only has to worry about destroyed
3333 3344 # heads immediately after a strip/rollback. That in turn
3334 3345 # guarantees that "cachetip == currenttip" (comparing both rev
3335 3346 # and node) always means no nodes have been added or destroyed.
3336 3347
3337 3348 # XXX this is suboptimal when qrefresh'ing: we strip the current
3338 3349 # head, refresh the tag cache, then immediately add a new head.
3339 3350 # But I think doing it this way is necessary for the "instant
3340 3351 # tag cache retrieval" case to work.
3341 3352 self.invalidate()
3342 3353
3343 3354 def status(
3344 3355 self,
3345 3356 node1=b'.',
3346 3357 node2=None,
3347 3358 match=None,
3348 3359 ignored=False,
3349 3360 clean=False,
3350 3361 unknown=False,
3351 3362 listsubrepos=False,
3352 3363 ):
3353 3364 '''a convenience method that calls node1.status(node2)'''
3354 3365 return self[node1].status(
3355 3366 node2, match, ignored, clean, unknown, listsubrepos
3356 3367 )
3357 3368
3358 3369 def addpostdsstatus(self, ps):
3359 3370 """Add a callback to run within the wlock, at the point at which status
3360 3371 fixups happen.
3361 3372
3362 3373 On status completion, callback(wctx, status) will be called with the
3363 3374 wlock held, unless the dirstate has changed from underneath or the wlock
3364 3375 couldn't be grabbed.
3365 3376
3366 3377 Callbacks should not capture and use a cached copy of the dirstate --
3367 3378 it might change in the meantime. Instead, they should access the
3368 3379 dirstate via wctx.repo().dirstate.
3369 3380
3370 3381 This list is emptied out after each status run -- extensions should
3371 3382 make sure they add to this list each time dirstate.status is called.
3372 3383 Extensions should also make sure they don't call this for statuses
3373 3384 that don't involve the dirstate.
3374 3385 """
3375 3386
3376 3387 # The list is located here for uniqueness reasons -- it is actually
3377 3388 # managed by the workingctx, but that isn't unique per-repo.
3378 3389 self._postdsstatus.append(ps)
3379 3390
3380 3391 def postdsstatus(self):
3381 3392 """Used by workingctx to get the list of post-dirstate-status hooks."""
3382 3393 return self._postdsstatus
3383 3394
3384 3395 def clearpostdsstatus(self):
3385 3396 """Used by workingctx to clear post-dirstate-status hooks."""
3386 3397 del self._postdsstatus[:]
3387 3398
3388 3399 def heads(self, start=None):
3389 3400 if start is None:
3390 3401 cl = self.changelog
3391 3402 headrevs = reversed(cl.headrevs())
3392 3403 return [cl.node(rev) for rev in headrevs]
3393 3404
3394 3405 heads = self.changelog.heads(start)
3395 3406 # sort the output in rev descending order
3396 3407 return sorted(heads, key=self.changelog.rev, reverse=True)
3397 3408
3398 3409 def branchheads(self, branch=None, start=None, closed=False):
3399 3410 """return a (possibly filtered) list of heads for the given branch
3400 3411
3401 3412 Heads are returned in topological order, from newest to oldest.
3402 3413 If branch is None, use the dirstate branch.
3403 3414 If start is not None, return only heads reachable from start.
3404 3415 If closed is True, return heads that are marked as closed as well.
3405 3416 """
3406 3417 if branch is None:
3407 3418 branch = self[None].branch()
3408 3419 branches = self.branchmap()
3409 3420 if not branches.hasbranch(branch):
3410 3421 return []
3411 3422 # the cache returns heads ordered lowest to highest
3412 3423 bheads = list(reversed(branches.branchheads(branch, closed=closed)))
3413 3424 if start is not None:
3414 3425 # filter out the heads that cannot be reached from startrev
3415 3426 fbheads = set(self.changelog.nodesbetween([start], bheads)[2])
3416 3427 bheads = [h for h in bheads if h in fbheads]
3417 3428 return bheads
3418 3429
3419 3430 def branches(self, nodes):
3420 3431 if not nodes:
3421 3432 nodes = [self.changelog.tip()]
3422 3433 b = []
3423 3434 for n in nodes:
3424 3435 t = n
3425 3436 while True:
3426 3437 p = self.changelog.parents(n)
3427 3438 if p[1] != self.nullid or p[0] == self.nullid:
3428 3439 b.append((t, n, p[0], p[1]))
3429 3440 break
3430 3441 n = p[0]
3431 3442 return b
3432 3443
3433 3444 def between(self, pairs):
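# For each (top, bottom) pair, walk the first-parent chain down from
# top towards bottom and record the nodes seen at exponentially
# growing distances (1, 2, 4, 8, ...) from top.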
3434 3445 r = []
3435 3446
3436 3447 for top, bottom in pairs:
3437 3448 n, l, i = top, [], 0
3438 3449 f = 1
3439 3450
3440 3451 while n != bottom and n != self.nullid:
3441 3452 p = self.changelog.parents(n)[0]
3442 3453 if i == f:
3443 3454 l.append(n)
3444 3455 f = f * 2
3445 3456 n = p
3446 3457 i += 1
3447 3458
3448 3459 r.append(l)
3449 3460
3450 3461 return r
3451 3462
3452 3463 def checkpush(self, pushop):
3453 3464 """Extensions can override this function if additional checks have
3454 3465 to be performed before pushing, or call it if they override push
3455 3466 command.
3456 3467 """
3457 3468
3458 3469 @unfilteredpropertycache
3459 3470 def prepushoutgoinghooks(self):
3460 3471 """Return util.hooks consists of a pushop with repo, remote, outgoing
3461 3472 methods, which are called before pushing changesets.
3462 3473 """
3463 3474 return util.hooks()
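# Sketch of how an extension might use this (names and the check are
# hypothetical); registered hooks are called with the in-flight pushop::
#
#   def _checkoutgoing(pushop):
#       if some_condition(pushop.outgoing):
#           raise error.Abort(b'refusing to push')
#
#   repo.prepushoutgoinghooks.add(b'myext', _checkoutgoing)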
3464 3475
3465 3476 def pushkey(self, namespace, key, old, new):
3466 3477 try:
3467 3478 tr = self.currenttransaction()
3468 3479 hookargs = {}
3469 3480 if tr is not None:
3470 3481 hookargs.update(tr.hookargs)
3471 3482 hookargs = pycompat.strkwargs(hookargs)
3472 3483 hookargs['namespace'] = namespace
3473 3484 hookargs['key'] = key
3474 3485 hookargs['old'] = old
3475 3486 hookargs['new'] = new
3476 3487 self.hook(b'prepushkey', throw=True, **hookargs)
3477 3488 except error.HookAbort as exc:
3478 3489 self.ui.write_err(_(b"pushkey-abort: %s\n") % exc)
3479 3490 if exc.hint:
3480 3491 self.ui.write_err(_(b"(%s)\n") % exc.hint)
3481 3492 return False
3482 3493 self.ui.debug(b'pushing key for "%s:%s"\n' % (namespace, key))
3483 3494 ret = pushkey.push(self, namespace, key, old, new)
3484 3495
3485 3496 def runhook(unused_success):
3486 3497 self.hook(
3487 3498 b'pushkey',
3488 3499 namespace=namespace,
3489 3500 key=key,
3490 3501 old=old,
3491 3502 new=new,
3492 3503 ret=ret,
3493 3504 )
3494 3505
3495 3506 self._afterlock(runhook)
3496 3507 return ret
3497 3508
3498 3509 def listkeys(self, namespace):
3499 3510 self.hook(b'prelistkeys', throw=True, namespace=namespace)
3500 3511 self.ui.debug(b'listing keys for "%s"\n' % namespace)
3501 3512 values = pushkey.list(self, namespace)
3502 3513 self.hook(b'listkeys', namespace=namespace, values=values)
3503 3514 return values
3504 3515
3505 3516 def debugwireargs(self, one, two, three=None, four=None, five=None):
3506 3517 '''used to test argument passing over the wire'''
3507 3518 return b"%s %s %s %s %s" % (
3508 3519 one,
3509 3520 two,
3510 3521 pycompat.bytestr(three),
3511 3522 pycompat.bytestr(four),
3512 3523 pycompat.bytestr(five),
3513 3524 )
3514 3525
3515 3526 def savecommitmessage(self, text):
3516 3527 fp = self.vfs(b'last-message.txt', b'wb')
3517 3528 try:
3518 3529 fp.write(text)
3519 3530 finally:
3520 3531 fp.close()
3521 3532 return self.pathto(fp.name[len(self.root) + 1 :])
3522 3533
3523 3534 def register_wanted_sidedata(self, category):
3524 3535 if repository.REPO_FEATURE_SIDE_DATA not in self.features:
3525 3536 # Only revlogv2 repos can want sidedata.
3526 3537 return
3527 3538 self._wanted_sidedata.add(pycompat.bytestr(category))
3528 3539
3529 3540 def register_sidedata_computer(
3530 3541 self, kind, category, keys, computer, flags, replace=False
3531 3542 ):
3532 3543 if kind not in revlogconst.ALL_KINDS:
3533 3544 msg = _(b"unexpected revlog kind '%s'.")
3534 3545 raise error.ProgrammingError(msg % kind)
3535 3546 category = pycompat.bytestr(category)
3536 3547 already_registered = category in self._sidedata_computers.get(kind, [])
3537 3548 if already_registered and not replace:
3538 3549 msg = _(
3539 3550 b"cannot register a sidedata computer twice for category '%s'."
3540 3551 )
3541 3552 raise error.ProgrammingError(msg % category)
3542 3553 if replace and not already_registered:
3543 3554 msg = _(
3544 3555 b"cannot replace a sidedata computer that isn't registered "
3545 3556 b"for category '%s'."
3546 3557 )
3547 3558 raise error.ProgrammingError(msg % category)
3548 3559 self._sidedata_computers.setdefault(kind, {})
3549 3560 self._sidedata_computers[kind][category] = (keys, computer, flags)
3550 3561
3551 3562
3552 3563 def undoname(fn: bytes) -> bytes:
3553 3564 base, name = os.path.split(fn)
3554 3565 assert name.startswith(b'journal')
3555 3566 return os.path.join(base, name.replace(b'journal', b'undo', 1))
3556 3567
3557 3568
3558 3569 def instance(ui, path: bytes, create, intents=None, createopts=None):
3559 3570 # prevent cyclic import localrepo -> upgrade -> localrepo
3560 3571 from . import upgrade
3561 3572
3562 3573 localpath = urlutil.urllocalpath(path)
3563 3574 if create:
3564 3575 createrepository(ui, localpath, createopts=createopts)
3565 3576
3566 3577 def repo_maker():
3567 3578 return makelocalrepository(ui, localpath, intents=intents)
3568 3579
3569 3580 repo = repo_maker()
3570 3581 repo = upgrade.may_auto_upgrade(repo, repo_maker)
3571 3582 return repo
3572 3583
3573 3584
3574 3585 def islocal(path: bytes) -> bool:
3575 3586 return True
3576 3587
3577 3588
3578 3589 def defaultcreateopts(ui, createopts=None):
3579 3590 """Populate the default creation options for a repository.
3580 3591
3581 3592 A dictionary of explicitly requested creation options can be passed
3582 3593 in. Missing keys will be populated.
3583 3594 """
3584 3595 createopts = dict(createopts or {})
3585 3596
3586 3597 if b'backend' not in createopts:
3587 3598 # experimental config: storage.new-repo-backend
3588 3599 createopts[b'backend'] = ui.config(b'storage', b'new-repo-backend')
3589 3600
3590 3601 return createopts
3591 3602
3592 3603
3593 3604 def clone_requirements(ui, createopts, srcrepo):
3594 3605 """clone the requirements of a local repo for a local clone
3595 3606
3596 3607 The store requirements are unchanged while the working copy requirements
3597 3608 depends on the configuration
3598 3609 """
3599 3610 target_requirements = set()
3600 3611 if not srcrepo.requirements:
3601 3612 # this is a legacy revlog "v0" repository, we cannot do anything fancy
3602 3613 # with it.
3603 3614 return target_requirements
3604 3615 createopts = defaultcreateopts(ui, createopts=createopts)
3605 3616 for r in newreporequirements(ui, createopts):
3606 3617 if r in requirementsmod.WORKING_DIR_REQUIREMENTS:
3607 3618 target_requirements.add(r)
3608 3619
3609 3620 for r in srcrepo.requirements:
3610 3621 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS:
3611 3622 target_requirements.add(r)
3612 3623 return target_requirements
3613 3624
3614 3625
3615 3626 def newreporequirements(ui, createopts):
3616 3627 """Determine the set of requirements for a new local repository.
3617 3628
3618 3629 Extensions can wrap this function to specify custom requirements for
3619 3630 new repositories.
3620 3631 """
3621 3632
3622 3633 if b'backend' not in createopts:
3623 3634 raise error.ProgrammingError(
3624 3635 b'backend key not present in createopts; '
3625 3636 b'was defaultcreateopts() called?'
3626 3637 )
3627 3638
3628 3639 if createopts[b'backend'] != b'revlogv1':
3629 3640 raise error.Abort(
3630 3641 _(
3631 3642 b'unable to determine repository requirements for '
3632 3643 b'storage backend: %s'
3633 3644 )
3634 3645 % createopts[b'backend']
3635 3646 )
3636 3647
3637 3648 requirements = {requirementsmod.REVLOGV1_REQUIREMENT}
3638 3649 if ui.configbool(b'format', b'usestore'):
3639 3650 requirements.add(requirementsmod.STORE_REQUIREMENT)
3640 3651 if ui.configbool(b'format', b'usefncache'):
3641 3652 requirements.add(requirementsmod.FNCACHE_REQUIREMENT)
3642 3653 if ui.configbool(b'format', b'dotencode'):
3643 3654 requirements.add(requirementsmod.DOTENCODE_REQUIREMENT)
3644 3655
3645 3656 compengines = ui.configlist(b'format', b'revlog-compression')
3646 3657 for compengine in compengines:
3647 3658 if compengine in util.compengines:
3648 3659 engine = util.compengines[compengine]
3649 3660 if engine.available() and engine.revlogheader():
3650 3661 break
3651 3662 else:
3652 3663 raise error.Abort(
3653 3664 _(
3654 3665 b'compression engines %s defined by '
3655 3666 b'format.revlog-compression not available'
3656 3667 )
3657 3668 % b', '.join(b'"%s"' % e for e in compengines),
3658 3669 hint=_(
3659 3670 b'run "hg debuginstall" to list available '
3660 3671 b'compression engines'
3661 3672 ),
3662 3673 )
3663 3674
3664 3675 # zlib is the historical default and doesn't need an explicit requirement.
3665 3676 if compengine == b'zstd':
3666 3677 requirements.add(b'revlog-compression-zstd')
3667 3678 elif compengine != b'zlib':
3668 3679 requirements.add(b'exp-compression-%s' % compengine)
3669 3680
3670 3681 if scmutil.gdinitconfig(ui):
3671 3682 requirements.add(requirementsmod.GENERALDELTA_REQUIREMENT)
3672 3683 if ui.configbool(b'format', b'sparse-revlog'):
3673 3684 requirements.add(requirementsmod.SPARSEREVLOG_REQUIREMENT)
3674 3685
3675 3686 # experimental config: format.use-dirstate-v2
3676 3687 # Keep this logic in sync with `has_dirstate_v2()` in `tests/hghave.py`
3677 3688 if ui.configbool(b'format', b'use-dirstate-v2'):
3678 3689 requirements.add(requirementsmod.DIRSTATE_V2_REQUIREMENT)
3679 3690
3680 3691 # experimental config: format.exp-use-copies-side-data-changeset
3681 3692 if ui.configbool(b'format', b'exp-use-copies-side-data-changeset'):
3682 3693 requirements.add(requirementsmod.CHANGELOGV2_REQUIREMENT)
3683 3694 requirements.add(requirementsmod.COPIESSDC_REQUIREMENT)
3684 3695 if ui.configbool(b'experimental', b'treemanifest'):
3685 3696 requirements.add(requirementsmod.TREEMANIFEST_REQUIREMENT)
3686 3697
3687 3698 changelogv2 = ui.config(b'format', b'exp-use-changelog-v2')
3688 3699 if changelogv2 == b'enable-unstable-format-and-corrupt-my-data':
3689 3700 requirements.add(requirementsmod.CHANGELOGV2_REQUIREMENT)
3690 3701
3691 3702 revlogv2 = ui.config(b'experimental', b'revlogv2')
3692 3703 if revlogv2 == b'enable-unstable-format-and-corrupt-my-data':
3693 3704 requirements.discard(requirementsmod.REVLOGV1_REQUIREMENT)
3694 3705 requirements.add(requirementsmod.REVLOGV2_REQUIREMENT)
3695 3706 # experimental config: format.internal-phase
3696 3707 if ui.configbool(b'format', b'use-internal-phase'):
3697 3708 requirements.add(requirementsmod.INTERNAL_PHASE_REQUIREMENT)
3698 3709
3699 3710 # experimental config: format.exp-archived-phase
3700 3711 if ui.configbool(b'format', b'exp-archived-phase'):
3701 3712 requirements.add(requirementsmod.ARCHIVED_PHASE_REQUIREMENT)
3702 3713
3703 3714 if createopts.get(b'narrowfiles'):
3704 3715 requirements.add(requirementsmod.NARROW_REQUIREMENT)
3705 3716
3706 3717 if createopts.get(b'lfs'):
3707 3718 requirements.add(b'lfs')
3708 3719
3709 3720 if ui.configbool(b'format', b'bookmarks-in-store'):
3710 3721 requirements.add(requirementsmod.BOOKMARKS_IN_STORE_REQUIREMENT)
3711 3722
3712 3723 if ui.configbool(b'format', b'use-persistent-nodemap'):
3713 3724 requirements.add(requirementsmod.NODEMAP_REQUIREMENT)
3714 3725
3715 3726 # if share-safe is enabled, let's create the new repository with the new
3716 3727 # requirement
3717 3728 if ui.configbool(b'format', b'use-share-safe'):
3718 3729 requirements.add(requirementsmod.SHARESAFE_REQUIREMENT)
3719 3730
3720 3731 # if we are creating a share-repo¹ we have to handle requirements
3721 3732 # differently.
3722 3733 #
3723 3734 # [1] (i.e. reusing the store from another repository, just having a
3724 3735 # working copy)
3725 3736 if b'sharedrepo' in createopts:
3726 3737 source_requirements = set(createopts[b'sharedrepo'].requirements)
3727 3738
3728 3739 if requirementsmod.SHARESAFE_REQUIREMENT not in source_requirements:
3729 3740 # share to an old school repository, we have to copy the
3730 3741 # requirements and hope for the best.
3731 3742 requirements = source_requirements
3732 3743 else:
3733 3744 # We only have control over the working copy, so "copy" the
3734 3745 # non-working-copy part over, ignoring previous logic.
3735 3746 to_drop = set()
3736 3747 for req in requirements:
3737 3748 if req in requirementsmod.WORKING_DIR_REQUIREMENTS:
3738 3749 continue
3739 3750 if req in source_requirements:
3740 3751 continue
3741 3752 to_drop.add(req)
3742 3753 requirements -= to_drop
3743 3754 requirements |= source_requirements
3744 3755
3745 3756 if createopts.get(b'sharedrelative'):
3746 3757 requirements.add(requirementsmod.RELATIVE_SHARED_REQUIREMENT)
3747 3758 else:
3748 3759 requirements.add(requirementsmod.SHARED_REQUIREMENT)
3749 3760
3750 3761 if ui.configbool(b'format', b'use-dirstate-tracked-hint'):
3751 3762 version = ui.configint(b'format', b'use-dirstate-tracked-hint.version')
3752 3763 msg = _(b"ignoring unknown tracked key version: %d\n")
3753 3764 hint = _(
3754 3765 b"see `hg help config.format.use-dirstate-tracked-hint-version"
3755 3766 )
3756 3767 if version != 1:
3757 3768 ui.warn(msg % version, hint=hint)
3758 3769 else:
3759 3770 requirements.add(requirementsmod.DIRSTATE_TRACKED_HINT_V1)
3760 3771
3761 3772 return requirements
3762 3773
3763 3774
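As the docstring of newreporequirements() notes, extensions can wrap it to add their own requirements. A minimal sketch of such an extension follows; the extension name, the config knob and the requirement string are hypothetical, only the standard extensions.wrapfunction() mechanism is assumed.

    from mercurial import extensions, localrepo

    def _newreporequirements(orig, ui, createopts):
        # compute the default requirements first, then add our own
        requirements = orig(ui, createopts)
        if ui.configbool(b'myext', b'enable-feature'):  # hypothetical config knob
            requirements.add(b'exp-myext-feature')      # hypothetical requirement
        return requirements

    def uisetup(ui):
        extensions.wrapfunction(
            localrepo, 'newreporequirements', _newreporequirements
        )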
3764 3775 def checkrequirementscompat(ui, requirements):
3765 3776 """Checks compatibility of repository requirements enabled and disabled.
3766 3777
3767 3778 Returns a set of requirements which need to be dropped because dependent
3768 3779 requirements are not enabled. Also warns users about it"""
3769 3780
3770 3781 dropped = set()
3771 3782
3772 3783 if requirementsmod.STORE_REQUIREMENT not in requirements:
3773 3784 if requirementsmod.BOOKMARKS_IN_STORE_REQUIREMENT in requirements:
3774 3785 ui.warn(
3775 3786 _(
3776 3787 b'ignoring enabled \'format.bookmarks-in-store\' config '
3777 3788 b'because it is incompatible with disabled '
3778 3789 b'\'format.usestore\' config\n'
3779 3790 )
3780 3791 )
3781 3792 dropped.add(requirementsmod.BOOKMARKS_IN_STORE_REQUIREMENT)
3782 3793
3783 3794 if (
3784 3795 requirementsmod.SHARED_REQUIREMENT in requirements
3785 3796 or requirementsmod.RELATIVE_SHARED_REQUIREMENT in requirements
3786 3797 ):
3787 3798 raise error.Abort(
3788 3799 _(
3789 3800 b"cannot create shared repository as source was created"
3790 3801 b" with 'format.usestore' config disabled"
3791 3802 )
3792 3803 )
3793 3804
3794 3805 if requirementsmod.SHARESAFE_REQUIREMENT in requirements:
3795 3806 if ui.hasconfig(b'format', b'use-share-safe'):
3796 3807 msg = _(
3797 3808 b"ignoring enabled 'format.use-share-safe' config because "
3798 3809 b"it is incompatible with disabled 'format.usestore'"
3799 3810 b" config\n"
3800 3811 )
3801 3812 ui.warn(msg)
3802 3813 dropped.add(requirementsmod.SHARESAFE_REQUIREMENT)
3803 3814
3804 3815 return dropped
3805 3816
3806 3817
3807 3818 def filterknowncreateopts(ui, createopts):
3808 3819 """Filters a dict of repo creation options against options that are known.
3809 3820
3810 3821 Receives a dict of repo creation options and returns a dict of those
3811 3822 options that we don't know how to handle.
3812 3823
3813 3824 This function is called as part of repository creation. If the
3814 3825 returned dict contains any items, repository creation will not
3815 3826 be allowed, as it means there was a request to create a repository
3816 3827 with options not recognized by loaded code.
3817 3828
3818 3829 Extensions can wrap this function to filter out creation options
3819 3830 they know how to handle.
3820 3831 """
3821 3832 known = {
3822 3833 b'backend',
3823 3834 b'lfs',
3824 3835 b'narrowfiles',
3825 3836 b'sharedrepo',
3826 3837 b'sharedrelative',
3827 3838 b'shareditems',
3828 3839 b'shallowfilestore',
3829 3840 }
3830 3841
3831 3842 return {k: v for k, v in createopts.items() if k not in known}
3832 3843
3833 3844
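filterknowncreateopts() is the matching hook for repository creation options: an extension that understands an extra createopts key removes it from the returned dict so createrepository() does not abort. A hedged sketch, assuming the same wrapfunction() mechanism; the b'myext-opt' key is hypothetical.

    from mercurial import extensions, localrepo

    def _filterknowncreateopts(orig, ui, createopts):
        unknown = orig(ui, createopts)
        # claim the hypothetical b'myext-opt' creation option so repository
        # creation is allowed to proceed with it
        unknown.pop(b'myext-opt', None)
        return unknown

    def uisetup(ui):
        extensions.wrapfunction(
            localrepo, 'filterknowncreateopts', _filterknowncreateopts
        )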
3834 3845 def createrepository(ui, path: bytes, createopts=None, requirements=None):
3835 3846 """Create a new repository in a vfs.
3836 3847
3837 3848 ``path`` path to the new repo's working directory.
3838 3849 ``createopts`` options for the new repository.
3839 3850 ``requirements`` predefined set of requirements.
3840 3851 (incompatible with ``createopts``)
3841 3852
3842 3853 The following keys for ``createopts`` are recognized:
3843 3854
3844 3855 backend
3845 3856 The storage backend to use.
3846 3857 lfs
3847 3858 Repository will be created with ``lfs`` requirement. The lfs extension
3848 3859 will automatically be loaded when the repository is accessed.
3849 3860 narrowfiles
3850 3861 Set up repository to support narrow file storage.
3851 3862 sharedrepo
3852 3863 Repository object from which storage should be shared.
3853 3864 sharedrelative
3854 3865 Boolean indicating if the path to the shared repo should be
3855 3866 stored as relative. By default, the pointer to the "parent" repo
3856 3867 is stored as an absolute path.
3857 3868 shareditems
3858 3869 Set of items to share to the new repository (in addition to storage).
3859 3870 shallowfilestore
3860 3871 Indicates that storage for files should be shallow (not all ancestor
3861 3872 revisions are known).
3862 3873 """
3863 3874
3864 3875 if requirements is not None:
3865 3876 if createopts is not None:
3866 3877 msg = b'cannot specify both createopts and requirements'
3867 3878 raise error.ProgrammingError(msg)
3868 3879 createopts = {}
3869 3880 else:
3870 3881 createopts = defaultcreateopts(ui, createopts=createopts)
3871 3882
3872 3883 unknownopts = filterknowncreateopts(ui, createopts)
3873 3884
3874 3885 if not isinstance(unknownopts, dict):
3875 3886 raise error.ProgrammingError(
3876 3887 b'filterknowncreateopts() did not return a dict'
3877 3888 )
3878 3889
3879 3890 if unknownopts:
3880 3891 raise error.Abort(
3881 3892 _(
3882 3893 b'unable to create repository because of unknown '
3883 3894 b'creation option: %s'
3884 3895 )
3885 3896 % b', '.join(sorted(unknownopts)),
3886 3897 hint=_(b'is a required extension not loaded?'),
3887 3898 )
3888 3899
3889 3900 requirements = newreporequirements(ui, createopts=createopts)
3890 3901 requirements -= checkrequirementscompat(ui, requirements)
3891 3902
3892 3903 wdirvfs = vfsmod.vfs(path, expandpath=True, realpath=True)
3893 3904
3894 3905 hgvfs = vfsmod.vfs(wdirvfs.join(b'.hg'))
3895 3906 if hgvfs.exists():
3896 3907 raise error.RepoError(_(b'repository %s already exists') % path)
3897 3908
3898 3909 if b'sharedrepo' in createopts:
3899 3910 sharedpath = createopts[b'sharedrepo'].sharedpath
3900 3911
3901 3912 if createopts.get(b'sharedrelative'):
3902 3913 try:
3903 3914 sharedpath = os.path.relpath(sharedpath, hgvfs.base)
3904 3915 sharedpath = util.pconvert(sharedpath)
3905 3916 except (IOError, ValueError) as e:
3906 3917 # ValueError is raised on Windows if the drive letters differ
3907 3918 # on each path.
3908 3919 raise error.Abort(
3909 3920 _(b'cannot calculate relative path'),
3910 3921 hint=stringutil.forcebytestr(e),
3911 3922 )
3912 3923
3913 3924 if not wdirvfs.exists():
3914 3925 wdirvfs.makedirs()
3915 3926
3916 3927 hgvfs.makedir(notindexed=True)
3917 3928 if b'sharedrepo' not in createopts:
3918 3929 hgvfs.mkdir(b'cache')
3919 3930 hgvfs.mkdir(b'wcache')
3920 3931
3921 3932 has_store = requirementsmod.STORE_REQUIREMENT in requirements
3922 3933 if has_store and b'sharedrepo' not in createopts:
3923 3934 hgvfs.mkdir(b'store')
3924 3935
3925 3936 # We create an invalid changelog outside the store so very old
3926 3937 # Mercurial versions (which didn't know about the requirements
3927 3938 # file) encounter an error on reading the changelog. This
3928 3939 # effectively locks out old clients and prevents them from
3929 3940 # mucking with a repo in an unknown format.
3930 3941 #
3931 3942 # The revlog header has version 65535, which won't be recognized by
3932 3943 # such old clients.
3933 3944 hgvfs.append(
3934 3945 b'00changelog.i',
3935 3946 b'\0\0\xFF\xFF dummy changelog to prevent using the old repo '
3936 3947 b'layout',
3937 3948 )
3938 3949
3939 3950 # Filter the requirements into working copy and store ones
3940 3951 wcreq, storereq = scmutil.filterrequirements(requirements)
3941 3952 # write working copy ones
3942 3953 scmutil.writerequires(hgvfs, wcreq)
3943 3954 # If there are store requirements and the current repository
3944 3955 # is not a shared one, write the store requirements.
3945 3956 # For a new shared repository, we don't need to write the store
3946 3957 # requirements, as they are already present in the shared store's requires file
3947 3958 if storereq and b'sharedrepo' not in createopts:
3948 3959 storevfs = vfsmod.vfs(hgvfs.join(b'store'), cacheaudited=True)
3949 3960 scmutil.writerequires(storevfs, storereq)
3950 3961
3951 3962 # Write out file telling readers where to find the shared store.
3952 3963 if b'sharedrepo' in createopts:
3953 3964 hgvfs.write(b'sharedpath', sharedpath)
3954 3965
3955 3966 if createopts.get(b'shareditems'):
3956 3967 shared = b'\n'.join(sorted(createopts[b'shareditems'])) + b'\n'
3957 3968 hgvfs.write(b'shared', shared)
3958 3969
3959 3970
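A minimal usage sketch for createrepository() above, assuming the public module API and a hypothetical target path; in practice most callers go through hg.repository(ui, path, create=True), which ends up invoking this function.

    from mercurial import localrepo, ui as uimod

    ui = uimod.ui.load()              # load a ui carrying the user's config
    localrepo.createrepository(
        ui,
        b'/tmp/example-repo',         # hypothetical working directory path
    )
    # requirements are derived from the ui config via newreporequirements()

Explicit createopts or requirements follow the keys documented in the docstring above.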
3960 3971 def poisonrepository(repo):
3961 3972 """Poison a repository instance so it can no longer be used."""
3962 3973 # Perform any cleanup on the instance.
3963 3974 repo.close()
3964 3975
3965 3976 # Our strategy is to replace the type of the object with one that
3966 3977 # makes all attribute lookups result in an error.
3967 3978 #
3968 3979 # But we have to allow the close() method because some constructors
3969 3980 # of repos call close() on repo references.
3970 3981 class poisonedrepository:
3971 3982 def __getattribute__(self, item):
3972 3983 if item == 'close':
3973 3984 return object.__getattribute__(self, item)
3974 3985
3975 3986 raise error.ProgrammingError(
3976 3987 b'repo instances should not be used after unshare'
3977 3988 )
3978 3989
3979 3990 def close(self):
3980 3991 pass
3981 3992
3982 3993 # We may have a repoview, which intercepts __setattr__. So be sure
3983 3994 # we operate at the lowest level possible.
3984 3995 object.__setattr__(repo, '__class__', poisonedrepository)
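A short behavior sketch for poisonrepository(), assuming repo is an already-open repository object (for example from hg.repository() on a hypothetical path): after poisoning, every attribute access except close() raises ProgrammingError.

    from mercurial import error, hg, localrepo, ui as uimod

    ui = uimod.ui.load()
    repo = hg.repository(ui, b'/tmp/example-repo')   # hypothetical path
    localrepo.poisonrepository(repo)
    repo.close()                 # still allowed, and does nothing
    try:
        repo.changelog           # any other attribute access now fails
    except error.ProgrammingError:
        pass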
@@ -1,3385 +1,3399 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullrev,
29 29 sha1nodeconstants,
30 30 short,
31 31 wdirrev,
32 32 )
33 33 from .i18n import _
34 34 from .pycompat import getattr
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # blanked usage of all the names to prevent pyflakes constraints
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider we have a "fast" implementation in "pure" python because
167 167 # people using pure python don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class revlog:
245 245 """
246 246 the underlying revision storage object
247 247
248 248 A revlog consists of two parts, an index and the revision data.
249 249
250 250 The index is a file with a fixed record size containing
251 251 information on each revision, including its nodeid (hash), the
252 252 nodeids of its parents, the position and offset of its data within
253 253 the data file, and the revision it's based on. Finally, each entry
254 254 contains a linkrev entry that can serve as a pointer to external
255 255 data.
256 256
257 257 The revision data itself is a linear collection of data chunks.
258 258 Each chunk represents a revision and is usually represented as a
259 259 delta against the previous chunk. To bound lookup time, runs of
260 260 deltas are limited to about 2 times the length of the original
261 261 version data. This makes retrieval of a version proportional to
262 262 its size, or O(1) relative to the number of revisions.
263 263
264 264 Both pieces of the revlog are written to in an append-only
265 265 fashion, which means we never need to rewrite a file to insert or
266 266 remove data, and can use some simple techniques to avoid the need
267 267 for locking while reading.
268 268
269 269 If checkambig, indexfile is opened with checkambig=True at
270 270 writing, to avoid file stat ambiguity.
271 271
272 272 If mmaplargeindex is True, and an mmapindexthreshold is set, the
273 273 index will be mmapped rather than read if it is larger than the
274 274 configured threshold.
275 275
276 276 If censorable is True, the revlog can have censored revisions.
277 277
278 278 If `upperboundcomp` is not None, this is the expected maximal gain from
279 279 compression for the data content.
280 280
281 281 `concurrencychecker` is an optional function that receives 3 arguments: a
282 282 file handle, a filename, and an expected position. It should check whether
283 283 the current position in the file handle is valid, and log/warn/fail (by
284 284 raising).
285 285
286 286 See mercurial/revlogutils/constants.py for details about the content of an
287 287 index entry.
288 288 """
289 289
290 290 _flagserrorclass = error.RevlogError
291 291
292 292 def __init__(
293 293 self,
294 294 opener,
295 295 target,
296 296 radix,
297 297 postfix=None, # only exist for `tmpcensored` now
298 298 checkambig=False,
299 299 mmaplargeindex=False,
300 300 censorable=False,
301 301 upperboundcomp=None,
302 302 persistentnodemap=False,
303 303 concurrencychecker=None,
304 304 trypending=False,
305 try_split=False,
305 306 canonical_parent_order=True,
306 307 ):
307 308 """
308 309 create a revlog object
309 310
310 311 opener is a function that abstracts the file opening operation
311 312 and can be used to implement COW semantics or the like.
312 313
313 314 `target`: a (KIND, ID) tuple that identifies the content stored in
314 315 this revlog. It helps the rest of the code understand what the revlog
315 316 is about without having to resort to heuristics and index filename
316 317 analysis. Note that this must reliably be set by normal code, but
317 318 test, debug, or performance measurement code might not set this to
318 319 an accurate value.
319 320 """
320 321 self.upperboundcomp = upperboundcomp
321 322
322 323 self.radix = radix
323 324
324 325 self._docket_file = None
325 326 self._indexfile = None
326 327 self._datafile = None
327 328 self._sidedatafile = None
328 329 self._nodemap_file = None
329 330 self.postfix = postfix
330 331 self._trypending = trypending
332 self._try_split = try_split
331 333 self.opener = opener
332 334 if persistentnodemap:
333 335 self._nodemap_file = nodemaputil.get_nodemap_file(self)
334 336
335 337 assert target[0] in ALL_KINDS
336 338 assert len(target) == 2
337 339 self.target = target
338 340 # When True, indexfile is opened with checkambig=True at writing, to
339 341 # avoid file stat ambiguity.
340 342 self._checkambig = checkambig
341 343 self._mmaplargeindex = mmaplargeindex
342 344 self._censorable = censorable
343 345 # 3-tuple of (node, rev, text) for a raw revision.
344 346 self._revisioncache = None
345 347 # Maps rev to chain base rev.
346 348 self._chainbasecache = util.lrucachedict(100)
347 349 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
348 350 self._chunkcache = (0, b'')
349 351 # How much data to read and cache into the raw revlog data cache.
350 352 self._chunkcachesize = 65536
351 353 self._maxchainlen = None
352 354 self._deltabothparents = True
353 355 self._candidate_group_chunk_size = 0
354 356 self._debug_delta = False
355 357 self.index = None
356 358 self._docket = None
357 359 self._nodemap_docket = None
358 360 # Mapping of partial identifiers to full nodes.
359 361 self._pcache = {}
360 362 # Mapping of revision integer to full node.
361 363 self._compengine = b'zlib'
362 364 self._compengineopts = {}
363 365 self._maxdeltachainspan = -1
364 366 self._withsparseread = False
365 367 self._sparserevlog = False
366 368 self.hassidedata = False
367 369 self._srdensitythreshold = 0.50
368 370 self._srmingapsize = 262144
369 371
370 372 # other optional features
371 373
372 374 # might remove rank configuration once the computation has no impact
373 375 self._compute_rank = False
374 376
375 377 # Make copy of flag processors so each revlog instance can support
376 378 # custom flags.
377 379 self._flagprocessors = dict(flagutil.flagprocessors)
378 380
379 381 # 3-tuple of file handles being used for active writing.
380 382 self._writinghandles = None
381 383 # prevent nesting of addgroup
382 384 self._adding_group = None
383 385
384 386 self._loadindex()
385 387
386 388 self._concurrencychecker = concurrencychecker
387 389
388 390 # parent order is supposed to be semantically irrelevant, so we
389 391 # normally resort parents to ensure that the first parent is non-null,
390 392 # if there is a non-null parent at all.
391 393 # filelog abuses the parent order as flag to mark some instances of
392 394 # meta-encoded files, so allow it to disable this behavior.
393 395 self.canonical_parent_order = canonical_parent_order
394 396
395 397 def _init_opts(self):
396 398 """process options (from above/config) to set up the associated default revlog mode
397 399
398 400 These values might be affected when actually reading on-disk information.
399 401
400 402 The relevant values are returned for use in _loadindex().
401 403
402 404 * newversionflags:
403 405 version header to use if we need to create a new revlog
404 406
405 407 * mmapindexthreshold:
406 408 minimal index size at which to start using mmap
407 409
408 410 * force_nodemap:
409 411 force the usage of a "development" version of the nodemap code
410 412 """
411 413 mmapindexthreshold = None
412 414 opts = self.opener.options
413 415
414 416 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
415 417 new_header = CHANGELOGV2
416 418 self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
417 419 elif b'revlogv2' in opts:
418 420 new_header = REVLOGV2
419 421 elif b'revlogv1' in opts:
420 422 new_header = REVLOGV1 | FLAG_INLINE_DATA
421 423 if b'generaldelta' in opts:
422 424 new_header |= FLAG_GENERALDELTA
423 425 elif b'revlogv0' in self.opener.options:
424 426 new_header = REVLOGV0
425 427 else:
426 428 new_header = REVLOG_DEFAULT_VERSION
427 429
428 430 if b'chunkcachesize' in opts:
429 431 self._chunkcachesize = opts[b'chunkcachesize']
430 432 if b'maxchainlen' in opts:
431 433 self._maxchainlen = opts[b'maxchainlen']
432 434 if b'deltabothparents' in opts:
433 435 self._deltabothparents = opts[b'deltabothparents']
434 436 dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
435 437 if dps_cgds:
436 438 self._candidate_group_chunk_size = dps_cgds
437 439 self._lazydelta = bool(opts.get(b'lazydelta', True))
438 440 self._lazydeltabase = False
439 441 if self._lazydelta:
440 442 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
441 443 if b'debug-delta' in opts:
442 444 self._debug_delta = opts[b'debug-delta']
443 445 if b'compengine' in opts:
444 446 self._compengine = opts[b'compengine']
445 447 if b'zlib.level' in opts:
446 448 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
447 449 if b'zstd.level' in opts:
448 450 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
449 451 if b'maxdeltachainspan' in opts:
450 452 self._maxdeltachainspan = opts[b'maxdeltachainspan']
451 453 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
452 454 mmapindexthreshold = opts[b'mmapindexthreshold']
453 455 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
454 456 withsparseread = bool(opts.get(b'with-sparse-read', False))
455 457 # sparse-revlog forces sparse-read
456 458 self._withsparseread = self._sparserevlog or withsparseread
457 459 if b'sparse-read-density-threshold' in opts:
458 460 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
459 461 if b'sparse-read-min-gap-size' in opts:
460 462 self._srmingapsize = opts[b'sparse-read-min-gap-size']
461 463 if opts.get(b'enableellipsis'):
462 464 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
463 465
464 466 # revlog v0 doesn't have flag processors
465 467 for flag, processor in opts.get(b'flagprocessors', {}).items():
466 468 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
467 469
468 470 if self._chunkcachesize <= 0:
469 471 raise error.RevlogError(
470 472 _(b'revlog chunk cache size %r is not greater than 0')
471 473 % self._chunkcachesize
472 474 )
473 475 elif self._chunkcachesize & (self._chunkcachesize - 1):
474 476 raise error.RevlogError(
475 477 _(b'revlog chunk cache size %r is not a power of 2')
476 478 % self._chunkcachesize
477 479 )
478 480 force_nodemap = opts.get(b'devel-force-nodemap', False)
479 481 return new_header, mmapindexthreshold, force_nodemap
480 482
481 483 def _get_data(self, filepath, mmap_threshold, size=None):
482 484 """return a file content with or without mmap
483 485
484 486 If the file is missing return the empty string"""
485 487 try:
486 488 with self.opener(filepath) as fp:
487 489 if mmap_threshold is not None:
488 490 file_size = self.opener.fstat(fp).st_size
489 491 if file_size >= mmap_threshold:
490 492 if size is not None:
491 493 # avoid potential mmap crash
492 494 size = min(file_size, size)
493 495 # TODO: should .close() to release resources without
494 496 # relying on Python GC
495 497 if size is None:
496 498 return util.buffer(util.mmapread(fp))
497 499 else:
498 500 return util.buffer(util.mmapread(fp, size))
499 501 if size is None:
500 502 return fp.read()
501 503 else:
502 504 return fp.read(size)
503 505 except FileNotFoundError:
504 506 return b''
505 507
506 508 def _loadindex(self, docket=None):
507 509
508 510 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
509 511
510 512 if self.postfix is not None:
511 513 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
512 514 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
513 515 entry_point = b'%s.i.a' % self.radix
516 elif self._try_split and self.opener.exists(b'%s.i.s' % self.radix):
517 entry_point = b'%s.i.s' % self.radix
514 518 else:
515 519 entry_point = b'%s.i' % self.radix
516 520
517 521 if docket is not None:
518 522 self._docket = docket
519 523 self._docket_file = entry_point
520 524 else:
521 525 self._initempty = True
522 526 entry_data = self._get_data(entry_point, mmapindexthreshold)
523 527 if len(entry_data) > 0:
524 528 header = INDEX_HEADER.unpack(entry_data[:4])[0]
525 529 self._initempty = False
526 530 else:
527 531 header = new_header
528 532
529 533 self._format_flags = header & ~0xFFFF
530 534 self._format_version = header & 0xFFFF
531 535
532 536 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
533 537 if supported_flags is None:
534 538 msg = _(b'unknown version (%d) in revlog %s')
535 539 msg %= (self._format_version, self.display_id)
536 540 raise error.RevlogError(msg)
537 541 elif self._format_flags & ~supported_flags:
538 542 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
539 543 display_flag = self._format_flags >> 16
540 544 msg %= (display_flag, self._format_version, self.display_id)
541 545 raise error.RevlogError(msg)
542 546
543 547 features = FEATURES_BY_VERSION[self._format_version]
544 548 self._inline = features[b'inline'](self._format_flags)
545 549 self._generaldelta = features[b'generaldelta'](self._format_flags)
546 550 self.hassidedata = features[b'sidedata']
547 551
548 552 if not features[b'docket']:
549 553 self._indexfile = entry_point
550 554 index_data = entry_data
551 555 else:
552 556 self._docket_file = entry_point
553 557 if self._initempty:
554 558 self._docket = docketutil.default_docket(self, header)
555 559 else:
556 560 self._docket = docketutil.parse_docket(
557 561 self, entry_data, use_pending=self._trypending
558 562 )
559 563
560 564 if self._docket is not None:
561 565 self._indexfile = self._docket.index_filepath()
562 566 index_data = b''
563 567 index_size = self._docket.index_end
564 568 if index_size > 0:
565 569 index_data = self._get_data(
566 570 self._indexfile, mmapindexthreshold, size=index_size
567 571 )
568 572 if len(index_data) < index_size:
569 573 msg = _(b'too few index data for %s: got %d, expected %d')
570 574 msg %= (self.display_id, len(index_data), index_size)
571 575 raise error.RevlogError(msg)
572 576
573 577 self._inline = False
574 578 # generaldelta implied by version 2 revlogs.
575 579 self._generaldelta = True
576 580 # the logic for persistent nodemap will be dealt with within the
577 581 # main docket, so disable it for now.
578 582 self._nodemap_file = None
579 583
580 584 if self._docket is not None:
581 585 self._datafile = self._docket.data_filepath()
582 586 self._sidedatafile = self._docket.sidedata_filepath()
583 587 elif self.postfix is None:
584 588 self._datafile = b'%s.d' % self.radix
585 589 else:
586 590 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
587 591
588 592 self.nodeconstants = sha1nodeconstants
589 593 self.nullid = self.nodeconstants.nullid
590 594
591 595 # sparse-revlog can't be on without general-delta (issue6056)
592 596 if not self._generaldelta:
593 597 self._sparserevlog = False
594 598
595 599 self._storedeltachains = True
596 600
597 601 devel_nodemap = (
598 602 self._nodemap_file
599 603 and force_nodemap
600 604 and parse_index_v1_nodemap is not None
601 605 )
602 606
603 607 use_rust_index = False
604 608 if rustrevlog is not None:
605 609 if self._nodemap_file is not None:
606 610 use_rust_index = True
607 611 else:
608 612 use_rust_index = self.opener.options.get(b'rust.index')
609 613
610 614 self._parse_index = parse_index_v1
611 615 if self._format_version == REVLOGV0:
612 616 self._parse_index = revlogv0.parse_index_v0
613 617 elif self._format_version == REVLOGV2:
614 618 self._parse_index = parse_index_v2
615 619 elif self._format_version == CHANGELOGV2:
616 620 self._parse_index = parse_index_cl_v2
617 621 elif devel_nodemap:
618 622 self._parse_index = parse_index_v1_nodemap
619 623 elif use_rust_index:
620 624 self._parse_index = parse_index_v1_mixed
621 625 try:
622 626 d = self._parse_index(index_data, self._inline)
623 627 index, chunkcache = d
624 628 use_nodemap = (
625 629 not self._inline
626 630 and self._nodemap_file is not None
627 631 and util.safehasattr(index, 'update_nodemap_data')
628 632 )
629 633 if use_nodemap:
630 634 nodemap_data = nodemaputil.persisted_data(self)
631 635 if nodemap_data is not None:
632 636 docket = nodemap_data[0]
633 637 if (
634 638 len(d[0]) > docket.tip_rev
635 639 and d[0][docket.tip_rev][7] == docket.tip_node
636 640 ):
637 641 # no changelog tampering
638 642 self._nodemap_docket = docket
639 643 index.update_nodemap_data(*nodemap_data)
640 644 except (ValueError, IndexError):
641 645 raise error.RevlogError(
642 646 _(b"index %s is corrupted") % self.display_id
643 647 )
644 648 self.index = index
645 649 self._segmentfile = randomaccessfile.randomaccessfile(
646 650 self.opener,
647 651 (self._indexfile if self._inline else self._datafile),
648 652 self._chunkcachesize,
649 653 chunkcache,
650 654 )
651 655 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
652 656 self.opener,
653 657 self._sidedatafile,
654 658 self._chunkcachesize,
655 659 )
656 660 # revnum -> (chain-length, sum-delta-length)
657 661 self._chaininfocache = util.lrucachedict(500)
658 662 # revlog header -> revlog compressor
659 663 self._decompressors = {}
660 664
661 665 @util.propertycache
662 666 def revlog_kind(self):
663 667 return self.target[0]
664 668
665 669 @util.propertycache
666 670 def display_id(self):
667 671 """The public facing "ID" of the revlog that we use in messages"""
668 672 if self.revlog_kind == KIND_FILELOG:
669 673 # Reference the file without the "data/" prefix, so it is familiar
670 674 # to the user.
671 675 return self.target[1]
672 676 else:
673 677 return self.radix
674 678
675 679 def _get_decompressor(self, t):
676 680 try:
677 681 compressor = self._decompressors[t]
678 682 except KeyError:
679 683 try:
680 684 engine = util.compengines.forrevlogheader(t)
681 685 compressor = engine.revlogcompressor(self._compengineopts)
682 686 self._decompressors[t] = compressor
683 687 except KeyError:
684 688 raise error.RevlogError(
685 689 _(b'unknown compression type %s') % binascii.hexlify(t)
686 690 )
687 691 return compressor
688 692
689 693 @util.propertycache
690 694 def _compressor(self):
691 695 engine = util.compengines[self._compengine]
692 696 return engine.revlogcompressor(self._compengineopts)
693 697
694 698 @util.propertycache
695 699 def _decompressor(self):
696 700 """the default decompressor"""
697 701 if self._docket is None:
698 702 return None
699 703 t = self._docket.default_compression_header
700 704 c = self._get_decompressor(t)
701 705 return c.decompress
702 706
703 707 def _indexfp(self):
704 708 """file object for the revlog's index file"""
705 709 return self.opener(self._indexfile, mode=b"r")
706 710
707 711 def __index_write_fp(self):
708 712 # You should not use this directly; use `_writing` instead
709 713 try:
710 714 f = self.opener(
711 715 self._indexfile, mode=b"r+", checkambig=self._checkambig
712 716 )
713 717 if self._docket is None:
714 718 f.seek(0, os.SEEK_END)
715 719 else:
716 720 f.seek(self._docket.index_end, os.SEEK_SET)
717 721 return f
718 722 except FileNotFoundError:
719 723 return self.opener(
720 724 self._indexfile, mode=b"w+", checkambig=self._checkambig
721 725 )
722 726
723 727 def __index_new_fp(self):
724 728 # You should not use this unless you are upgrading from inline revlog
725 729 return self.opener(
726 730 self._indexfile,
727 731 mode=b"w",
728 732 checkambig=self._checkambig,
729 733 atomictemp=True,
730 734 )
731 735
732 736 def _datafp(self, mode=b'r'):
733 737 """file object for the revlog's data file"""
734 738 return self.opener(self._datafile, mode=mode)
735 739
736 740 @contextlib.contextmanager
737 741 def _sidedatareadfp(self):
738 742 """file object suitable to read sidedata"""
739 743 if self._writinghandles:
740 744 yield self._writinghandles[2]
741 745 else:
742 746 with self.opener(self._sidedatafile) as fp:
743 747 yield fp
744 748
745 749 def tiprev(self):
746 750 return len(self.index) - 1
747 751
748 752 def tip(self):
749 753 return self.node(self.tiprev())
750 754
751 755 def __contains__(self, rev):
752 756 return 0 <= rev < len(self)
753 757
754 758 def __len__(self):
755 759 return len(self.index)
756 760
757 761 def __iter__(self):
758 762 return iter(range(len(self)))
759 763
760 764 def revs(self, start=0, stop=None):
761 765 """iterate over all rev in this revlog (from start to stop)"""
762 766 return storageutil.iterrevs(len(self), start=start, stop=stop)
763 767
764 768 def hasnode(self, node):
765 769 try:
766 770 self.rev(node)
767 771 return True
768 772 except KeyError:
769 773 return False
770 774
771 775 def candelta(self, baserev, rev):
772 776 """whether two revisions (baserev, rev) can be delta-ed or not"""
773 777 # Disable delta if either rev requires a content-changing flag
774 778 # processor (ex. LFS). This is because such flag processor can alter
775 779 # the rawtext content that the delta will be based on, and two clients
776 780 # could have a same revlog node with different flags (i.e. different
777 781 # rawtext contents) and the delta could be incompatible.
778 782 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
779 783 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
780 784 ):
781 785 return False
782 786 return True
783 787
784 788 def update_caches(self, transaction):
785 789 if self._nodemap_file is not None:
786 790 if transaction is None:
787 791 nodemaputil.update_persistent_nodemap(self)
788 792 else:
789 793 nodemaputil.setup_persistent_nodemap(transaction, self)
790 794
791 795 def clearcaches(self):
792 796 self._revisioncache = None
793 797 self._chainbasecache.clear()
794 798 self._segmentfile.clear_cache()
795 799 self._segmentfile_sidedata.clear_cache()
796 800 self._pcache = {}
797 801 self._nodemap_docket = None
798 802 self.index.clearcaches()
799 803 # The python code is the one responsible for validating the docket, so we
800 804 # end up having to refresh it here.
801 805 use_nodemap = (
802 806 not self._inline
803 807 and self._nodemap_file is not None
804 808 and util.safehasattr(self.index, 'update_nodemap_data')
805 809 )
806 810 if use_nodemap:
807 811 nodemap_data = nodemaputil.persisted_data(self)
808 812 if nodemap_data is not None:
809 813 self._nodemap_docket = nodemap_data[0]
810 814 self.index.update_nodemap_data(*nodemap_data)
811 815
812 816 def rev(self, node):
813 817 try:
814 818 return self.index.rev(node)
815 819 except TypeError:
816 820 raise
817 821 except error.RevlogError:
818 822 # parsers.c radix tree lookup failed
819 823 if (
820 824 node == self.nodeconstants.wdirid
821 825 or node in self.nodeconstants.wdirfilenodeids
822 826 ):
823 827 raise error.WdirUnsupported
824 828 raise error.LookupError(node, self.display_id, _(b'no node'))
825 829
826 830 # Accessors for index entries.
827 831
828 832 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
829 833 # are flags.
830 834 def start(self, rev):
831 835 return int(self.index[rev][0] >> 16)
832 836
833 837 def sidedata_cut_off(self, rev):
834 838 sd_cut_off = self.index[rev][8]
835 839 if sd_cut_off != 0:
836 840 return sd_cut_off
837 841 # This is some annoying dance, because entries without sidedata
838 842 # currently use 0 as their offset. (instead of previous-offset +
839 843 # previous-size)
840 844 #
841 845 # We should reconsider this sidedata → 0 sidedata_offset policy.
842 846 # In the meantime, we need this.
843 847 while 0 <= rev:
844 848 e = self.index[rev]
845 849 if e[9] != 0:
846 850 return e[8] + e[9]
847 851 rev -= 1
848 852 return 0
849 853
850 854 def flags(self, rev):
851 855 return self.index[rev][0] & 0xFFFF
852 856
853 857 def length(self, rev):
854 858 return self.index[rev][1]
855 859
856 860 def sidedata_length(self, rev):
857 861 if not self.hassidedata:
858 862 return 0
859 863 return self.index[rev][9]
860 864
861 865 def rawsize(self, rev):
862 866 """return the length of the uncompressed text for a given revision"""
863 867 l = self.index[rev][2]
864 868 if l >= 0:
865 869 return l
866 870
867 871 t = self.rawdata(rev)
868 872 return len(t)
869 873
870 874 def size(self, rev):
871 875 """length of non-raw text (processed by a "read" flag processor)"""
872 876 # fast path: if no "read" flag processor could change the content,
873 877 # size is rawsize. note: ELLIPSIS is known to not change the content.
874 878 flags = self.flags(rev)
875 879 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
876 880 return self.rawsize(rev)
877 881
878 882 return len(self.revision(rev))
879 883
880 884 def fast_rank(self, rev):
881 885 """Return the rank of a revision if already known, or None otherwise.
882 886
883 887 The rank of a revision is the size of the sub-graph it defines as a
884 888 head. Equivalently, the rank of a revision `r` is the size of the set
885 889 `ancestors(r)`, `r` included.
886 890
887 891 This method returns the rank retrieved from the revlog in constant
888 892 time. It makes no attempt at computing unknown values for versions of
889 893 the revlog which do not persist the rank.
890 894 """
891 895 rank = self.index[rev][ENTRY_RANK]
892 896 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
893 897 return None
894 898 if rev == nullrev:
895 899 return 0 # convention
896 900 return rank
897 901
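To illustrate the fast_rank() docstring above: the rank of r is the number of revisions in ancestors(r), counting r itself, so a persisted rank can be cross-checked against ancestors(). A sketch, assuming repo is an already-open repository (repo.changelog is a revlog subclass); fast_rank() only returns a value when the format persists ranks (changelog-v2), and None otherwise.

    cl = repo.changelog                  # `repo` is assumed to be open already
    r = cl.tiprev()
    rank_by_definition = sum(1 for _ in cl.ancestors([r], inclusive=True))
    persisted = cl.fast_rank(r)          # equals rank_by_definition, or None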
898 902 def chainbase(self, rev):
899 903 base = self._chainbasecache.get(rev)
900 904 if base is not None:
901 905 return base
902 906
903 907 index = self.index
904 908 iterrev = rev
905 909 base = index[iterrev][3]
906 910 while base != iterrev:
907 911 iterrev = base
908 912 base = index[iterrev][3]
909 913
910 914 self._chainbasecache[rev] = base
911 915 return base
912 916
913 917 def linkrev(self, rev):
914 918 return self.index[rev][4]
915 919
916 920 def parentrevs(self, rev):
917 921 try:
918 922 entry = self.index[rev]
919 923 except IndexError:
920 924 if rev == wdirrev:
921 925 raise error.WdirUnsupported
922 926 raise
923 927
924 928 if self.canonical_parent_order and entry[5] == nullrev:
925 929 return entry[6], entry[5]
926 930 else:
927 931 return entry[5], entry[6]
928 932
929 933 # fast parentrevs(rev) where rev isn't filtered
930 934 _uncheckedparentrevs = parentrevs
931 935
932 936 def node(self, rev):
933 937 try:
934 938 return self.index[rev][7]
935 939 except IndexError:
936 940 if rev == wdirrev:
937 941 raise error.WdirUnsupported
938 942 raise
939 943
940 944 # Derived from index values.
941 945
942 946 def end(self, rev):
943 947 return self.start(rev) + self.length(rev)
944 948
945 949 def parents(self, node):
946 950 i = self.index
947 951 d = i[self.rev(node)]
948 952 # inline node() to avoid function call overhead
949 953 if self.canonical_parent_order and d[5] == self.nullid:
950 954 return i[d[6]][7], i[d[5]][7]
951 955 else:
952 956 return i[d[5]][7], i[d[6]][7]
953 957
954 958 def chainlen(self, rev):
955 959 return self._chaininfo(rev)[0]
956 960
957 961 def _chaininfo(self, rev):
958 962 chaininfocache = self._chaininfocache
959 963 if rev in chaininfocache:
960 964 return chaininfocache[rev]
961 965 index = self.index
962 966 generaldelta = self._generaldelta
963 967 iterrev = rev
964 968 e = index[iterrev]
965 969 clen = 0
966 970 compresseddeltalen = 0
967 971 while iterrev != e[3]:
968 972 clen += 1
969 973 compresseddeltalen += e[1]
970 974 if generaldelta:
971 975 iterrev = e[3]
972 976 else:
973 977 iterrev -= 1
974 978 if iterrev in chaininfocache:
975 979 t = chaininfocache[iterrev]
976 980 clen += t[0]
977 981 compresseddeltalen += t[1]
978 982 break
979 983 e = index[iterrev]
980 984 else:
981 985 # Add text length of base since decompressing that also takes
982 986 # work. For cache hits the length is already included.
983 987 compresseddeltalen += e[1]
984 988 r = (clen, compresseddeltalen)
985 989 chaininfocache[rev] = r
986 990 return r
987 991
988 992 def _deltachain(self, rev, stoprev=None):
989 993 """Obtain the delta chain for a revision.
990 994
991 995 ``stoprev`` specifies a revision to stop at. If not specified, we
992 996 stop at the base of the chain.
993 997
994 998 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
995 999 revs in ascending order and ``stopped`` is a bool indicating whether
996 1000 ``stoprev`` was hit.
997 1001 """
998 1002 # Try C implementation.
999 1003 try:
1000 1004 return self.index.deltachain(rev, stoprev, self._generaldelta)
1001 1005 except AttributeError:
1002 1006 pass
1003 1007
1004 1008 chain = []
1005 1009
1006 1010 # Alias to prevent attribute lookup in tight loop.
1007 1011 index = self.index
1008 1012 generaldelta = self._generaldelta
1009 1013
1010 1014 iterrev = rev
1011 1015 e = index[iterrev]
1012 1016 while iterrev != e[3] and iterrev != stoprev:
1013 1017 chain.append(iterrev)
1014 1018 if generaldelta:
1015 1019 iterrev = e[3]
1016 1020 else:
1017 1021 iterrev -= 1
1018 1022 e = index[iterrev]
1019 1023
1020 1024 if iterrev == stoprev:
1021 1025 stopped = True
1022 1026 else:
1023 1027 chain.append(iterrev)
1024 1028 stopped = False
1025 1029
1026 1030 chain.reverse()
1027 1031 return chain, stopped
1028 1032
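A small sketch of what _deltachain() returns, again assuming repo is an already-open repository; this pokes at a private method purely for illustration.

    cl = repo.changelog
    chain, stopped = cl._deltachain(cl.tiprev())
    # `chain` lists the revs whose deltas must be applied, base first and
    # ending with tiprev(); `stopped` is False because no stoprev was given.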
1029 1033 def ancestors(self, revs, stoprev=0, inclusive=False):
1030 1034 """Generate the ancestors of 'revs' in reverse revision order.
1031 1035 Does not generate revs lower than stoprev.
1032 1036
1033 1037 See the documentation for ancestor.lazyancestors for more details."""
1034 1038
1035 1039 # first, make sure start revisions aren't filtered
1036 1040 revs = list(revs)
1037 1041 checkrev = self.node
1038 1042 for r in revs:
1039 1043 checkrev(r)
1040 1044 # and we're sure ancestors aren't filtered as well
1041 1045
1042 1046 if rustancestor is not None and self.index.rust_ext_compat:
1043 1047 lazyancestors = rustancestor.LazyAncestors
1044 1048 arg = self.index
1045 1049 else:
1046 1050 lazyancestors = ancestor.lazyancestors
1047 1051 arg = self._uncheckedparentrevs
1048 1052 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1049 1053
1050 1054 def descendants(self, revs):
1051 1055 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1052 1056
1053 1057 def findcommonmissing(self, common=None, heads=None):
1054 1058 """Return a tuple of the ancestors of common and the ancestors of heads
1055 1059 that are not ancestors of common. In revset terminology, we return the
1056 1060 tuple:
1057 1061
1058 1062 ::common, (::heads) - (::common)
1059 1063
1060 1064 The list is sorted by revision number, meaning it is
1061 1065 topologically sorted.
1062 1066
1063 1067 'heads' and 'common' are both lists of node IDs. If heads is
1064 1068 not supplied, uses all of the revlog's heads. If common is not
1065 1069 supplied, uses nullid."""
1066 1070 if common is None:
1067 1071 common = [self.nullid]
1068 1072 if heads is None:
1069 1073 heads = self.heads()
1070 1074
1071 1075 common = [self.rev(n) for n in common]
1072 1076 heads = [self.rev(n) for n in heads]
1073 1077
1074 1078 # we want the ancestors, but inclusive
1075 1079 class lazyset:
1076 1080 def __init__(self, lazyvalues):
1077 1081 self.addedvalues = set()
1078 1082 self.lazyvalues = lazyvalues
1079 1083
1080 1084 def __contains__(self, value):
1081 1085 return value in self.addedvalues or value in self.lazyvalues
1082 1086
1083 1087 def __iter__(self):
1084 1088 added = self.addedvalues
1085 1089 for r in added:
1086 1090 yield r
1087 1091 for r in self.lazyvalues:
1088 1092 if not r in added:
1089 1093 yield r
1090 1094
1091 1095 def add(self, value):
1092 1096 self.addedvalues.add(value)
1093 1097
1094 1098 def update(self, values):
1095 1099 self.addedvalues.update(values)
1096 1100
1097 1101 has = lazyset(self.ancestors(common))
1098 1102 has.add(nullrev)
1099 1103 has.update(common)
1100 1104
1101 1105 # take all ancestors from heads that aren't in has
1102 1106 missing = set()
1103 1107 visit = collections.deque(r for r in heads if r not in has)
1104 1108 while visit:
1105 1109 r = visit.popleft()
1106 1110 if r in missing:
1107 1111 continue
1108 1112 else:
1109 1113 missing.add(r)
1110 1114 for p in self.parentrevs(r):
1111 1115 if p not in has:
1112 1116 visit.append(p)
1113 1117 missing = list(missing)
1114 1118 missing.sort()
1115 1119 return has, [self.node(miss) for miss in missing]
1116 1120
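A usage sketch for findcommonmissing(), assuming repo is an already-open repository with at least one revision; in revset terms the second return value corresponds to (::heads) - (::common).

    cl = repo.changelog
    common = [cl.node(0)]                # pretend the remote only has rev 0
    heads = [cl.tip()]
    has, missing = cl.findcommonmissing(common, heads)
    # `missing` lists the nodes that would have to be sent to that remote,
    # sorted by revision number (hence topologically sorted).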
1117 1121 def incrementalmissingrevs(self, common=None):
1118 1122 """Return an object that can be used to incrementally compute the
1119 1123 revision numbers of the ancestors of arbitrary sets that are not
1120 1124 ancestors of common. This is an ancestor.incrementalmissingancestors
1121 1125 object.
1122 1126
1123 1127 'common' is a list of revision numbers. If common is not supplied, uses
1124 1128 nullrev.
1125 1129 """
1126 1130 if common is None:
1127 1131 common = [nullrev]
1128 1132
1129 1133 if rustancestor is not None and self.index.rust_ext_compat:
1130 1134 return rustancestor.MissingAncestors(self.index, common)
1131 1135 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1132 1136
1133 1137 def findmissingrevs(self, common=None, heads=None):
1134 1138 """Return the revision numbers of the ancestors of heads that
1135 1139 are not ancestors of common.
1136 1140
1137 1141 More specifically, return a list of revision numbers corresponding to
1138 1142 nodes N such that every N satisfies the following constraints:
1139 1143
1140 1144 1. N is an ancestor of some node in 'heads'
1141 1145 2. N is not an ancestor of any node in 'common'
1142 1146
1143 1147 The list is sorted by revision number, meaning it is
1144 1148 topologically sorted.
1145 1149
1146 1150 'heads' and 'common' are both lists of revision numbers. If heads is
1147 1151 not supplied, uses all of the revlog's heads. If common is not
1148 1152 supplied, uses nullid."""
1149 1153 if common is None:
1150 1154 common = [nullrev]
1151 1155 if heads is None:
1152 1156 heads = self.headrevs()
1153 1157
1154 1158 inc = self.incrementalmissingrevs(common=common)
1155 1159 return inc.missingancestors(heads)
1156 1160
1157 1161 def findmissing(self, common=None, heads=None):
1158 1162 """Return the ancestors of heads that are not ancestors of common.
1159 1163
1160 1164 More specifically, return a list of nodes N such that every N
1161 1165 satisfies the following constraints:
1162 1166
1163 1167 1. N is an ancestor of some node in 'heads'
1164 1168 2. N is not an ancestor of any node in 'common'
1165 1169
1166 1170 The list is sorted by revision number, meaning it is
1167 1171 topologically sorted.
1168 1172
1169 1173 'heads' and 'common' are both lists of node IDs. If heads is
1170 1174 not supplied, uses all of the revlog's heads. If common is not
1171 1175 supplied, uses nullid."""
1172 1176 if common is None:
1173 1177 common = [self.nullid]
1174 1178 if heads is None:
1175 1179 heads = self.heads()
1176 1180
1177 1181 common = [self.rev(n) for n in common]
1178 1182 heads = [self.rev(n) for n in heads]
1179 1183
1180 1184 inc = self.incrementalmissingrevs(common=common)
1181 1185 return [self.node(r) for r in inc.missingancestors(heads)]
1182 1186
1183 1187 def nodesbetween(self, roots=None, heads=None):
1184 1188 """Return a topological path from 'roots' to 'heads'.
1185 1189
1186 1190 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1187 1191 topologically sorted list of all nodes N that satisfy both of
1188 1192 these constraints:
1189 1193
1190 1194 1. N is a descendant of some node in 'roots'
1191 1195 2. N is an ancestor of some node in 'heads'
1192 1196
1193 1197 Every node is considered to be both a descendant and an ancestor
1194 1198 of itself, so every reachable node in 'roots' and 'heads' will be
1195 1199 included in 'nodes'.
1196 1200
1197 1201 'outroots' is the list of reachable nodes in 'roots', i.e., the
1198 1202 subset of 'roots' that is returned in 'nodes'. Likewise,
1199 1203 'outheads' is the subset of 'heads' that is also in 'nodes'.
1200 1204
1201 1205 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1202 1206 unspecified, uses nullid as the only root. If 'heads' is
1203 1207 unspecified, uses list of all of the revlog's heads."""
1204 1208 nonodes = ([], [], [])
1205 1209 if roots is not None:
1206 1210 roots = list(roots)
1207 1211 if not roots:
1208 1212 return nonodes
1209 1213 lowestrev = min([self.rev(n) for n in roots])
1210 1214 else:
1211 1215 roots = [self.nullid] # Everybody's a descendant of nullid
1212 1216 lowestrev = nullrev
1213 1217 if (lowestrev == nullrev) and (heads is None):
1214 1218 # We want _all_ the nodes!
1215 1219 return (
1216 1220 [self.node(r) for r in self],
1217 1221 [self.nullid],
1218 1222 list(self.heads()),
1219 1223 )
1220 1224 if heads is None:
1221 1225 # All nodes are ancestors, so the latest ancestor is the last
1222 1226 # node.
1223 1227 highestrev = len(self) - 1
1224 1228 # Set ancestors to None to signal that every node is an ancestor.
1225 1229 ancestors = None
1226 1230 # Set heads to an empty dictionary for later discovery of heads
1227 1231 heads = {}
1228 1232 else:
1229 1233 heads = list(heads)
1230 1234 if not heads:
1231 1235 return nonodes
1232 1236 ancestors = set()
1233 1237 # Turn heads into a dictionary so we can remove 'fake' heads.
1234 1238 # Also, later we will be using it to filter out the heads we can't
1235 1239 # find from roots.
1236 1240 heads = dict.fromkeys(heads, False)
1237 1241 # Start at the top and keep marking parents until we're done.
1238 1242 nodestotag = set(heads)
1239 1243 # Remember where the top was so we can use it as a limit later.
1240 1244 highestrev = max([self.rev(n) for n in nodestotag])
1241 1245 while nodestotag:
1242 1246 # grab a node to tag
1243 1247 n = nodestotag.pop()
1244 1248 # Never tag nullid
1245 1249 if n == self.nullid:
1246 1250 continue
1247 1251 # A node's revision number represents its place in a
1248 1252 # topologically sorted list of nodes.
1249 1253 r = self.rev(n)
1250 1254 if r >= lowestrev:
1251 1255 if n not in ancestors:
1252 1256 # If we are possibly a descendant of one of the roots
1253 1257 # and we haven't already been marked as an ancestor
1254 1258 ancestors.add(n) # Mark as ancestor
1255 1259 # Add non-nullid parents to list of nodes to tag.
1256 1260 nodestotag.update(
1257 1261 [p for p in self.parents(n) if p != self.nullid]
1258 1262 )
1259 1263 elif n in heads: # We've seen it before, is it a fake head?
1260 1264 # So it is, real heads should not be the ancestors of
1261 1265 # any other heads.
1262 1266 heads.pop(n)
1263 1267 if not ancestors:
1264 1268 return nonodes
1265 1269 # Now that we have our set of ancestors, we want to remove any
1266 1270 # roots that are not ancestors.
1267 1271
1268 1272 # If one of the roots was nullid, everything is included anyway.
1269 1273 if lowestrev > nullrev:
1270 1274 # But, since we weren't, let's recompute the lowest rev to not
1271 1275 # include roots that aren't ancestors.
1272 1276
1273 1277 # Filter out roots that aren't ancestors of heads
1274 1278 roots = [root for root in roots if root in ancestors]
1275 1279 # Recompute the lowest revision
1276 1280 if roots:
1277 1281 lowestrev = min([self.rev(root) for root in roots])
1278 1282 else:
1279 1283 # No more roots? Return empty list
1280 1284 return nonodes
1281 1285 else:
1282 1286 # We are descending from nullid, and don't need to care about
1283 1287 # any other roots.
1284 1288 lowestrev = nullrev
1285 1289 roots = [self.nullid]
1286 1290 # Transform our roots list into a set.
1287 1291 descendants = set(roots)
1288 1292 # Also, keep the original roots so we can filter out roots that aren't
1289 1293 # 'real' roots (i.e. are descended from other roots).
1290 1294 roots = descendants.copy()
1291 1295 # Our topologically sorted list of output nodes.
1292 1296 orderedout = []
1293 1297 # Don't start at nullid since we don't want nullid in our output list,
1294 1298 # and if nullid shows up in descendants, empty parents will look like
1295 1299 # they're descendants.
1296 1300 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1297 1301 n = self.node(r)
1298 1302 isdescendant = False
1299 1303 if lowestrev == nullrev: # Everybody is a descendant of nullid
1300 1304 isdescendant = True
1301 1305 elif n in descendants:
1302 1306 # n is already a descendant
1303 1307 isdescendant = True
1304 1308 # This check only needs to be done here because all the roots
1305 1309 # will start being marked as descendants before the loop.
1306 1310 if n in roots:
1307 1311 # If n was a root, check if it's a 'real' root.
1308 1312 p = tuple(self.parents(n))
1309 1313 # If any of its parents are descendants, it's not a root.
1310 1314 if (p[0] in descendants) or (p[1] in descendants):
1311 1315 roots.remove(n)
1312 1316 else:
1313 1317 p = tuple(self.parents(n))
1314 1318 # A node is a descendant if either of its parents are
1315 1319 # descendants. (We seeded the descendants set with the roots
1316 1320 # up there, remember?)
1317 1321 if (p[0] in descendants) or (p[1] in descendants):
1318 1322 descendants.add(n)
1319 1323 isdescendant = True
1320 1324 if isdescendant and ((ancestors is None) or (n in ancestors)):
1321 1325 # Only include nodes that are both descendants and ancestors.
1322 1326 orderedout.append(n)
1323 1327 if (ancestors is not None) and (n in heads):
1324 1328 # We're trying to figure out which heads are reachable
1325 1329 # from roots.
1326 1330 # Mark this head as having been reached
1327 1331 heads[n] = True
1328 1332 elif ancestors is None:
1329 1333 # Otherwise, we're trying to discover the heads.
1330 1334 # Assume this is a head because if it isn't, the next step
1331 1335 # will eventually remove it.
1332 1336 heads[n] = True
1333 1337 # But, obviously its parents aren't.
1334 1338 for p in self.parents(n):
1335 1339 heads.pop(p, None)
1336 1340 heads = [head for head, flag in heads.items() if flag]
1337 1341 roots = list(roots)
1338 1342 assert orderedout
1339 1343 assert roots
1340 1344 assert heads
1341 1345 return (orderedout, roots, heads)
1342 1346
1343 1347 def headrevs(self, revs=None):
1344 1348 if revs is None:
1345 1349 try:
1346 1350 return self.index.headrevs()
1347 1351 except AttributeError:
1348 1352 return self._headrevs()
1349 1353 if rustdagop is not None and self.index.rust_ext_compat:
1350 1354 return rustdagop.headrevs(self.index, revs)
1351 1355 return dagop.headrevs(revs, self._uncheckedparentrevs)
1352 1356
1353 1357 def computephases(self, roots):
1354 1358 return self.index.computephasesmapsets(roots)
1355 1359
1356 1360 def _headrevs(self):
1357 1361 count = len(self)
1358 1362 if not count:
1359 1363 return [nullrev]
1360 1364 # we won't iterate over filtered revs so nobody is a head at start
1361 1365 ishead = [0] * (count + 1)
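# the extra trailing slot absorbs nullrev (-1) parents: ishead[-1] wraps
# around to it, so a null parent never clears the flag of a real revision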
1362 1366 index = self.index
1363 1367 for r in self:
1364 1368 ishead[r] = 1 # I may be a head
1365 1369 e = index[r]
1366 1370 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1367 1371 return [r for r, val in enumerate(ishead) if val]
1368 1372
1369 1373 def heads(self, start=None, stop=None):
1370 1374 """return the list of all nodes that have no children
1371 1375
1372 1376 if start is specified, only heads that are descendants of
1373 1377 start will be returned
1374 1378 if stop is specified, it will consider all the revs from stop
1375 1379 as if they had no children
1376 1380 """
1377 1381 if start is None and stop is None:
1378 1382 if not len(self):
1379 1383 return [self.nullid]
1380 1384 return [self.node(r) for r in self.headrevs()]
1381 1385
1382 1386 if start is None:
1383 1387 start = nullrev
1384 1388 else:
1385 1389 start = self.rev(start)
1386 1390
1387 1391 stoprevs = {self.rev(n) for n in stop or []}
1388 1392
1389 1393 revs = dagop.headrevssubset(
1390 1394 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1391 1395 )
1392 1396
1393 1397 return [self.node(rev) for rev in revs]
1394 1398
1395 1399 def children(self, node):
1396 1400 """find the children of a given node"""
1397 1401 c = []
1398 1402 p = self.rev(node)
1399 1403 for r in self.revs(start=p + 1):
1400 1404 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1401 1405 if prevs:
1402 1406 for pr in prevs:
1403 1407 if pr == p:
1404 1408 c.append(self.node(r))
1405 1409 elif p == nullrev:
1406 1410 c.append(self.node(r))
1407 1411 return c
1408 1412
1409 1413 def commonancestorsheads(self, a, b):
1410 1414 """calculate all the heads of the common ancestors of nodes a and b"""
1411 1415 a, b = self.rev(a), self.rev(b)
1412 1416 ancs = self._commonancestorsheads(a, b)
1413 1417 return pycompat.maplist(self.node, ancs)
1414 1418
1415 1419 def _commonancestorsheads(self, *revs):
1416 1420 """calculate all the heads of the common ancestors of revs"""
1417 1421 try:
1418 1422 ancs = self.index.commonancestorsheads(*revs)
1419 1423 except (AttributeError, OverflowError): # C implementation failed
1420 1424 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1421 1425 return ancs
1422 1426
1423 1427 def isancestor(self, a, b):
1424 1428 """return True if node a is an ancestor of node b
1425 1429
1426 1430 A revision is considered an ancestor of itself."""
1427 1431 a, b = self.rev(a), self.rev(b)
1428 1432 return self.isancestorrev(a, b)
1429 1433
1430 1434 def isancestorrev(self, a, b):
1431 1435 """return True if revision a is an ancestor of revision b
1432 1436
1433 1437 A revision is considered an ancestor of itself.
1434 1438
1435 1439 The implementation of this is trivial but the use of
1436 1440 reachableroots is not."""
1437 1441 if a == nullrev:
1438 1442 return True
1439 1443 elif a == b:
1440 1444 return True
1441 1445 elif a > b:
1442 1446 return False
1443 1447 return bool(self.reachableroots(a, [b], [a], includepath=False))
1444 1448
1445 1449 def reachableroots(self, minroot, heads, roots, includepath=False):
1446 1450 """return (heads(::(<roots> and <roots>::<heads>)))
1447 1451
1448 1452 If includepath is True, return (<roots>::<heads>)."""
1449 1453 try:
1450 1454 return self.index.reachableroots2(
1451 1455 minroot, heads, roots, includepath
1452 1456 )
1453 1457 except AttributeError:
1454 1458 return dagop._reachablerootspure(
1455 1459 self.parentrevs, minroot, roots, heads, includepath
1456 1460 )
1457 1461
1458 1462 def ancestor(self, a, b):
1459 1463 """calculate the "best" common ancestor of nodes a and b"""
1460 1464
1461 1465 a, b = self.rev(a), self.rev(b)
1462 1466 try:
1463 1467 ancs = self.index.ancestors(a, b)
1464 1468 except (AttributeError, OverflowError):
1465 1469 ancs = ancestor.ancestors(self.parentrevs, a, b)
1466 1470 if ancs:
1467 1471 # choose a consistent winner when there's a tie
1468 1472 return min(map(self.node, ancs))
1469 1473 return self.nullid
1470 1474
1471 1475 def _match(self, id):
1472 1476 if isinstance(id, int):
1473 1477 # rev
1474 1478 return self.node(id)
1475 1479 if len(id) == self.nodeconstants.nodelen:
1476 1480 # possibly a binary node
1477 1481 # odds of a binary node being all hex in ASCII are 1 in 10**25
1478 1482 try:
1479 1483 node = id
1480 1484 self.rev(node) # quick search the index
1481 1485 return node
1482 1486 except error.LookupError:
1483 1487 pass # may be partial hex id
1484 1488 try:
1485 1489 # str(rev)
1486 1490 rev = int(id)
1487 1491 if b"%d" % rev != id:
1488 1492 raise ValueError
1489 1493 if rev < 0:
1490 1494 rev = len(self) + rev
1491 1495 if rev < 0 or rev >= len(self):
1492 1496 raise ValueError
1493 1497 return self.node(rev)
1494 1498 except (ValueError, OverflowError):
1495 1499 pass
1496 1500 if len(id) == 2 * self.nodeconstants.nodelen:
1497 1501 try:
1498 1502 # a full hex nodeid?
1499 1503 node = bin(id)
1500 1504 self.rev(node)
1501 1505 return node
1502 1506 except (binascii.Error, error.LookupError):
1503 1507 pass
1504 1508
1505 1509 def _partialmatch(self, id):
1506 1510 # we don't care about wdirfilenodeids as they should always be full hashes
1507 1511 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1508 1512 ambiguous = False
1509 1513 try:
1510 1514 partial = self.index.partialmatch(id)
1511 1515 if partial and self.hasnode(partial):
1512 1516 if maybewdir:
1513 1517 # single 'ff...' match in radix tree, ambiguous with wdir
1514 1518 ambiguous = True
1515 1519 else:
1516 1520 return partial
1517 1521 elif maybewdir:
1518 1522 # no 'ff...' match in radix tree, wdir identified
1519 1523 raise error.WdirUnsupported
1520 1524 else:
1521 1525 return None
1522 1526 except error.RevlogError:
1523 1527 # parsers.c radix tree lookup gave multiple matches
1524 1528 # fast path: for unfiltered changelog, radix tree is accurate
1525 1529 if not getattr(self, 'filteredrevs', None):
1526 1530 ambiguous = True
1527 1531 # fall through to slow path that filters hidden revisions
1528 1532 except (AttributeError, ValueError):
1529 1533 # we are pure python, or key is not hex
1530 1534 pass
1531 1535 if ambiguous:
1532 1536 raise error.AmbiguousPrefixLookupError(
1533 1537 id, self.display_id, _(b'ambiguous identifier')
1534 1538 )
1535 1539
1536 1540 if id in self._pcache:
1537 1541 return self._pcache[id]
1538 1542
1539 1543 if len(id) <= 40:
1540 1544 # hex(node)[:...]
1541 1545 l = len(id) // 2 * 2 # grab an even number of digits
1542 1546 try:
1543 1547 # we're dropping the last digit, so let's check that it's hex,
1544 1548 # to avoid the expensive computation below if it's not
1545 1549 if len(id) % 2 > 0:
1546 1550 if not (id[-1] in hexdigits):
1547 1551 return None
1548 1552 prefix = bin(id[:l])
1549 1553 except binascii.Error:
1550 1554 pass
1551 1555 else:
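# slow path: linearly scan the index for nodes whose binary form starts
# with the even-length prefix, then re-filter on the full hex prefix,
# which may contain one extra digit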
1552 1556 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1553 1557 nl = [
1554 1558 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1555 1559 ]
1556 1560 if self.nodeconstants.nullhex.startswith(id):
1557 1561 nl.append(self.nullid)
1558 1562 if len(nl) > 0:
1559 1563 if len(nl) == 1 and not maybewdir:
1560 1564 self._pcache[id] = nl[0]
1561 1565 return nl[0]
1562 1566 raise error.AmbiguousPrefixLookupError(
1563 1567 id, self.display_id, _(b'ambiguous identifier')
1564 1568 )
1565 1569 if maybewdir:
1566 1570 raise error.WdirUnsupported
1567 1571 return None
1568 1572
1569 1573 def lookup(self, id):
1570 1574 """locate a node based on:
1571 1575 - revision number or str(revision number)
1572 1576 - nodeid or subset of hex nodeid
1573 1577 """
1574 1578 n = self._match(id)
1575 1579 if n is not None:
1576 1580 return n
1577 1581 n = self._partialmatch(id)
1578 1582 if n:
1579 1583 return n
1580 1584
1581 1585 raise error.LookupError(id, self.display_id, _(b'no match found'))
1582 1586
1583 1587 def shortest(self, node, minlength=1):
1584 1588 """Find the shortest unambiguous prefix that matches node."""
1585 1589
1586 1590 def isvalid(prefix):
1587 1591 try:
1588 1592 matchednode = self._partialmatch(prefix)
1589 1593 except error.AmbiguousPrefixLookupError:
1590 1594 return False
1591 1595 except error.WdirUnsupported:
1592 1596 # single 'ff...' match
1593 1597 return True
1594 1598 if matchednode is None:
1595 1599 raise error.LookupError(node, self.display_id, _(b'no node'))
1596 1600 return True
1597 1601
1598 1602 def maybewdir(prefix):
1599 1603 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1600 1604
1601 1605 hexnode = hex(node)
1602 1606
1603 1607 def disambiguate(hexnode, minlength):
1604 1608 """Disambiguate against wdirid."""
1605 1609 for length in range(minlength, len(hexnode) + 1):
1606 1610 prefix = hexnode[:length]
1607 1611 if not maybewdir(prefix):
1608 1612 return prefix
1609 1613
1610 1614 if not getattr(self, 'filteredrevs', None):
1611 1615 try:
1612 1616 length = max(self.index.shortest(node), minlength)
1613 1617 return disambiguate(hexnode, length)
1614 1618 except error.RevlogError:
1615 1619 if node != self.nodeconstants.wdirid:
1616 1620 raise error.LookupError(
1617 1621 node, self.display_id, _(b'no node')
1618 1622 )
1619 1623 except AttributeError:
1620 1624 # Fall through to pure code
1621 1625 pass
1622 1626
1623 1627 if node == self.nodeconstants.wdirid:
1624 1628 for length in range(minlength, len(hexnode) + 1):
1625 1629 prefix = hexnode[:length]
1626 1630 if isvalid(prefix):
1627 1631 return prefix
1628 1632
1629 1633 for length in range(minlength, len(hexnode) + 1):
1630 1634 prefix = hexnode[:length]
1631 1635 if isvalid(prefix):
1632 1636 return disambiguate(hexnode, length)
1633 1637
1634 1638 def cmp(self, node, text):
1635 1639 """compare text with a given file revision
1636 1640
1637 1641 returns True if text is different than what is stored.
1638 1642 """
1639 1643 p1, p2 = self.parents(node)
1640 1644 return storageutil.hashrevisionsha1(text, p1, p2) != node
1641 1645
1642 1646 def _getsegmentforrevs(self, startrev, endrev, df=None):
1643 1647 """Obtain a segment of raw data corresponding to a range of revisions.
1644 1648
1645 1649 Accepts the start and end revisions and an optional already-open
1646 1650 file handle to be used for reading. If the file handle is read, its
1647 1651 seek position will not be preserved.
1648 1652
1649 1653 Requests for data may be satisfied by a cache.
1650 1654
1651 1655 Returns a 2-tuple of (offset, data) for the requested range of
1652 1656 revisions. Offset is the integer offset from the beginning of the
1653 1657 revlog and data is a str or buffer of the raw byte data.
1654 1658
1655 1659 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1656 1660 to determine where each revision's data begins and ends.
1657 1661 """
1658 1662 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1659 1663 # (functions are expensive).
1660 1664 index = self.index
1661 1665 istart = index[startrev]
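# the first index field packs the data offset in its upper bits and the
# revision flags in the lower 16 bits, hence the shift below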
1662 1666 start = int(istart[0] >> 16)
1663 1667 if startrev == endrev:
1664 1668 end = start + istart[1]
1665 1669 else:
1666 1670 iend = index[endrev]
1667 1671 end = int(iend[0] >> 16) + iend[1]
1668 1672
1669 1673 if self._inline:
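# inline revlogs interleave each revision's data with its index entry,
# so physical offsets must skip the index entries stored before the
# requested chunks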
1670 1674 start += (startrev + 1) * self.index.entry_size
1671 1675 end += (endrev + 1) * self.index.entry_size
1672 1676 length = end - start
1673 1677
1674 1678 return start, self._segmentfile.read_chunk(start, length, df)
1675 1679
1676 1680 def _chunk(self, rev, df=None):
1677 1681 """Obtain a single decompressed chunk for a revision.
1678 1682
1679 1683 Accepts an integer revision and an optional already-open file handle
1680 1684 to be used for reading. If used, the seek position of the file will not
1681 1685 be preserved.
1682 1686
1683 1687 Returns a str holding uncompressed data for the requested revision.
1684 1688 """
1685 1689 compression_mode = self.index[rev][10]
1686 1690 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1687 1691 if compression_mode == COMP_MODE_PLAIN:
1688 1692 return data
1689 1693 elif compression_mode == COMP_MODE_DEFAULT:
1690 1694 return self._decompressor(data)
1691 1695 elif compression_mode == COMP_MODE_INLINE:
1692 1696 return self.decompress(data)
1693 1697 else:
1694 1698 msg = b'unknown compression mode %d'
1695 1699 msg %= compression_mode
1696 1700 raise error.RevlogError(msg)
1697 1701
1698 1702 def _chunks(self, revs, df=None, targetsize=None):
1699 1703 """Obtain decompressed chunks for the specified revisions.
1700 1704
1701 1705 Accepts an iterable of numeric revisions that are assumed to be in
1702 1706 ascending order. Also accepts an optional already-open file handle
1703 1707 to be used for reading. If used, the seek position of the file will
1704 1708 not be preserved.
1705 1709
1706 1710 This function is similar to calling ``self._chunk()`` multiple times,
1707 1711 but is faster.
1708 1712
1709 1713 Returns a list with decompressed data for each requested revision.
1710 1714 """
1711 1715 if not revs:
1712 1716 return []
1713 1717 start = self.start
1714 1718 length = self.length
1715 1719 inline = self._inline
1716 1720 iosize = self.index.entry_size
1717 1721 buffer = util.buffer
1718 1722
1719 1723 l = []
1720 1724 ladd = l.append
1721 1725
1722 1726 if not self._withsparseread:
1723 1727 slicedchunks = (revs,)
1724 1728 else:
1725 1729 slicedchunks = deltautil.slicechunk(
1726 1730 self, revs, targetsize=targetsize
1727 1731 )
1728 1732
1729 1733 for revschunk in slicedchunks:
1730 1734 firstrev = revschunk[0]
1731 1735 # Skip trailing revisions with empty diff
1732 1736 for lastrev in revschunk[::-1]:
1733 1737 if length(lastrev) != 0:
1734 1738 break
1735 1739
1736 1740 try:
1737 1741 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1738 1742 except OverflowError:
1739 1743 # issue4215 - we can't cache a run of chunks greater than
1740 1744 # 2G on Windows
1741 1745 return [self._chunk(rev, df=df) for rev in revschunk]
1742 1746
1743 1747 decomp = self.decompress
1744 1748 # self._decompressor might be None, but will not be used in that case
1745 1749 def_decomp = self._decompressor
1746 1750 for rev in revschunk:
1747 1751 chunkstart = start(rev)
1748 1752 if inline:
1749 1753 chunkstart += (rev + 1) * iosize
1750 1754 chunklength = length(rev)
1751 1755 comp_mode = self.index[rev][10]
1752 1756 c = buffer(data, chunkstart - offset, chunklength)
1753 1757 if comp_mode == COMP_MODE_PLAIN:
1754 1758 ladd(c)
1755 1759 elif comp_mode == COMP_MODE_INLINE:
1756 1760 ladd(decomp(c))
1757 1761 elif comp_mode == COMP_MODE_DEFAULT:
1758 1762 ladd(def_decomp(c))
1759 1763 else:
1760 1764 msg = b'unknown compression mode %d'
1761 1765 msg %= comp_mode
1762 1766 raise error.RevlogError(msg)
1763 1767
1764 1768 return l
1765 1769
1766 1770 def deltaparent(self, rev):
1767 1771 """return deltaparent of the given revision"""
1768 1772 base = self.index[rev][3]
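# base == rev is the convention for "full text stored, no delta"; with
# general delta the base field is the actual delta parent, otherwise
# deltas are implicitly against the previous revision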
1769 1773 if base == rev:
1770 1774 return nullrev
1771 1775 elif self._generaldelta:
1772 1776 return base
1773 1777 else:
1774 1778 return rev - 1
1775 1779
1776 1780 def issnapshot(self, rev):
1777 1781 """tells whether rev is a snapshot"""
1778 1782 if not self._sparserevlog:
1779 1783 return self.deltaparent(rev) == nullrev
1780 1784 elif util.safehasattr(self.index, b'issnapshot'):
1781 1785 # directly assign the method to cache the testing and access
1782 1786 self.issnapshot = self.index.issnapshot
1783 1787 return self.issnapshot(rev)
1784 1788 if rev == nullrev:
1785 1789 return True
1786 1790 entry = self.index[rev]
1787 1791 base = entry[3]
1788 1792 if base == rev:
1789 1793 return True
1790 1794 if base == nullrev:
1791 1795 return True
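# an empty stored delta means a revision is identical to its own delta
# base, so follow the chain to the revision that actually holds each
# parent's data before comparing it with 'base'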
1792 1796 p1 = entry[5]
1793 1797 while self.length(p1) == 0:
1794 1798 b = self.deltaparent(p1)
1795 1799 if b == p1:
1796 1800 break
1797 1801 p1 = b
1798 1802 p2 = entry[6]
1799 1803 while self.length(p2) == 0:
1800 1804 b = self.deltaparent(p2)
1801 1805 if b == p2:
1802 1806 break
1803 1807 p2 = b
1804 1808 if base == p1 or base == p2:
1805 1809 return False
1806 1810 return self.issnapshot(base)
1807 1811
1808 1812 def snapshotdepth(self, rev):
1809 1813 """number of snapshots in the chain before this one"""
1810 1814 if not self.issnapshot(rev):
1811 1815 raise error.ProgrammingError(b'revision %d not a snapshot')
1812 1816 return len(self._deltachain(rev)[0]) - 1
1813 1817
1814 1818 def revdiff(self, rev1, rev2):
1815 1819 """return or calculate a delta between two revisions
1816 1820
1817 1821 The delta calculated is in binary form and is intended to be written to
1818 1822 revlog data directly. So this function needs raw revision data.
1819 1823 """
1820 1824 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1821 1825 return bytes(self._chunk(rev2))
1822 1826
1823 1827 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1824 1828
1825 1829 def revision(self, nodeorrev, _df=None):
1826 1830 """return an uncompressed revision of a given node or revision
1827 1831 number.
1828 1832
1829 1833 _df - an existing file handle to read from. (internal-only)
1830 1834 """
1831 1835 return self._revisiondata(nodeorrev, _df)
1832 1836
1833 1837 def sidedata(self, nodeorrev, _df=None):
1834 1838 """a map of extra data related to the changeset but not part of the hash
1835 1839
1836 1840 This function currently returns a dictionary. However, a more advanced
1837 1841 mapping object will likely be used in the future for more
1838 1842 efficient/lazy code.
1839 1843 """
1840 1844 # deal with <nodeorrev> argument type
1841 1845 if isinstance(nodeorrev, int):
1842 1846 rev = nodeorrev
1843 1847 else:
1844 1848 rev = self.rev(nodeorrev)
1845 1849 return self._sidedata(rev)
1846 1850
1847 1851 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1848 1852 # deal with <nodeorrev> argument type
1849 1853 if isinstance(nodeorrev, int):
1850 1854 rev = nodeorrev
1851 1855 node = self.node(rev)
1852 1856 else:
1853 1857 node = nodeorrev
1854 1858 rev = None
1855 1859
1856 1860 # fast path the special `nullid` rev
1857 1861 if node == self.nullid:
1858 1862 return b""
1859 1863
1860 1864 # ``rawtext`` is the text as stored inside the revlog. Might be the
1861 1865 # revision or might need to be processed to retrieve the revision.
1862 1866 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1863 1867
1864 1868 if raw and validated:
1865 1869 # if we don't want to process the raw text and the raw
1866 1870 # text is cached, we can exit early.
1867 1871 return rawtext
1868 1872 if rev is None:
1869 1873 rev = self.rev(node)
1870 1874 # the revlog's flags for this revision
1871 1875 # (they usually alter its state or content)
1872 1876 flags = self.flags(rev)
1873 1877
1874 1878 if validated and flags == REVIDX_DEFAULT_FLAGS:
1875 1879 # no extra flags set, no flag processor runs, text = rawtext
1876 1880 return rawtext
1877 1881
1878 1882 if raw:
1879 1883 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1880 1884 text = rawtext
1881 1885 else:
1882 1886 r = flagutil.processflagsread(self, rawtext, flags)
1883 1887 text, validatehash = r
1884 1888 if validatehash:
1885 1889 self.checkhash(text, node, rev=rev)
1886 1890 if not validated:
1887 1891 self._revisioncache = (node, rev, rawtext)
1888 1892
1889 1893 return text
1890 1894
1891 1895 def _rawtext(self, node, rev, _df=None):
1892 1896 """return the possibly unvalidated rawtext for a revision
1893 1897
1894 1898 returns (rev, rawtext, validated)
1895 1899 """
1896 1900
1897 1901 # revision in the cache (could be useful to apply delta)
1898 1902 cachedrev = None
1899 1903 # An intermediate text to apply deltas to
1900 1904 basetext = None
1901 1905
1902 1906 # Check if we have the entry in cache
1903 1907 # The cache entry looks like (node, rev, rawtext)
1904 1908 if self._revisioncache:
1905 1909 if self._revisioncache[0] == node:
1906 1910 return (rev, self._revisioncache[2], True)
1907 1911 cachedrev = self._revisioncache[1]
1908 1912
1909 1913 if rev is None:
1910 1914 rev = self.rev(node)
1911 1915
1912 1916 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
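# 'stopped' means the delta chain was cut short at 'cachedrev', so the
# cached rawtext serves as the base the remaining deltas apply to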
1913 1917 if stopped:
1914 1918 basetext = self._revisioncache[2]
1915 1919
1916 1920 # drop cache to save memory, the caller is expected to
1917 1921 # update self._revisioncache after validating the text
1918 1922 self._revisioncache = None
1919 1923
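# bound how much data the chunk slicing below may read at once: four
# times the expected uncompressed size of this revision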
1920 1924 targetsize = None
1921 1925 rawsize = self.index[rev][2]
1922 1926 if 0 <= rawsize:
1923 1927 targetsize = 4 * rawsize
1924 1928
1925 1929 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1926 1930 if basetext is None:
1927 1931 basetext = bytes(bins[0])
1928 1932 bins = bins[1:]
1929 1933
1930 1934 rawtext = mdiff.patches(basetext, bins)
1931 1935 del basetext # let us have a chance to free memory early
1932 1936 return (rev, rawtext, False)
1933 1937
1934 1938 def _sidedata(self, rev):
1935 1939 """Return the sidedata for a given revision number."""
1936 1940 index_entry = self.index[rev]
1937 1941 sidedata_offset = index_entry[8]
1938 1942 sidedata_size = index_entry[9]
1939 1943
1940 1944 if self._inline:
1941 1945 sidedata_offset += self.index.entry_size * (1 + rev)
1942 1946 if sidedata_size == 0:
1943 1947 return {}
1944 1948
1945 1949 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
1946 1950 filename = self._sidedatafile
1947 1951 end = self._docket.sidedata_end
1948 1952 offset = sidedata_offset
1949 1953 length = sidedata_size
1950 1954 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1951 1955 raise error.RevlogError(m)
1952 1956
1953 1957 comp_segment = self._segmentfile_sidedata.read_chunk(
1954 1958 sidedata_offset, sidedata_size
1955 1959 )
1956 1960
1957 1961 comp = self.index[rev][11]
1958 1962 if comp == COMP_MODE_PLAIN:
1959 1963 segment = comp_segment
1960 1964 elif comp == COMP_MODE_DEFAULT:
1961 1965 segment = self._decompressor(comp_segment)
1962 1966 elif comp == COMP_MODE_INLINE:
1963 1967 segment = self.decompress(comp_segment)
1964 1968 else:
1965 1969 msg = b'unknown compression mode %d'
1966 1970 msg %= comp
1967 1971 raise error.RevlogError(msg)
1968 1972
1969 1973 sidedata = sidedatautil.deserialize_sidedata(segment)
1970 1974 return sidedata
1971 1975
1972 1976 def rawdata(self, nodeorrev, _df=None):
1973 1977 """return an uncompressed raw data of a given node or revision number.
1974 1978
1975 1979 _df - an existing file handle to read from. (internal-only)
1976 1980 """
1977 1981 return self._revisiondata(nodeorrev, _df, raw=True)
1978 1982
1979 1983 def hash(self, text, p1, p2):
1980 1984 """Compute a node hash.
1981 1985
1982 1986 Available as a function so that subclasses can replace the hash
1983 1987 as needed.
1984 1988 """
1985 1989 return storageutil.hashrevisionsha1(text, p1, p2)
1986 1990
1987 1991 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1988 1992 """Check node hash integrity.
1989 1993
1990 1994 Available as a function so that subclasses can extend hash mismatch
1991 1995 behaviors as needed.
1992 1996 """
1993 1997 try:
1994 1998 if p1 is None and p2 is None:
1995 1999 p1, p2 = self.parents(node)
1996 2000 if node != self.hash(text, p1, p2):
1997 2001 # Clear the revision cache on hash failure. The revision cache
1998 2002 # only stores the raw revision and clearing the cache does have
1999 2003 # the side-effect that we won't have a cache hit when the raw
2000 2004 # revision data is accessed. But this case should be rare and
2001 2005 # it is extra work to teach the cache about the hash
2002 2006 # verification state.
2003 2007 if self._revisioncache and self._revisioncache[0] == node:
2004 2008 self._revisioncache = None
2005 2009
2006 2010 revornode = rev
2007 2011 if revornode is None:
2008 2012 revornode = templatefilters.short(hex(node))
2009 2013 raise error.RevlogError(
2010 2014 _(b"integrity check failed on %s:%s")
2011 2015 % (self.display_id, pycompat.bytestr(revornode))
2012 2016 )
2013 2017 except error.RevlogError:
2014 2018 if self._censorable and storageutil.iscensoredtext(text):
2015 2019 raise error.CensoredNodeError(self.display_id, node, text)
2016 2020 raise
2017 2021
2018 def _enforceinlinesize(self, tr):
2022 def _enforceinlinesize(self, tr, side_write=True):
2019 2023 """Check if the revlog is too big for inline and convert if so.
2020 2024
2021 2025 This should be called after revisions are added to the revlog. If the
2022 2026 revlog has grown too large to be an inline revlog, it will convert it
2023 2027 to use multiple index and data files.
2024 2028 """
2025 2029 tiprev = len(self) - 1
2026 2030 total_size = self.start(tiprev) + self.length(tiprev)
2027 2031 if not self._inline or total_size < _maxinline:
2028 2032 return
2029 2033
2030 2034 troffset = tr.findoffset(self._indexfile)
2031 2035 if troffset is None:
2032 2036 raise error.RevlogError(
2033 2037 _(b"%s not found in the transaction") % self._indexfile
2034 2038 )
2035 trindex = None
2039 if troffset:
2040 tr.addbackup(self._indexfile, for_offset=True)
2036 2041 tr.add(self._datafile, 0)
2037 2042
2038 2043 existing_handles = False
2039 2044 if self._writinghandles is not None:
2040 2045 existing_handles = True
2041 2046 fp = self._writinghandles[0]
2042 2047 fp.flush()
2043 2048 fp.close()
2044 2049 # We can't use the cached file handle after close(). So prevent
2045 2050 # its usage.
2046 2051 self._writinghandles = None
2047 2052 self._segmentfile.writing_handle = None
2048 2053 # No need to deal with sidedata writing handle as it is only
2049 2054 # relevant with revlog-v2 which is never inline, not reaching
2050 2055 # this code
2056 if side_write:
2057 old_index_file_path = self._indexfile
2058 new_index_file_path = self._indexfile + b'.s'
2059 opener = self.opener
2060
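# the temporary '.s' index is transient: it is renamed over the real
# index when the transaction is finalized, so keep it out of the fncache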
2061 fncache = getattr(opener, 'fncache', None)
2062 if fncache is not None:
2063 fncache.addignore(new_index_file_path)
2064
2065 # the "split" index replaces the real index when the transaction is finalized
2066 def finalize_callback(tr):
2067 opener.rename(
2068 new_index_file_path,
2069 old_index_file_path,
2070 checkambig=True,
2071 )
2072
2073 tr.registertmp(new_index_file_path)
2074 if self.target[1] is not None:
2075 finalize_id = b'000-revlog-split-%d-%s' % self.target
2076 else:
2077 finalize_id = b'000-revlog-split-%d' % self.target[0]
2078 tr.addfinalize(finalize_id, finalize_callback)
2051 2079
2052 2080 new_dfh = self._datafp(b'w+')
2053 2081 new_dfh.truncate(0) # drop any potentially existing data
2054 2082 try:
2055 2083 with self._indexfp() as read_ifh:
2056 2084 for r in self:
2057 2085 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2058 if (
2059 trindex is None
2060 and troffset
2061 <= self.start(r) + r * self.index.entry_size
2062 ):
2063 trindex = r
2064 2086 new_dfh.flush()
2065 2087
2066 if trindex is None:
2067 trindex = 0
2068
2088 if side_write:
2089 self._indexfile = new_index_file_path
2069 2090 with self.__index_new_fp() as fp:
2070 2091 self._format_flags &= ~FLAG_INLINE_DATA
2071 2092 self._inline = False
2072 2093 for i in self:
2073 2094 e = self.index.entry_binary(i)
2074 2095 if i == 0 and self._docket is None:
2075 2096 header = self._format_flags | self._format_version
2076 2097 header = self.index.pack_header(header)
2077 2098 e = header + e
2078 2099 fp.write(e)
2079 2100 if self._docket is not None:
2080 2101 self._docket.index_end = fp.tell()
2081 2102
2082 # There is a small transactional race here. If the rename of
2083 # the index fails, we should remove the datafile. It is more
2084 # important to ensure that the data file is not truncated
2085 # when the index is replaced as otherwise data is lost.
2086 tr.replace(self._datafile, self.start(trindex))
2087
2088 # the temp file replace the real index when we exit the context
2089 # manager
2090
2091 tr.replace(self._indexfile, trindex * self.index.entry_size)
2103 # If we don't use side-write, the temp file replaces the real
2104 # index when we exit the context manager
2105
2092 2106 nodemaputil.setup_persistent_nodemap(tr, self)
2093 2107 self._segmentfile = randomaccessfile.randomaccessfile(
2094 2108 self.opener,
2095 2109 self._datafile,
2096 2110 self._chunkcachesize,
2097 2111 )
2098 2112
2099 2113 if existing_handles:
2100 2114 # switched from inline to conventional reopen the index
2101 2115 ifh = self.__index_write_fp()
2102 2116 self._writinghandles = (ifh, new_dfh, None)
2103 2117 self._segmentfile.writing_handle = new_dfh
2104 2118 new_dfh = None
2105 2119 # No need to deal with sidedata writing handle as it is only
2106 2120 # relevant with revlog-v2 which is never inline, not reaching
2107 2121 # this code
2108 2122 finally:
2109 2123 if new_dfh is not None:
2110 2124 new_dfh.close()
2111 2125
2112 2126 def _nodeduplicatecallback(self, transaction, node):
2113 2127 """called when trying to add a node already stored."""
2114 2128
2115 2129 @contextlib.contextmanager
2116 2130 def reading(self):
2117 2131 """Context manager that keeps data and sidedata files open for reading"""
2118 2132 with self._segmentfile.reading():
2119 2133 with self._segmentfile_sidedata.reading():
2120 2134 yield
2121 2135
2122 2136 @contextlib.contextmanager
2123 2137 def _writing(self, transaction):
2124 2138 if self._trypending:
2125 2139 msg = b'try to write in a `trypending` revlog: %s'
2126 2140 msg %= self.display_id
2127 2141 raise error.ProgrammingError(msg)
2128 2142 if self._writinghandles is not None:
2129 2143 yield
2130 2144 else:
2131 2145 ifh = dfh = sdfh = None
2132 2146 try:
2133 2147 r = len(self)
2134 2148 # opening the data file.
2135 2149 dsize = 0
2136 2150 if r:
2137 2151 dsize = self.end(r - 1)
2138 2152 dfh = None
2139 2153 if not self._inline:
2140 2154 try:
2141 2155 dfh = self._datafp(b"r+")
2142 2156 if self._docket is None:
2143 2157 dfh.seek(0, os.SEEK_END)
2144 2158 else:
2145 2159 dfh.seek(self._docket.data_end, os.SEEK_SET)
2146 2160 except FileNotFoundError:
2147 2161 dfh = self._datafp(b"w+")
2148 2162 transaction.add(self._datafile, dsize)
2149 2163 if self._sidedatafile is not None:
2150 2164 # revlog-v2 does not inline, help Pytype
2151 2165 assert dfh is not None
2152 2166 try:
2153 2167 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2154 2168 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2155 2169 except FileNotFoundError:
2156 2170 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2157 2171 transaction.add(
2158 2172 self._sidedatafile, self._docket.sidedata_end
2159 2173 )
2160 2174
2161 2175 # opening the index file.
2162 2176 isize = r * self.index.entry_size
2163 2177 ifh = self.__index_write_fp()
2164 2178 if self._inline:
2165 2179 transaction.add(self._indexfile, dsize + isize)
2166 2180 else:
2167 2181 transaction.add(self._indexfile, isize)
2168 2182 # exposing all file handle for writing.
2169 2183 self._writinghandles = (ifh, dfh, sdfh)
2170 2184 self._segmentfile.writing_handle = ifh if self._inline else dfh
2171 2185 self._segmentfile_sidedata.writing_handle = sdfh
2172 2186 yield
2173 2187 if self._docket is not None:
2174 2188 self._write_docket(transaction)
2175 2189 finally:
2176 2190 self._writinghandles = None
2177 2191 self._segmentfile.writing_handle = None
2178 2192 self._segmentfile_sidedata.writing_handle = None
2179 2193 if dfh is not None:
2180 2194 dfh.close()
2181 2195 if sdfh is not None:
2182 2196 sdfh.close()
2183 2197 # closing the index file last to avoid exposing references to
2184 2198 # potentially unflushed data content.
2185 2199 if ifh is not None:
2186 2200 ifh.close()
2187 2201
2188 2202 def _write_docket(self, transaction):
2189 2203 """write the current docket on disk
2190 2204
2191 2205 Exists as a method to help the changelog implement its transaction logic
2192 2206
2193 2207 We could also imagine using the same transaction logic for all revlogs
2194 2208 since dockets are cheap.
2195 2209 self._docket.write(transaction)
2196 2210
2197 2211 def addrevision(
2198 2212 self,
2199 2213 text,
2200 2214 transaction,
2201 2215 link,
2202 2216 p1,
2203 2217 p2,
2204 2218 cachedelta=None,
2205 2219 node=None,
2206 2220 flags=REVIDX_DEFAULT_FLAGS,
2207 2221 deltacomputer=None,
2208 2222 sidedata=None,
2209 2223 ):
2210 2224 """add a revision to the log
2211 2225
2212 2226 text - the revision data to add
2213 2227 transaction - the transaction object used for rollback
2214 2228 link - the linkrev data to add
2215 2229 p1, p2 - the parent nodeids of the revision
2216 2230 cachedelta - an optional precomputed delta
2217 2231 node - nodeid of revision; typically node is not specified, and it is
2218 2232 computed by default as hash(text, p1, p2), however subclasses might
2219 2233 use a different hashing method (and override checkhash() in that case)
2220 2234 flags - the known flags to set on the revision
2221 2235 deltacomputer - an optional deltacomputer instance shared between
2222 2236 multiple calls
2223 2237 """
2224 2238 if link == nullrev:
2225 2239 raise error.RevlogError(
2226 2240 _(b"attempted to add linkrev -1 to %s") % self.display_id
2227 2241 )
2228 2242
2229 2243 if sidedata is None:
2230 2244 sidedata = {}
2231 2245 elif sidedata and not self.hassidedata:
2232 2246 raise error.ProgrammingError(
2233 2247 _(b"trying to add sidedata to a revlog that does not support them")
2234 2248 )
2235 2249
2236 2250 if flags:
2237 2251 node = node or self.hash(text, p1, p2)
2238 2252
2239 2253 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2240 2254
2241 2255 # If the flag processor modifies the revision data, ignore any provided
2242 2256 # cachedelta.
2243 2257 if rawtext != text:
2244 2258 cachedelta = None
2245 2259
2246 2260 if len(rawtext) > _maxentrysize:
2247 2261 raise error.RevlogError(
2248 2262 _(
2249 2263 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2250 2264 )
2251 2265 % (self.display_id, len(rawtext))
2252 2266 )
2253 2267
2254 2268 node = node or self.hash(rawtext, p1, p2)
2255 2269 rev = self.index.get_rev(node)
2256 2270 if rev is not None:
2257 2271 return rev
2258 2272
2259 2273 if validatehash:
2260 2274 self.checkhash(rawtext, node, p1=p1, p2=p2)
2261 2275
2262 2276 return self.addrawrevision(
2263 2277 rawtext,
2264 2278 transaction,
2265 2279 link,
2266 2280 p1,
2267 2281 p2,
2268 2282 node,
2269 2283 flags,
2270 2284 cachedelta=cachedelta,
2271 2285 deltacomputer=deltacomputer,
2272 2286 sidedata=sidedata,
2273 2287 )
2274 2288
2275 2289 def addrawrevision(
2276 2290 self,
2277 2291 rawtext,
2278 2292 transaction,
2279 2293 link,
2280 2294 p1,
2281 2295 p2,
2282 2296 node,
2283 2297 flags,
2284 2298 cachedelta=None,
2285 2299 deltacomputer=None,
2286 2300 sidedata=None,
2287 2301 ):
2288 2302 """add a raw revision with known flags, node and parents
2289 2303 useful when reusing a revision not stored in this revlog (ex: received
2290 2304 over wire, or read from an external bundle).
2291 2305 """
2292 2306 with self._writing(transaction):
2293 2307 return self._addrevision(
2294 2308 node,
2295 2309 rawtext,
2296 2310 transaction,
2297 2311 link,
2298 2312 p1,
2299 2313 p2,
2300 2314 flags,
2301 2315 cachedelta,
2302 2316 deltacomputer=deltacomputer,
2303 2317 sidedata=sidedata,
2304 2318 )
2305 2319
2306 2320 def compress(self, data):
2307 2321 """Generate a possibly-compressed representation of data."""
2308 2322 if not data:
2309 2323 return b'', data
2310 2324
2311 2325 compressed = self._compressor.compress(data)
2312 2326
2313 2327 if compressed:
2314 2328 # The revlog compressor added the header in the returned data.
2315 2329 return b'', compressed
2316 2330
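# compression did not help: data starting with '\0' can be stored as-is
# because decompress() treats a leading '\0' as plain data, anything else
# needs the 'u' (uncompressed) marker prepended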
2317 2331 if data[0:1] == b'\0':
2318 2332 return b'', data
2319 2333 return b'u', data
2320 2334
2321 2335 def decompress(self, data):
2322 2336 """Decompress a revlog chunk.
2323 2337
2324 2338 The chunk is expected to begin with a header identifying the
2325 2339 format type so it can be routed to an appropriate decompressor.
2326 2340 """
2327 2341 if not data:
2328 2342 return data
2329 2343
2330 2344 # Revlogs are read much more frequently than they are written and many
2331 2345 # chunks only take microseconds to decompress, so performance is
2332 2346 # important here.
2333 2347 #
2334 2348 # We can make a few assumptions about revlogs:
2335 2349 #
2336 2350 # 1) the majority of chunks will be compressed (as opposed to inline
2337 2351 # raw data).
2338 2352 # 2) decompressing *any* data will likely be at least 10x slower than
2339 2353 # returning raw inline data.
2340 2354 # 3) we want to prioritize common and officially supported compression
2341 2355 # engines
2342 2356 #
2343 2357 # It follows that we want to optimize for "decompress compressed data
2344 2358 # when encoded with common and officially supported compression engines"
2345 2359 # case over "raw data" and "data encoded by less common or non-official
2346 2360 # compression engines." That is why we have the inline lookup first
2347 2361 # followed by the compengines lookup.
2348 2362 #
2349 2363 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2350 2364 # compressed chunks. And this matters for changelog and manifest reads.
2351 2365 t = data[0:1]
2352 2366
2353 2367 if t == b'x':
2354 2368 try:
2355 2369 return _zlibdecompress(data)
2356 2370 except zlib.error as e:
2357 2371 raise error.RevlogError(
2358 2372 _(b'revlog decompress error: %s')
2359 2373 % stringutil.forcebytestr(e)
2360 2374 )
2361 2375 # '\0' is more common than 'u' so it goes first.
2362 2376 elif t == b'\0':
2363 2377 return data
2364 2378 elif t == b'u':
2365 2379 return util.buffer(data, 1)
2366 2380
2367 2381 compressor = self._get_decompressor(t)
2368 2382
2369 2383 return compressor.decompress(data)
2370 2384
2371 2385 def _addrevision(
2372 2386 self,
2373 2387 node,
2374 2388 rawtext,
2375 2389 transaction,
2376 2390 link,
2377 2391 p1,
2378 2392 p2,
2379 2393 flags,
2380 2394 cachedelta,
2381 2395 alwayscache=False,
2382 2396 deltacomputer=None,
2383 2397 sidedata=None,
2384 2398 ):
2385 2399 """internal function to add revisions to the log
2386 2400
2387 2401 see addrevision for argument descriptions.
2388 2402
2389 2403 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2390 2404
2391 2405 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2392 2406 be used.
2393 2407
2394 2408 invariants:
2395 2409 - rawtext is optional (can be None); if not set, cachedelta must be set.
2396 2410 if both are set, they must correspond to each other.
2397 2411 """
2398 2412 if node == self.nullid:
2399 2413 raise error.RevlogError(
2400 2414 _(b"%s: attempt to add null revision") % self.display_id
2401 2415 )
2402 2416 if (
2403 2417 node == self.nodeconstants.wdirid
2404 2418 or node in self.nodeconstants.wdirfilenodeids
2405 2419 ):
2406 2420 raise error.RevlogError(
2407 2421 _(b"%s: attempt to add wdir revision") % self.display_id
2408 2422 )
2409 2423 if self._writinghandles is None:
2410 2424 msg = b'adding revision outside `revlog._writing` context'
2411 2425 raise error.ProgrammingError(msg)
2412 2426
2413 2427 if self._inline:
2414 2428 fh = self._writinghandles[0]
2415 2429 else:
2416 2430 fh = self._writinghandles[1]
2417 2431
2418 2432 btext = [rawtext]
2419 2433
2420 2434 curr = len(self)
2421 2435 prev = curr - 1
2422 2436
2423 2437 offset = self._get_data_offset(prev)
2424 2438
2425 2439 if self._concurrencychecker:
2426 2440 ifh, dfh, sdfh = self._writinghandles
2427 2441 # XXX no checking for the sidedata file
2428 2442 if self._inline:
2429 2443 # offset is "as if" it were in the .d file, so we need to add on
2430 2444 # the size of the entry metadata.
2431 2445 self._concurrencychecker(
2432 2446 ifh, self._indexfile, offset + curr * self.index.entry_size
2433 2447 )
2434 2448 else:
2435 2449 # Entries in the .i are a consistent size.
2436 2450 self._concurrencychecker(
2437 2451 ifh, self._indexfile, curr * self.index.entry_size
2438 2452 )
2439 2453 self._concurrencychecker(dfh, self._datafile, offset)
2440 2454
2441 2455 p1r, p2r = self.rev(p1), self.rev(p2)
2442 2456
2443 2457 # full versions are inserted when the needed deltas
2444 2458 # become comparable to the uncompressed text
2445 2459 if rawtext is None:
2446 2460 # need rawtext size, before it is changed by flag processors, which is
2447 2461 # the non-raw size. use revlog explicitly to avoid filelog's extra
2448 2462 # logic that might remove metadata size.
2449 2463 textlen = mdiff.patchedsize(
2450 2464 revlog.size(self, cachedelta[0]), cachedelta[1]
2451 2465 )
2452 2466 else:
2453 2467 textlen = len(rawtext)
2454 2468
2455 2469 if deltacomputer is None:
2456 2470 write_debug = None
2457 2471 if self._debug_delta:
2458 2472 write_debug = transaction._report
2459 2473 deltacomputer = deltautil.deltacomputer(
2460 2474 self, write_debug=write_debug
2461 2475 )
2462 2476
2463 2477 if cachedelta is not None and len(cachedelta) == 2:
2464 2478 # If the cached delta has no information about how it should be
2465 2479 # reused, add the default reuse instruction according to the
2466 2480 # revlog's configuration.
2467 2481 if self._generaldelta and self._lazydeltabase:
2468 2482 delta_base_reuse = DELTA_BASE_REUSE_TRY
2469 2483 else:
2470 2484 delta_base_reuse = DELTA_BASE_REUSE_NO
2471 2485 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2472 2486
2473 2487 revinfo = revlogutils.revisioninfo(
2474 2488 node,
2475 2489 p1,
2476 2490 p2,
2477 2491 btext,
2478 2492 textlen,
2479 2493 cachedelta,
2480 2494 flags,
2481 2495 )
2482 2496
2483 2497 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2484 2498
2485 2499 compression_mode = COMP_MODE_INLINE
2486 2500 if self._docket is not None:
2487 2501 default_comp = self._docket.default_compression_header
2488 2502 r = deltautil.delta_compression(default_comp, deltainfo)
2489 2503 compression_mode, deltainfo = r
2490 2504
2491 2505 sidedata_compression_mode = COMP_MODE_INLINE
2492 2506 if sidedata and self.hassidedata:
2493 2507 sidedata_compression_mode = COMP_MODE_PLAIN
2494 2508 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2495 2509 sidedata_offset = self._docket.sidedata_end
2496 2510 h, comp_sidedata = self.compress(serialized_sidedata)
2497 2511 if (
2498 2512 h != b'u'
2499 2513 and comp_sidedata[0:1] != b'\0'
2500 2514 and len(comp_sidedata) < len(serialized_sidedata)
2501 2515 ):
2502 2516 assert not h
2503 2517 if (
2504 2518 comp_sidedata[0:1]
2505 2519 == self._docket.default_compression_header
2506 2520 ):
2507 2521 sidedata_compression_mode = COMP_MODE_DEFAULT
2508 2522 serialized_sidedata = comp_sidedata
2509 2523 else:
2510 2524 sidedata_compression_mode = COMP_MODE_INLINE
2511 2525 serialized_sidedata = comp_sidedata
2512 2526 else:
2513 2527 serialized_sidedata = b""
2514 2528 # Don't store the offset if the sidedata is empty, that way
2515 2529 # we can easily detect empty sidedata and they will be no different
2516 2530 # than ones we manually add.
2517 2531 sidedata_offset = 0
2518 2532
2519 2533 rank = RANK_UNKNOWN
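# the rank of a revision is the size of its ancestor set, the revision
# itself included; for a merge, start from the larger parent's rank and
# add the ancestors exclusive to the other parent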
2520 2534 if self._compute_rank:
2521 2535 if (p1r, p2r) == (nullrev, nullrev):
2522 2536 rank = 1
2523 2537 elif p1r != nullrev and p2r == nullrev:
2524 2538 rank = 1 + self.fast_rank(p1r)
2525 2539 elif p1r == nullrev and p2r != nullrev:
2526 2540 rank = 1 + self.fast_rank(p2r)
2527 2541 else: # merge node
2528 2542 if rustdagop is not None and self.index.rust_ext_compat:
2529 2543 rank = rustdagop.rank(self.index, p1r, p2r)
2530 2544 else:
2531 2545 pmin, pmax = sorted((p1r, p2r))
2532 2546 rank = 1 + self.fast_rank(pmax)
2533 2547 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2534 2548
2535 2549 e = revlogutils.entry(
2536 2550 flags=flags,
2537 2551 data_offset=offset,
2538 2552 data_compressed_length=deltainfo.deltalen,
2539 2553 data_uncompressed_length=textlen,
2540 2554 data_compression_mode=compression_mode,
2541 2555 data_delta_base=deltainfo.base,
2542 2556 link_rev=link,
2543 2557 parent_rev_1=p1r,
2544 2558 parent_rev_2=p2r,
2545 2559 node_id=node,
2546 2560 sidedata_offset=sidedata_offset,
2547 2561 sidedata_compressed_length=len(serialized_sidedata),
2548 2562 sidedata_compression_mode=sidedata_compression_mode,
2549 2563 rank=rank,
2550 2564 )
2551 2565
2552 2566 self.index.append(e)
2553 2567 entry = self.index.entry_binary(curr)
2554 2568 if curr == 0 and self._docket is None:
2555 2569 header = self._format_flags | self._format_version
2556 2570 header = self.index.pack_header(header)
2557 2571 entry = header + entry
2558 2572 self._writeentry(
2559 2573 transaction,
2560 2574 entry,
2561 2575 deltainfo.data,
2562 2576 link,
2563 2577 offset,
2564 2578 serialized_sidedata,
2565 2579 sidedata_offset,
2566 2580 )
2567 2581
2568 2582 rawtext = btext[0]
2569 2583
2570 2584 if alwayscache and rawtext is None:
2571 2585 rawtext = deltacomputer.buildtext(revinfo, fh)
2572 2586
2573 2587 if type(rawtext) == bytes: # only accept immutable objects
2574 2588 self._revisioncache = (node, curr, rawtext)
2575 2589 self._chainbasecache[curr] = deltainfo.chainbase
2576 2590 return curr
2577 2591
2578 2592 def _get_data_offset(self, prev):
2579 2593 """Returns the current offset in the (in-transaction) data file.
2580 2594 Versions < 2 of the revlog can get this in O(1), revlog v2 needs a docket
2581 2595 file to store that information: since sidedata can be rewritten to the
2582 2596 end of the data file within a transaction, you can have cases where, for
2583 2597 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2584 2598 to `n - 1`'s sidedata being written after `n`'s data.
2585 2599
2586 2600 TODO cache this in a docket file before getting out of experimental."""
2587 2601 if self._docket is None:
2588 2602 return self.end(prev)
2589 2603 else:
2590 2604 return self._docket.data_end
2591 2605
2592 2606 def _writeentry(
2593 2607 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2594 2608 ):
2595 2609 # Files opened in a+ mode have inconsistent behavior on various
2596 2610 # platforms. Windows requires that a file positioning call be made
2597 2611 # when the file handle transitions between reads and writes. See
2598 2612 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2599 2613 # platforms, Python or the platform itself can be buggy. Some versions
2600 2614 # of Solaris have been observed to not append at the end of the file
2601 2615 # if the file was seeked to before the end. See issue4943 for more.
2602 2616 #
2603 2617 # We work around this issue by inserting a seek() before writing.
2604 2618 # Note: This is likely not necessary on Python 3. However, because
2605 2619 # the file handle is reused for reads and may be seeked there, we need
2606 2620 # to be careful before changing this.
2607 2621 if self._writinghandles is None:
2608 2622 msg = b'adding revision outside `revlog._writing` context'
2609 2623 raise error.ProgrammingError(msg)
2610 2624 ifh, dfh, sdfh = self._writinghandles
2611 2625 if self._docket is None:
2612 2626 ifh.seek(0, os.SEEK_END)
2613 2627 else:
2614 2628 ifh.seek(self._docket.index_end, os.SEEK_SET)
2615 2629 if dfh:
2616 2630 if self._docket is None:
2617 2631 dfh.seek(0, os.SEEK_END)
2618 2632 else:
2619 2633 dfh.seek(self._docket.data_end, os.SEEK_SET)
2620 2634 if sdfh:
2621 2635 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2622 2636
2623 2637 curr = len(self) - 1
2624 2638 if not self._inline:
2625 2639 transaction.add(self._datafile, offset)
2626 2640 if self._sidedatafile:
2627 2641 transaction.add(self._sidedatafile, sidedata_offset)
2628 2642 transaction.add(self._indexfile, curr * len(entry))
2629 2643 if data[0]:
2630 2644 dfh.write(data[0])
2631 2645 dfh.write(data[1])
2632 2646 if sidedata:
2633 2647 sdfh.write(sidedata)
2634 2648 ifh.write(entry)
2635 2649 else:
2636 2650 offset += curr * self.index.entry_size
2637 2651 transaction.add(self._indexfile, offset)
2638 2652 ifh.write(entry)
2639 2653 ifh.write(data[0])
2640 2654 ifh.write(data[1])
2641 2655 assert not sidedata
2642 2656 self._enforceinlinesize(transaction)
2643 2657 if self._docket is not None:
2644 2658 # revlog-v2 always has 3 writing handles, help Pytype
2645 2659 wh1 = self._writinghandles[0]
2646 2660 wh2 = self._writinghandles[1]
2647 2661 wh3 = self._writinghandles[2]
2648 2662 assert wh1 is not None
2649 2663 assert wh2 is not None
2650 2664 assert wh3 is not None
2651 2665 self._docket.index_end = wh1.tell()
2652 2666 self._docket.data_end = wh2.tell()
2653 2667 self._docket.sidedata_end = wh3.tell()
2654 2668
2655 2669 nodemaputil.setup_persistent_nodemap(transaction, self)
2656 2670
2657 2671 def addgroup(
2658 2672 self,
2659 2673 deltas,
2660 2674 linkmapper,
2661 2675 transaction,
2662 2676 alwayscache=False,
2663 2677 addrevisioncb=None,
2664 2678 duplicaterevisioncb=None,
2665 2679 debug_info=None,
2666 2680 delta_base_reuse_policy=None,
2667 2681 ):
2668 2682 """
2669 2683 add a delta group
2670 2684
2671 2685 given a set of deltas, add them to the revision log. the
2672 2686 first delta is against its parent, which should be in our
2673 2687 log, the rest are against the previous delta.
2674 2688
2675 2689 If ``addrevisioncb`` is defined, it will be called with arguments of
2676 2690 this revlog and the node that was added.
2677 2691 """
2678 2692
2679 2693 if self._adding_group:
2680 2694 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2681 2695
2682 2696 # read the default delta-base reuse policy from revlog config if the
2683 2697 # group did not specify one.
2684 2698 if delta_base_reuse_policy is None:
2685 2699 if self._generaldelta and self._lazydeltabase:
2686 2700 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
2687 2701 else:
2688 2702 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
2689 2703
2690 2704 self._adding_group = True
2691 2705 empty = True
2692 2706 try:
2693 2707 with self._writing(transaction):
2694 2708 write_debug = None
2695 2709 if self._debug_delta:
2696 2710 write_debug = transaction._report
2697 2711 deltacomputer = deltautil.deltacomputer(
2698 2712 self,
2699 2713 write_debug=write_debug,
2700 2714 debug_info=debug_info,
2701 2715 )
2702 2716 # loop through our set of deltas
2703 2717 for data in deltas:
2704 2718 (
2705 2719 node,
2706 2720 p1,
2707 2721 p2,
2708 2722 linknode,
2709 2723 deltabase,
2710 2724 delta,
2711 2725 flags,
2712 2726 sidedata,
2713 2727 ) = data
2714 2728 link = linkmapper(linknode)
2715 2729 flags = flags or REVIDX_DEFAULT_FLAGS
2716 2730
2717 2731 rev = self.index.get_rev(node)
2718 2732 if rev is not None:
2719 2733 # this can happen if two branches make the same change
2720 2734 self._nodeduplicatecallback(transaction, rev)
2721 2735 if duplicaterevisioncb:
2722 2736 duplicaterevisioncb(self, rev)
2723 2737 empty = False
2724 2738 continue
2725 2739
2726 2740 for p in (p1, p2):
2727 2741 if not self.index.has_node(p):
2728 2742 raise error.LookupError(
2729 2743 p, self.radix, _(b'unknown parent')
2730 2744 )
2731 2745
2732 2746 if not self.index.has_node(deltabase):
2733 2747 raise error.LookupError(
2734 2748 deltabase, self.display_id, _(b'unknown delta base')
2735 2749 )
2736 2750
2737 2751 baserev = self.rev(deltabase)
2738 2752
2739 2753 if baserev != nullrev and self.iscensored(baserev):
2740 2754 # if base is censored, delta must be full replacement in a
2741 2755 # single patch operation
2742 2756 hlen = struct.calcsize(b">lll")
2743 2757 oldlen = self.rawsize(baserev)
2744 2758 newlen = len(delta) - hlen
2745 2759 if delta[:hlen] != mdiff.replacediffheader(
2746 2760 oldlen, newlen
2747 2761 ):
2748 2762 raise error.CensoredBaseError(
2749 2763 self.display_id, self.node(baserev)
2750 2764 )
2751 2765
2752 2766 if not flags and self._peek_iscensored(baserev, delta):
2753 2767 flags |= REVIDX_ISCENSORED
2754 2768
2755 2769 # We assume consumers of addrevisioncb will want to retrieve
2756 2770 # the added revision, which will require a call to
2757 2771 # revision(). revision() will fast path if there is a cache
2758 2772 # hit. So, we tell _addrevision() to always cache in this case.
2759 2773 # We're only using addgroup() in the context of changegroup
2760 2774 # generation so the revision data can always be handled as raw
2761 2775 # by the flagprocessor.
2762 2776 rev = self._addrevision(
2763 2777 node,
2764 2778 None,
2765 2779 transaction,
2766 2780 link,
2767 2781 p1,
2768 2782 p2,
2769 2783 flags,
2770 2784 (baserev, delta, delta_base_reuse_policy),
2771 2785 alwayscache=alwayscache,
2772 2786 deltacomputer=deltacomputer,
2773 2787 sidedata=sidedata,
2774 2788 )
2775 2789
2776 2790 if addrevisioncb:
2777 2791 addrevisioncb(self, rev)
2778 2792 empty = False
2779 2793 finally:
2780 2794 self._adding_group = False
2781 2795 return not empty
2782 2796
2783 2797 def iscensored(self, rev):
2784 2798 """Check if a file revision is censored."""
2785 2799 if not self._censorable:
2786 2800 return False
2787 2801
2788 2802 return self.flags(rev) & REVIDX_ISCENSORED
2789 2803
2790 2804 def _peek_iscensored(self, baserev, delta):
2791 2805 """Quickly check if a delta produces a censored revision."""
2792 2806 if not self._censorable:
2793 2807 return False
2794 2808
2795 2809 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2796 2810
2797 2811 def getstrippoint(self, minlink):
2798 2812 """find the minimum rev that must be stripped to strip the linkrev
2799 2813
2800 2814 Returns a tuple containing the minimum rev and a set of all revs that
2801 2815 have linkrevs that will be broken by this strip.
2802 2816 """
2803 2817 return storageutil.resolvestripinfo(
2804 2818 minlink,
2805 2819 len(self) - 1,
2806 2820 self.headrevs(),
2807 2821 self.linkrev,
2808 2822 self.parentrevs,
2809 2823 )
2810 2824
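# Editorial sketch, not part of the upstream source: a hypothetical caller
# of getstrippoint() deciding whether anything needs to be stripped and
# which revisions will end up with dangling linkrevs.
def _example_strip_plan(rl, minlink):
    striprev, brokenrevs = rl.getstrippoint(minlink)
    if striprev == len(rl):
        return None  # nothing to strip
    # revisions in brokenrevs link to changesets that the strip will renumber
    return striprev, sorted(brokenrevs)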
2811 2825 def strip(self, minlink, transaction):
2812 2826 """truncate the revlog on the first revision with a linkrev >= minlink
2813 2827
2814 2828 This function is called when we're stripping revision minlink and
2815 2829 its descendants from the repository.
2816 2830
2817 2831 We have to remove all revisions with linkrev >= minlink, because
2818 2832 the equivalent changelog revisions will be renumbered after the
2819 2833 strip.
2820 2834
2821 2835 So we truncate the revlog on the first of these revisions, and
2822 2836 trust that the caller has saved the revisions that shouldn't be
2823 2837 removed and that it'll re-add them after this truncation.
2824 2838 """
2825 2839 if len(self) == 0:
2826 2840 return
2827 2841
2828 2842 rev, _ = self.getstrippoint(minlink)
2829 2843 if rev == len(self):
2830 2844 return
2831 2845
2832 2846 # first truncate the files on disk
2833 2847 data_end = self.start(rev)
2834 2848 if not self._inline:
2835 2849 transaction.add(self._datafile, data_end)
2836 2850 end = rev * self.index.entry_size
2837 2851 else:
2838 2852 end = data_end + (rev * self.index.entry_size)
2839 2853
2840 2854 if self._sidedatafile:
2841 2855 sidedata_end = self.sidedata_cut_off(rev)
2842 2856 transaction.add(self._sidedatafile, sidedata_end)
2843 2857
2844 2858 transaction.add(self._indexfile, end)
2845 2859 if self._docket is not None:
 2846 2860 # XXX we could leverage the docket while stripping. However it is
 2847 2861 # not powerful enough at the time of this comment
2848 2862 self._docket.index_end = end
2849 2863 self._docket.data_end = data_end
2850 2864 self._docket.sidedata_end = sidedata_end
2851 2865 self._docket.write(transaction, stripping=True)
2852 2866
2853 2867 # then reset internal state in memory to forget those revisions
2854 2868 self._revisioncache = None
2855 2869 self._chaininfocache = util.lrucachedict(500)
2856 2870 self._segmentfile.clear_cache()
2857 2871 self._segmentfile_sidedata.clear_cache()
2858 2872
2859 2873 del self.index[rev:-1]
2860 2874
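# Editorial sketch, not part of the upstream source: the truncation offset
# arithmetic used by strip() above, with made-up numbers.  It mirrors the
# ``end`` value computed for the index file: for an inline revlog the index
# entries and revision data share one file, so the cut point is the data
# offset of the first stripped revision plus the index entries before it.
def _example_truncation_offset(rev, data_start, entry_size, inline):
    if inline:
        return data_start + rev * entry_size
    return rev * entry_size

# e.g. stripping from rev 3 with 64-byte entries and data starting at 1024
assert _example_truncation_offset(3, 1024, 64, True) == 1216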
2861 2875 def checksize(self):
2862 2876 """Check size of index and data files
2863 2877
2864 2878 return a (dd, di) tuple.
2865 2879 - dd: extra bytes for the "data" file
2866 2880 - di: extra bytes for the "index" file
2867 2881
2868 2882 A healthy revlog will return (0, 0).
2869 2883 """
2870 2884 expected = 0
2871 2885 if len(self):
2872 2886 expected = max(0, self.end(len(self) - 1))
2873 2887
2874 2888 try:
2875 2889 with self._datafp() as f:
2876 2890 f.seek(0, io.SEEK_END)
2877 2891 actual = f.tell()
2878 2892 dd = actual - expected
2879 2893 except FileNotFoundError:
2880 2894 dd = 0
2881 2895
2882 2896 try:
2883 2897 f = self.opener(self._indexfile)
2884 2898 f.seek(0, io.SEEK_END)
2885 2899 actual = f.tell()
2886 2900 f.close()
2887 2901 s = self.index.entry_size
2888 2902 i = max(0, actual // s)
2889 2903 di = actual - (i * s)
2890 2904 if self._inline:
2891 2905 databytes = 0
2892 2906 for r in self:
2893 2907 databytes += max(0, self.length(r))
2894 2908 dd = 0
2895 2909 di = actual - len(self) * s - databytes
2896 2910 except FileNotFoundError:
2897 2911 di = 0
2898 2912
2899 2913 return (dd, di)
2900 2914
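# Editorial sketch, not part of the upstream source: the (dd, di) arithmetic
# from checksize() with made-up sizes (64-byte entries, data expected to end
# at byte 4096).  A healthy revlog yields (0, 0).
def _example_checksize(index_size, data_size, entry_size=64, expected=4096):
    dd = data_size - expected               # surplus bytes in the data file
    i = max(0, index_size // entry_size)    # whole index entries present
    di = index_size - i * entry_size        # trailing partial entry, if any
    return dd, di

assert _example_checksize(640, 4096) == (0, 0)   # healthy
assert _example_checksize(672, 4100) == (4, 32)  # extra data + half an entry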
2901 2915 def files(self):
2902 2916 res = [self._indexfile]
2903 2917 if self._docket_file is None:
2904 2918 if not self._inline:
2905 2919 res.append(self._datafile)
2906 2920 else:
2907 2921 res.append(self._docket_file)
2908 2922 res.extend(self._docket.old_index_filepaths(include_empty=False))
2909 2923 if self._docket.data_end:
2910 2924 res.append(self._datafile)
2911 2925 res.extend(self._docket.old_data_filepaths(include_empty=False))
2912 2926 if self._docket.sidedata_end:
2913 2927 res.append(self._sidedatafile)
2914 2928 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
2915 2929 return res
2916 2930
2917 2931 def emitrevisions(
2918 2932 self,
2919 2933 nodes,
2920 2934 nodesorder=None,
2921 2935 revisiondata=False,
2922 2936 assumehaveparentrevisions=False,
2923 2937 deltamode=repository.CG_DELTAMODE_STD,
2924 2938 sidedata_helpers=None,
2925 2939 debug_info=None,
2926 2940 ):
2927 2941 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2928 2942 raise error.ProgrammingError(
2929 2943 b'unhandled value for nodesorder: %s' % nodesorder
2930 2944 )
2931 2945
2932 2946 if nodesorder is None and not self._generaldelta:
2933 2947 nodesorder = b'storage'
2934 2948
2935 2949 if (
2936 2950 not self._storedeltachains
2937 2951 and deltamode != repository.CG_DELTAMODE_PREV
2938 2952 ):
2939 2953 deltamode = repository.CG_DELTAMODE_FULL
2940 2954
2941 2955 return storageutil.emitrevisions(
2942 2956 self,
2943 2957 nodes,
2944 2958 nodesorder,
2945 2959 revlogrevisiondelta,
2946 2960 deltaparentfn=self.deltaparent,
2947 2961 candeltafn=self.candelta,
2948 2962 rawsizefn=self.rawsize,
2949 2963 revdifffn=self.revdiff,
2950 2964 flagsfn=self.flags,
2951 2965 deltamode=deltamode,
2952 2966 revisiondata=revisiondata,
2953 2967 assumehaveparentrevisions=assumehaveparentrevisions,
2954 2968 sidedata_helpers=sidedata_helpers,
2955 2969 debug_info=debug_info,
2956 2970 )
2957 2971
2958 2972 DELTAREUSEALWAYS = b'always'
2959 2973 DELTAREUSESAMEREVS = b'samerevs'
2960 2974 DELTAREUSENEVER = b'never'
2961 2975
2962 2976 DELTAREUSEFULLADD = b'fulladd'
2963 2977
2964 2978 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2965 2979
2966 2980 def clone(
2967 2981 self,
2968 2982 tr,
2969 2983 destrevlog,
2970 2984 addrevisioncb=None,
2971 2985 deltareuse=DELTAREUSESAMEREVS,
2972 2986 forcedeltabothparents=None,
2973 2987 sidedata_helpers=None,
2974 2988 ):
2975 2989 """Copy this revlog to another, possibly with format changes.
2976 2990
2977 2991 The destination revlog will contain the same revisions and nodes.
2978 2992 However, it may not be bit-for-bit identical due to e.g. delta encoding
2979 2993 differences.
2980 2994
 2981 2995 The ``deltareuse`` argument controls how deltas from the existing revlog
2982 2996 are preserved in the destination revlog. The argument can have the
2983 2997 following values:
2984 2998
2985 2999 DELTAREUSEALWAYS
2986 3000 Deltas will always be reused (if possible), even if the destination
2987 3001 revlog would not select the same revisions for the delta. This is the
2988 3002 fastest mode of operation.
2989 3003 DELTAREUSESAMEREVS
2990 3004 Deltas will be reused if the destination revlog would pick the same
2991 3005 revisions for the delta. This mode strikes a balance between speed
2992 3006 and optimization.
2993 3007 DELTAREUSENEVER
2994 3008 Deltas will never be reused. This is the slowest mode of execution.
2995 3009 This mode can be used to recompute deltas (e.g. if the diff/delta
2996 3010 algorithm changes).
2997 3011 DELTAREUSEFULLADD
 2998 3012 Revisions will be re-added as if they were new content. This is
 2999 3013 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
 3000 3014 e.g. large file detection and handling.
3001 3015
3002 3016 Delta computation can be slow, so the choice of delta reuse policy can
3003 3017 significantly affect run time.
3004 3018
3005 3019 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3006 3020 two extremes. Deltas will be reused if they are appropriate. But if the
3007 3021 delta could choose a better revision, it will do so. This means if you
3008 3022 are converting a non-generaldelta revlog to a generaldelta revlog,
3009 3023 deltas will be recomputed if the delta's parent isn't a parent of the
3010 3024 revision.
3011 3025
3012 3026 In addition to the delta policy, the ``forcedeltabothparents``
 3013 3027 argument controls whether to force computing deltas against both parents
 3014 3028 for merges. By default, the destination revlog's existing setting is kept.
3015 3029
3016 3030 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3017 3031 `sidedata_helpers`.
3018 3032 """
3019 3033 if deltareuse not in self.DELTAREUSEALL:
3020 3034 raise ValueError(
3021 3035 _(b'value for deltareuse invalid: %s') % deltareuse
3022 3036 )
3023 3037
3024 3038 if len(destrevlog):
3025 3039 raise ValueError(_(b'destination revlog is not empty'))
3026 3040
3027 3041 if getattr(self, 'filteredrevs', None):
3028 3042 raise ValueError(_(b'source revlog has filtered revisions'))
3029 3043 if getattr(destrevlog, 'filteredrevs', None):
3030 3044 raise ValueError(_(b'destination revlog has filtered revisions'))
3031 3045
 3032 3046 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3033 3047 # if possible.
3034 3048 oldlazydelta = destrevlog._lazydelta
3035 3049 oldlazydeltabase = destrevlog._lazydeltabase
3036 3050 oldamd = destrevlog._deltabothparents
3037 3051
3038 3052 try:
3039 3053 if deltareuse == self.DELTAREUSEALWAYS:
3040 3054 destrevlog._lazydeltabase = True
3041 3055 destrevlog._lazydelta = True
3042 3056 elif deltareuse == self.DELTAREUSESAMEREVS:
3043 3057 destrevlog._lazydeltabase = False
3044 3058 destrevlog._lazydelta = True
3045 3059 elif deltareuse == self.DELTAREUSENEVER:
3046 3060 destrevlog._lazydeltabase = False
3047 3061 destrevlog._lazydelta = False
3048 3062
3049 3063 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3050 3064
3051 3065 self._clone(
3052 3066 tr,
3053 3067 destrevlog,
3054 3068 addrevisioncb,
3055 3069 deltareuse,
3056 3070 forcedeltabothparents,
3057 3071 sidedata_helpers,
3058 3072 )
3059 3073
3060 3074 finally:
3061 3075 destrevlog._lazydelta = oldlazydelta
3062 3076 destrevlog._lazydeltabase = oldlazydeltabase
3063 3077 destrevlog._deltabothparents = oldamd
3064 3078
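# Editorial sketch, not part of the upstream source: a hypothetical upgrade
# step cloning one revlog into an empty destination while recomputing every
# delta.  `src`, `dst` and `tr` are assumed to be a source revlog, an empty
# destination revlog and an open transaction.
def _example_recompute_deltas(src, dst, tr):
    src.clone(
        tr,
        dst,
        deltareuse=src.DELTAREUSENEVER,  # force fresh delta computation
        forcedeltabothparents=None,      # keep the destination's own setting
    )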
3065 3079 def _clone(
3066 3080 self,
3067 3081 tr,
3068 3082 destrevlog,
3069 3083 addrevisioncb,
3070 3084 deltareuse,
3071 3085 forcedeltabothparents,
3072 3086 sidedata_helpers,
3073 3087 ):
3074 3088 """perform the core duty of `revlog.clone` after parameter processing"""
3075 3089 write_debug = None
3076 3090 if self._debug_delta:
3077 3091 write_debug = tr._report
3078 3092 deltacomputer = deltautil.deltacomputer(
3079 3093 destrevlog,
3080 3094 write_debug=write_debug,
3081 3095 )
3082 3096 index = self.index
3083 3097 for rev in self:
3084 3098 entry = index[rev]
3085 3099
3086 3100 # Some classes override linkrev to take filtered revs into
3087 3101 # account. Use raw entry from index.
3088 3102 flags = entry[0] & 0xFFFF
3089 3103 linkrev = entry[4]
3090 3104 p1 = index[entry[5]][7]
3091 3105 p2 = index[entry[6]][7]
3092 3106 node = entry[7]
3093 3107
3094 3108 # (Possibly) reuse the delta from the revlog if allowed and
3095 3109 # the revlog chunk is a delta.
3096 3110 cachedelta = None
3097 3111 rawtext = None
3098 3112 if deltareuse == self.DELTAREUSEFULLADD:
3099 3113 text = self._revisiondata(rev)
3100 3114 sidedata = self.sidedata(rev)
3101 3115
3102 3116 if sidedata_helpers is not None:
3103 3117 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3104 3118 self, sidedata_helpers, sidedata, rev
3105 3119 )
3106 3120 flags = flags | new_flags[0] & ~new_flags[1]
3107 3121
3108 3122 destrevlog.addrevision(
3109 3123 text,
3110 3124 tr,
3111 3125 linkrev,
3112 3126 p1,
3113 3127 p2,
3114 3128 cachedelta=cachedelta,
3115 3129 node=node,
3116 3130 flags=flags,
3117 3131 deltacomputer=deltacomputer,
3118 3132 sidedata=sidedata,
3119 3133 )
3120 3134 else:
3121 3135 if destrevlog._lazydelta:
3122 3136 dp = self.deltaparent(rev)
3123 3137 if dp != nullrev:
3124 3138 cachedelta = (dp, bytes(self._chunk(rev)))
3125 3139
3126 3140 sidedata = None
3127 3141 if not cachedelta:
3128 3142 rawtext = self._revisiondata(rev)
3129 3143 sidedata = self.sidedata(rev)
3130 3144 if sidedata is None:
3131 3145 sidedata = self.sidedata(rev)
3132 3146
3133 3147 if sidedata_helpers is not None:
3134 3148 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3135 3149 self, sidedata_helpers, sidedata, rev
3136 3150 )
3137 3151 flags = flags | new_flags[0] & ~new_flags[1]
3138 3152
3139 3153 with destrevlog._writing(tr):
3140 3154 destrevlog._addrevision(
3141 3155 node,
3142 3156 rawtext,
3143 3157 tr,
3144 3158 linkrev,
3145 3159 p1,
3146 3160 p2,
3147 3161 flags,
3148 3162 cachedelta,
3149 3163 deltacomputer=deltacomputer,
3150 3164 sidedata=sidedata,
3151 3165 )
3152 3166
3153 3167 if addrevisioncb:
3154 3168 addrevisioncb(self, rev, node)
3155 3169
3156 3170 def censorrevision(self, tr, censornode, tombstone=b''):
3157 3171 if self._format_version == REVLOGV0:
3158 3172 raise error.RevlogError(
3159 3173 _(b'cannot censor with version %d revlogs')
3160 3174 % self._format_version
3161 3175 )
3162 3176 elif self._format_version == REVLOGV1:
3163 3177 rewrite.v1_censor(self, tr, censornode, tombstone)
3164 3178 else:
3165 3179 rewrite.v2_censor(self, tr, censornode, tombstone)
3166 3180
3167 3181 def verifyintegrity(self, state):
3168 3182 """Verifies the integrity of the revlog.
3169 3183
3170 3184 Yields ``revlogproblem`` instances describing problems that are
3171 3185 found.
3172 3186 """
3173 3187 dd, di = self.checksize()
3174 3188 if dd:
3175 3189 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3176 3190 if di:
3177 3191 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3178 3192
3179 3193 version = self._format_version
3180 3194
3181 3195 # The verifier tells us what version revlog we should be.
3182 3196 if version != state[b'expectedversion']:
3183 3197 yield revlogproblem(
3184 3198 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3185 3199 % (self.display_id, version, state[b'expectedversion'])
3186 3200 )
3187 3201
3188 3202 state[b'skipread'] = set()
3189 3203 state[b'safe_renamed'] = set()
3190 3204
3191 3205 for rev in self:
3192 3206 node = self.node(rev)
3193 3207
3194 3208 # Verify contents. 4 cases to care about:
3195 3209 #
3196 3210 # common: the most common case
3197 3211 # rename: with a rename
3198 3212 # meta: file content starts with b'\1\n', the metadata
3199 3213 # header defined in filelog.py, but without a rename
3200 3214 # ext: content stored externally
3201 3215 #
3202 3216 # More formally, their differences are shown below:
3203 3217 #
3204 3218 # | common | rename | meta | ext
3205 3219 # -------------------------------------------------------
3206 3220 # flags() | 0 | 0 | 0 | not 0
3207 3221 # renamed() | False | True | False | ?
3208 3222 # rawtext[0:2]=='\1\n'| False | True | True | ?
3209 3223 #
3210 3224 # "rawtext" means the raw text stored in revlog data, which
3211 3225 # could be retrieved by "rawdata(rev)". "text"
3212 3226 # mentioned below is "revision(rev)".
3213 3227 #
3214 3228 # There are 3 different lengths stored physically:
3215 3229 # 1. L1: rawsize, stored in revlog index
3216 3230 # 2. L2: len(rawtext), stored in revlog data
3217 3231 # 3. L3: len(text), stored in revlog data if flags==0, or
3218 3232 # possibly somewhere else if flags!=0
3219 3233 #
3220 3234 # L1 should be equal to L2. L3 could be different from them.
3221 3235 # "text" may or may not affect commit hash depending on flag
3222 3236 # processors (see flagutil.addflagprocessor).
3223 3237 #
3224 3238 # | common | rename | meta | ext
3225 3239 # -------------------------------------------------
3226 3240 # rawsize() | L1 | L1 | L1 | L1
3227 3241 # size() | L1 | L2-LM | L1(*) | L1 (?)
3228 3242 # len(rawtext) | L2 | L2 | L2 | L2
3229 3243 # len(text) | L2 | L2 | L2 | L3
3230 3244 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3231 3245 #
3232 3246 # LM: length of metadata, depending on rawtext
3233 3247 # (*): not ideal, see comment in filelog.size
3234 3248 # (?): could be "- len(meta)" if the resolved content has
3235 3249 # rename metadata
3236 3250 #
3237 3251 # Checks needed to be done:
3238 3252 # 1. length check: L1 == L2, in all cases.
3239 3253 # 2. hash check: depending on flag processor, we may need to
3240 3254 # use either "text" (external), or "rawtext" (in revlog).
3241 3255
3242 3256 try:
3243 3257 skipflags = state.get(b'skipflags', 0)
3244 3258 if skipflags:
3245 3259 skipflags &= self.flags(rev)
3246 3260
3247 3261 _verify_revision(self, skipflags, state, node)
3248 3262
3249 3263 l1 = self.rawsize(rev)
3250 3264 l2 = len(self.rawdata(node))
3251 3265
3252 3266 if l1 != l2:
3253 3267 yield revlogproblem(
3254 3268 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3255 3269 node=node,
3256 3270 )
3257 3271
3258 3272 except error.CensoredNodeError:
3259 3273 if state[b'erroroncensored']:
3260 3274 yield revlogproblem(
3261 3275 error=_(b'censored file data'), node=node
3262 3276 )
3263 3277 state[b'skipread'].add(node)
3264 3278 except Exception as e:
3265 3279 yield revlogproblem(
3266 3280 error=_(b'unpacking %s: %s')
3267 3281 % (short(node), stringutil.forcebytestr(e)),
3268 3282 node=node,
3269 3283 )
3270 3284 state[b'skipread'].add(node)
3271 3285
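# Editorial sketch, not part of the upstream source: verifyintegrity() is a
# generator of revlogproblem instances; a hypothetical caller could split
# them into warnings and errors like this.
def _example_collect_problems(rl, state):
    warnings, errors = [], []
    for problem in rl.verifyintegrity(state):
        if problem.warning:
            warnings.append(problem.warning)
        if problem.error:
            errors.append(problem.error)
    return warnings, errors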
3272 3286 def storageinfo(
3273 3287 self,
3274 3288 exclusivefiles=False,
3275 3289 sharedfiles=False,
3276 3290 revisionscount=False,
3277 3291 trackedsize=False,
3278 3292 storedsize=False,
3279 3293 ):
3280 3294 d = {}
3281 3295
3282 3296 if exclusivefiles:
3283 3297 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3284 3298 if not self._inline:
3285 3299 d[b'exclusivefiles'].append((self.opener, self._datafile))
3286 3300
3287 3301 if sharedfiles:
3288 3302 d[b'sharedfiles'] = []
3289 3303
3290 3304 if revisionscount:
3291 3305 d[b'revisionscount'] = len(self)
3292 3306
3293 3307 if trackedsize:
3294 3308 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3295 3309
3296 3310 if storedsize:
3297 3311 d[b'storedsize'] = sum(
3298 3312 self.opener.stat(path).st_size for path in self.files()
3299 3313 )
3300 3314
3301 3315 return d
3302 3316
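# Editorial sketch, not part of the upstream source: storageinfo() only
# fills in the keys that were explicitly requested, so a hypothetical space
# report has to ask for each size it wants.
def _example_space_report(rl):
    info = rl.storageinfo(trackedsize=True, storedsize=True)
    return {
        b'uncompressed': info[b'trackedsize'],  # sum of rawsize() over revs
        b'on-disk': info[b'storedsize'],        # bytes across revlog files
    }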
3303 3317 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3304 3318 if not self.hassidedata:
3305 3319 return
 3306 3320 # revlog formats with sidedata support do not support inline storage
3307 3321 assert not self._inline
3308 3322 if not helpers[1] and not helpers[2]:
3309 3323 # Nothing to generate or remove
3310 3324 return
3311 3325
3312 3326 new_entries = []
3313 3327 # append the new sidedata
3314 3328 with self._writing(transaction):
3315 3329 ifh, dfh, sdfh = self._writinghandles
3316 3330 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3317 3331
3318 3332 current_offset = sdfh.tell()
3319 3333 for rev in range(startrev, endrev + 1):
3320 3334 entry = self.index[rev]
3321 3335 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3322 3336 store=self,
3323 3337 sidedata_helpers=helpers,
3324 3338 sidedata={},
3325 3339 rev=rev,
3326 3340 )
3327 3341
3328 3342 serialized_sidedata = sidedatautil.serialize_sidedata(
3329 3343 new_sidedata
3330 3344 )
3331 3345
3332 3346 sidedata_compression_mode = COMP_MODE_INLINE
3333 3347 if serialized_sidedata and self.hassidedata:
3334 3348 sidedata_compression_mode = COMP_MODE_PLAIN
3335 3349 h, comp_sidedata = self.compress(serialized_sidedata)
3336 3350 if (
3337 3351 h != b'u'
3338 3352 and comp_sidedata[0] != b'\0'
3339 3353 and len(comp_sidedata) < len(serialized_sidedata)
3340 3354 ):
3341 3355 assert not h
3342 3356 if (
3343 3357 comp_sidedata[0]
3344 3358 == self._docket.default_compression_header
3345 3359 ):
3346 3360 sidedata_compression_mode = COMP_MODE_DEFAULT
3347 3361 serialized_sidedata = comp_sidedata
3348 3362 else:
3349 3363 sidedata_compression_mode = COMP_MODE_INLINE
3350 3364 serialized_sidedata = comp_sidedata
3351 3365 if entry[8] != 0 or entry[9] != 0:
3352 3366 # rewriting entries that already have sidedata is not
3353 3367 # supported yet, because it introduces garbage data in the
3354 3368 # revlog.
3355 3369 msg = b"rewriting existing sidedata is not supported yet"
3356 3370 raise error.Abort(msg)
3357 3371
3358 3372 # Apply (potential) flags to add and to remove after running
3359 3373 # the sidedata helpers
3360 3374 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3361 3375 entry_update = (
3362 3376 current_offset,
3363 3377 len(serialized_sidedata),
3364 3378 new_offset_flags,
3365 3379 sidedata_compression_mode,
3366 3380 )
3367 3381
 3368 3382 # the sidedata computation might have moved the file cursors around
3369 3383 sdfh.seek(current_offset, os.SEEK_SET)
3370 3384 sdfh.write(serialized_sidedata)
3371 3385 new_entries.append(entry_update)
3372 3386 current_offset += len(serialized_sidedata)
3373 3387 self._docket.sidedata_end = sdfh.tell()
3374 3388
3375 3389 # rewrite the new index entries
3376 3390 ifh.seek(startrev * self.index.entry_size)
3377 3391 for i, e in enumerate(new_entries):
3378 3392 rev = startrev + i
3379 3393 self.index.replace_sidedata_info(rev, *e)
3380 3394 packed = self.index.entry_binary(rev)
3381 3395 if rev == 0 and self._docket is None:
3382 3396 header = self._format_flags | self._format_version
3383 3397 header = self.index.pack_header(header)
3384 3398 packed = header + packed
3385 3399 ifh.write(packed)
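Editorial sketch, not part of the upstream source: the compression-mode
selection inside rewrite_sidedata() can be summarised as a small decision
function.  The COMP_MODE_* values and the default-compression check below
are stand-ins for the real revlog constants and docket field.

    COMP_MODE_PLAIN, COMP_MODE_DEFAULT, COMP_MODE_INLINE = 0, 1, 2

    def pick_sidedata_storage(header, compressed, raw, default_header):
        """Keep raw sidedata unless compression actually saves space."""
        if not raw:
            return COMP_MODE_INLINE, raw
        if header == b'u' or compressed[:1] == b'\0' or len(compressed) >= len(raw):
            return COMP_MODE_PLAIN, raw
        if compressed[:1] == default_header:
            return COMP_MODE_DEFAULT, compressed
        return COMP_MODE_INLINE, compressed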
@@ -1,433 +1,428 b''
1 1 Test correctness of revlog inline -> non-inline transition
2 2 ----------------------------------------------------------
3 3
4 4 Helper extension to intercept renames and kill process
5 5
6 6 $ cat > $TESTTMP/intercept_before_rename.py << EOF
7 7 > import os
8 8 > import signal
9 9 > from mercurial import extensions, util
10 10 >
11 11 > def extsetup(ui):
12 > def close(orig, *args, **kwargs):
13 > path = util.normpath(args[0]._atomictempfile__name)
14 > if path.endswith(b'/.hg/store/data/file.i'):
12 > def rename(orig, src, dest, *args, **kwargs):
13 > path = util.normpath(dest)
14 > if path.endswith(b'data/file.i'):
15 15 > os.kill(os.getpid(), signal.SIGKILL)
16 > return orig(*args, **kwargs)
17 > extensions.wrapfunction(util.atomictempfile, 'close', close)
16 > return orig(src, dest, *args, **kwargs)
17 > extensions.wrapfunction(util, 'rename', rename)
18 18 > EOF
19 19
20 20 $ cat > $TESTTMP/intercept_after_rename.py << EOF
21 21 > import os
22 22 > import signal
23 23 > from mercurial import extensions, util
24 24 >
25 25 > def extsetup(ui):
26 26 > def close(orig, *args, **kwargs):
27 27 > path = util.normpath(args[0]._atomictempfile__name)
28 28 > r = orig(*args, **kwargs)
29 29 > if path.endswith(b'/.hg/store/data/file.i'):
30 30 > os.kill(os.getpid(), signal.SIGKILL)
31 31 > return r
32 32 > extensions.wrapfunction(util.atomictempfile, 'close', close)
33 > def extsetup(ui):
34 > def rename(orig, src, dest, *args, **kwargs):
35 > path = util.normpath(dest)
36 > r = orig(src, dest, *args, **kwargs)
37 > if path.endswith(b'data/file.i'):
38 > os.kill(os.getpid(), signal.SIGKILL)
39 > return r
40 > extensions.wrapfunction(util, 'rename', rename)
33 41 > EOF
34 42
35 43 $ cat > $TESTTMP/killme.py << EOF
36 44 > import os
37 45 > import signal
38 46 >
39 47 > def killme(ui, repo, hooktype, **kwargs):
40 48 > os.kill(os.getpid(), signal.SIGKILL)
41 49 > EOF
42 50
43 51 $ cat > $TESTTMP/reader_wait_split.py << EOF
44 52 > import os
45 53 > import signal
46 54 > from mercurial import extensions, revlog, testing
47 55 > def _wait_post_load(orig, self, *args, **kwargs):
48 56 > wait = b'data/file' in self.radix
49 57 > if wait:
50 58 > testing.wait_file(b"$TESTTMP/writer-revlog-split")
51 59 > r = orig(self, *args, **kwargs)
52 60 > if wait:
53 61 > testing.write_file(b"$TESTTMP/reader-index-read")
54 62 > testing.wait_file(b"$TESTTMP/writer-revlog-unsplit")
55 63 > return r
56 64 >
57 65 > def extsetup(ui):
58 66 > extensions.wrapfunction(revlog.revlog, '_loadindex', _wait_post_load)
59 67 > EOF
60 68
61 69 setup a repository for tests
62 70 ----------------------------
63 71
64 72 $ cat >> $HGRCPATH << EOF
65 73 > [format]
66 74 > revlog-compression=none
67 75 > EOF
68 76
69 77 $ hg init troffset-computation
70 78 $ cd troffset-computation
71 79 $ printf '%20d' '1' > file
72 80 $ hg commit -Aqma
73 81 $ printf '%1024d' '1' > file
74 82 $ hg commit -Aqmb
75 83 $ printf '%20d' '1' > file
76 84 $ hg commit -Aqmc
77 85 $ dd if=/dev/zero of=file bs=1k count=128 > /dev/null 2>&1
78 $ hg commit -AqmD
86 $ hg commit -AqmD --traceback
79 87
80 88 Reference size:
81 89 $ f -s file
82 90 file: size=131072
83 91 $ f -s .hg/store/data/file*
84 92 .hg/store/data/file.d: size=132139
85 93 .hg/store/data/file.i: size=256
86 94
87 95 $ cd ..
88 96
89 97
90 98 Test a hard crash after the file was split but before the transaction was committed
91 99 ===================================================================================
92 100
93 101 Test offset computation to correctly factor in the index entries themselves.
94 102 Also test that the new data size has the correct size if the transaction is aborted
95 103 after the index has been replaced.
96 104
97 105 Test repo has commits a, b, c, D, where D is large (grows the revlog enough that it
98 106 transitions to non-inline storage). The clone initially has changes a, b
99 107 and will transition to non-inline storage when adding c, D.
100 108
101 109 If the transaction adding c, D is rolled back, then we don't undo the revlog split,
102 110 but truncate the index and the data to remove both c and D.
103 111
104 112
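A quick arithmetic check on the sizes used below (illustrative; the 64 bytes
per index entry is an assumption of this note, not stated by the test): the
clone's inline "file.i" holds two revisions, so 2 * 64 = 128 bytes of index
entries plus 1046 bytes of revision data give the 1174-byte reference size.

    entry_size = 64      # assumed v1 index entry size
    revisions = 2        # commits a and b present in the clone
    data_bytes = 1046    # revision data for those two commits
    assert revisions * entry_size + data_bytes == 1174  # inline file.i size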
105 113 $ hg clone --quiet --rev 1 troffset-computation troffset-computation-copy
106 114 $ cd troffset-computation-copy
107 115
108 116 Reference size:
109 117 $ f -s file
110 118 file: size=1024
111 119 $ f -s .hg/store/data/file*
112 120 .hg/store/data/file.i: size=1174
113 121
114 122 $ cat > .hg/hgrc <<EOF
115 123 > [hooks]
116 124 > pretxnchangegroup = python:$TESTTMP/killme.py:killme
117 125 > EOF
118 126 #if chg
119 127 $ hg pull ../troffset-computation
120 128 pulling from ../troffset-computation
121 129 [255]
122 130 #else
123 131 $ hg pull ../troffset-computation
124 132 pulling from ../troffset-computation
125 133 Killed
126 134 [137]
127 135 #endif
128 136
129 137
130 The revlog have been split on disk
 138 The inline revlog still exists, but a split version exists next to it
131 139
132 140 $ f -s .hg/store/data/file*
133 141 .hg/store/data/file.d: size=132139
134 .hg/store/data/file.i: size=256
142 .hg/store/data/file.i: size=132395
143 .hg/store/data/file.i.s: size=256
135 144
136 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file | tail -1
137 data/file.i 128
138 145
139 146 The first file.i entry should match the "Reference size" above.
 140 147 The first file.d entry is the temporary record during the split.
141 148
142 The second entry after the split happened. The sum of the second file.d
143 and the second file.i entry should match the first file.i entry.
 149 A "temporary file" entry exists for the split index.
144 150
145 151 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file
146 152 data/file.i 1174
147 153 data/file.d 0
148 data/file.d 1046
149 data/file.i 128
154 $ cat .hg/store/journal.backupfiles | tr -s '\000' ' ' | tr -s '\00' ' '| grep data/file
155 data/file.i data/journal.backup.file.i 0
156 data/file.i.s 0
157
 158 recover rolls the split back; the fncache is still valid
159
150 160 $ hg recover
151 161 rolling back interrupted transaction
152 162 (verify step skipped, run `hg verify` to check your repository content)
153 163 $ f -s .hg/store/data/file*
154 .hg/store/data/file.d: size=1046
155 .hg/store/data/file.i: size=128
164 .hg/store/data/file.i: size=1174
156 165 $ hg tip
157 166 changeset: 1:cfa8d6e60429
158 167 tag: tip
159 168 user: test
160 169 date: Thu Jan 01 00:00:00 1970 +0000
161 170 summary: b
162 171
163 172 $ hg verify -q
164 warning: revlog 'data/file.d' not in fncache!
165 1 warnings encountered!
166 hint: run "hg debugrebuildfncache" to recover from corrupt fncache
167 173 $ hg debugrebuildfncache --only-data
168 adding data/file.d
169 1 items added, 0 removed from fncache
174 fncache already up to date
170 175 $ hg verify -q
171 176 $ cd ..
172 177
173 178 Test a hard crash right before the index is move into place
174 179 ===========================================================
175 180
176 181 Now retry the procedure but intercept the rename of the index and check that
177 182 the journal does not contain the new index size. This demonstrates the edge case
178 183 where the data file is left as garbage.
179 184
180 185 $ hg clone --quiet --rev 1 troffset-computation troffset-computation-copy2
181 186 $ cd troffset-computation-copy2
182 187
183 188 Reference size:
184 189 $ f -s file
185 190 file: size=1024
186 191 $ f -s .hg/store/data/file*
187 192 .hg/store/data/file.i: size=1174
188 193
189 194 $ cat > .hg/hgrc <<EOF
190 195 > [extensions]
191 196 > intercept_rename = $TESTTMP/intercept_before_rename.py
192 > [hooks]
193 > pretxnchangegroup = python:$TESTTMP/killme.py:killme
194 197 > EOF
195 198 #if chg
196 199 $ hg pull ../troffset-computation
197 200 pulling from ../troffset-computation
201 searching for changes
202 adding changesets
203 adding manifests
204 adding file changes
198 205 [255]
199 206 #else
200 207 $ hg pull ../troffset-computation
201 208 pulling from ../troffset-computation
209 searching for changes
210 adding changesets
211 adding manifests
212 adding file changes
202 213 Killed
203 214 [137]
204 215 #endif
205 216
206 The data file is created, but the revlog is still inline
 217 The inline revlog still exists, but a split version exists next to it
207 218
208 219 $ f -s .hg/store/data/file*
209 220 .hg/store/data/file.d: size=132139
210 221 .hg/store/data/file.i: size=132395
222 .hg/store/data/file.i.s: size=256
211 223
212 224 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file
213 225 data/file.i 1174
214 226 data/file.d 0
215 data/file.d 1046
227
 228 recover rolls the split back; the fncache is still valid
216 229
217 230 $ hg recover
218 231 rolling back interrupted transaction
219 232 (verify step skipped, run `hg verify` to check your repository content)
220 233 $ f -s .hg/store/data/file*
221 .hg/store/data/file.d: size=1046
222 234 .hg/store/data/file.i: size=1174
223 235 $ hg tip
224 236 changeset: 1:cfa8d6e60429
225 237 tag: tip
226 238 user: test
227 239 date: Thu Jan 01 00:00:00 1970 +0000
228 240 summary: b
229 241
230 242 $ hg verify -q
231 243 $ cd ..
232 244
233 245 Test a hard crash right after the index is move into place
234 246 ===========================================================
235 247
236 248 Now retry the procedure but intercept the rename of the index.
237 249
238 Things get corrupted /o\
239
240 250 $ hg clone --quiet --rev 1 troffset-computation troffset-computation-crash-after-rename
241 251 $ cd troffset-computation-crash-after-rename
242 252
243 253 Reference size:
244 254 $ f -s file
245 255 file: size=1024
246 256 $ f -s .hg/store/data/file*
247 257 .hg/store/data/file.i: size=1174
248 258
249 259 $ cat > .hg/hgrc <<EOF
250 260 > [extensions]
251 261 > intercept_rename = $TESTTMP/intercept_after_rename.py
252 > [hooks]
253 > pretxnchangegroup = python:$TESTTMP/killme.py:killme
254 262 > EOF
255 263 #if chg
256 264 $ hg pull ../troffset-computation
257 265 pulling from ../troffset-computation
266 searching for changes
267 adding changesets
268 adding manifests
269 adding file changes
258 270 [255]
259 271 #else
260 272 $ hg pull ../troffset-computation
261 273 pulling from ../troffset-computation
274 searching for changes
275 adding changesets
276 adding manifests
277 adding file changes
262 278 Killed
263 279 [137]
264 280 #endif
265 281
266 the revlog has been split on disk
 282 The inline revlog was overwritten on disk
267 283
268 284 $ f -s .hg/store/data/file*
269 285 .hg/store/data/file.d: size=132139
270 286 .hg/store/data/file.i: size=256
271 287
272 288 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file
273 289 data/file.i 1174
274 290 data/file.d 0
275 data/file.d 1046
291
 292 recover rolls the split back; the fncache is still valid
276 293
277 294 $ hg recover
278 295 rolling back interrupted transaction
279 abort: attempted to truncate data/file.i to 1174 bytes, but it was already 256 bytes
280
281 [255]
296 (verify step skipped, run `hg verify` to check your repository content)
282 297 $ f -s .hg/store/data/file*
283 .hg/store/data/file.d: size=1046
284 .hg/store/data/file.i: size=256
298 .hg/store/data/file.i: size=1174
285 299 $ hg tip
286 300 changeset: 1:cfa8d6e60429
287 301 tag: tip
288 302 user: test
289 303 date: Thu Jan 01 00:00:00 1970 +0000
290 304 summary: b
291 305
292 306 $ hg verify -q
293 abandoned transaction found - run hg recover
294 warning: revlog 'data/file.d' not in fncache!
295 file@0: data length off by -131093 bytes
296 file@2: unpacking fa1120531cc1: partial read of revlog data/file.d; expected 21 bytes from offset 1046, got 0
297 file@3: unpacking a631378adaa3: partial read of revlog data/file.d; expected 131072 bytes from offset 1067, got -21
298 file@?: rev 2 points to nonexistent changeset 2
299 (expected )
300 file@?: fa1120531cc1 not in manifests
301 file@?: rev 3 points to nonexistent changeset 3
302 (expected )
303 file@?: a631378adaa3 not in manifests
304 not checking dirstate because of previous errors
305 3 warnings encountered!
306 hint: run "hg debugrebuildfncache" to recover from corrupt fncache
307 7 integrity errors encountered!
308 (first damaged changeset appears to be 0)
309 [1]
310 307 $ cd ..
311 308
312 309 Have the transaction rollback itself without any hard crash
313 310 ===========================================================
314 311
315 312
316 313 Repeat the original test but let hg rollback the transaction.
317 314
318 315 $ hg clone --quiet --rev 1 troffset-computation troffset-computation-copy-rb
319 316 $ cd troffset-computation-copy-rb
320 317 $ cat > .hg/hgrc <<EOF
321 318 > [hooks]
322 319 > pretxnchangegroup = false
323 320 > EOF
324 321 $ hg pull ../troffset-computation
325 322 pulling from ../troffset-computation
326 323 searching for changes
327 324 adding changesets
328 325 adding manifests
329 326 adding file changes
330 327 transaction abort!
331 328 rollback completed
332 329 abort: pretxnchangegroup hook exited with status 1
333 330 [40]
334 331
335 File are still split on disk, with the expected size.
 332 The split was rolled back
336 333
337 334 $ f -s .hg/store/data/file*
338 .hg/store/data/file.d: size=1046
339 .hg/store/data/file.i: size=128
335 .hg/store/data/file.d: size=0
336 .hg/store/data/file.i: size=1174
337
340 338
341 339 $ hg tip
342 340 changeset: 1:cfa8d6e60429
343 341 tag: tip
344 342 user: test
345 343 date: Thu Jan 01 00:00:00 1970 +0000
346 344 summary: b
347 345
348 346 $ hg verify -q
349 warning: revlog 'data/file.d' not in fncache!
350 1 warnings encountered!
351 hint: run "hg debugrebuildfncache" to recover from corrupt fncache
352 347 $ cd ..
353 348
354 349 Read race
355 350 =========
356 351
357 352 We check that a client that started reading a revlog (its index) after the
 358 353 split and ends reading (the data) after the rollback is fine
359 354
360 355 $ hg clone --quiet --rev 1 troffset-computation troffset-computation-race
361 356 $ cd troffset-computation-race
362 357 $ cat > .hg/hgrc <<EOF
363 358 > [hooks]
364 359 > pretxnchangegroup=$RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/reader-index-read $TESTTMP/writer-revlog-split
365 360 > pretxnclose = false
366 361 > EOF
367 362
368 363 start a reader
369 364
370 365 $ hg cat --rev 0 file \
371 366 > --config "extensions.wait_read=$TESTTMP/reader_wait_split.py" \
372 367 > 2> $TESTTMP/reader.stderr \
373 368 > > $TESTTMP/reader.stdout &
374 369
 375 370 Do a failed pull in parallel
376 371
377 372 $ hg pull ../troffset-computation
378 373 pulling from ../troffset-computation
379 374 searching for changes
380 375 adding changesets
381 376 adding manifests
382 377 adding file changes
383 378 transaction abort!
384 379 rollback completed
385 380 abort: pretxnclose hook exited with status 1
386 381 [40]
387 382 $ touch $TESTTMP/writer-revlog-unsplit
388 383 $ wait
389 384
390 385 The reader should be fine
391 386 $ cat $TESTTMP/reader.stderr
392 387 $ cat $TESTTMP/reader.stdout
393 388 1 (no-eol)
394 389 $ cd ..
395 390
396 391 pending hooks
397 392 =============
398 393
 399 394 We check that hooks properly see the inside of the transaction, while other processes don't.
400 395
401 396 $ hg clone --quiet --rev 1 troffset-computation troffset-computation-hooks
402 397 $ cd troffset-computation-hooks
403 398 $ cat > .hg/hgrc <<EOF
404 399 > [hooks]
405 400 > pretxnclose.01-echo = hg cat -r 'max(all())' file | f --size
406 401 > pretxnclose.02-echo = $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/hook-done $TESTTMP/hook-tr-ready
407 402 > pretxnclose.03-abort = false
408 403 > EOF
409 404
410 405 $ (
411 406 > $RUNTESTDIR/testlib/wait-on-file 5 $TESTTMP/hook-tr-ready;\
412 407 > hg cat -r 'max(all())' file | f --size;\
413 408 > touch $TESTTMP/hook-done
414 409 > ) >stdout 2>stderr &
415 410
416 411 $ hg pull ../troffset-computation
417 412 pulling from ../troffset-computation
418 413 searching for changes
419 414 adding changesets
420 415 adding manifests
421 416 adding file changes
422 417 size=131072
423 418 transaction abort!
424 419 rollback completed
425 420 abort: pretxnclose.03-abort hook exited with status 1
426 421 [40]
427 422
428 423 $ cat stdout
429 424 size=1024
430 425 $ cat stderr
431 426
432 427
433 428 $ cd ..