nodemap: drop the 'exp-' prefix for internal opener option...
marmoute - r45296:5e3c7186 default
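This changeset renames the internal opener option that enables the persistent
nodemap, dropping the experimental prefix. A minimal sketch of the effect
(illustrative, mirroring the hunks below); note that the user-facing config
keys under [experimental] keep their 'exp-' prefix in this commit, only the
internal opener-option names change:

    # before
    persistentnodemap = opener.options.get(b'exp-persistent-nodemap', False)
    # after
    persistentnodemap = opener.options.get(b'persistent-nodemap', False)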
@@ -1,629 +1,627 @@ mercurial/changelog.py
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullid,
15 15 )
16 16 from .thirdparty import attr
17 17
18 18 from . import (
19 19 copies,
20 20 encoding,
21 21 error,
22 22 pycompat,
23 23 revlog,
24 24 )
25 25 from .utils import (
26 26 dateutil,
27 27 stringutil,
28 28 )
29 29
30 30 from .revlogutils import sidedata as sidedatamod
31 31
32 32 _defaultextra = {b'branch': b'default'}
33 33
34 34
35 35 def _string_escape(text):
36 36 """
37 37 >>> from .pycompat import bytechr as chr
38 38 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
39 39 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
40 40 >>> s
41 41 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
42 42 >>> res = _string_escape(s)
43 43 >>> s == _string_unescape(res)
44 44 True
45 45 """
46 46 # subset of the string_escape codec
47 47 text = (
48 48 text.replace(b'\\', b'\\\\')
49 49 .replace(b'\n', b'\\n')
50 50 .replace(b'\r', b'\\r')
51 51 )
52 52 return text.replace(b'\0', b'\\0')
53 53
54 54
55 55 def _string_unescape(text):
56 56 if b'\\0' in text:
57 57 # fix up \0 without getting into trouble with \\0
58 58 text = text.replace(b'\\\\', b'\\\\\n')
59 59 text = text.replace(b'\\0', b'\0')
60 60 text = text.replace(b'\n', b'')
61 61 return stringutil.unescapestr(text)
62 62
63 63
64 64 def decodeextra(text):
65 65 """
66 66 >>> from .pycompat import bytechr as chr
67 67 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
68 68 ... ).items())
69 69 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
70 70 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
71 71 ... b'baz': chr(92) + chr(0) + b'2'})
72 72 ... ).items())
73 73 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
74 74 """
75 75 extra = _defaultextra.copy()
76 76 for l in text.split(b'\0'):
77 77 if l:
78 78 k, v = _string_unescape(l).split(b':', 1)
79 79 extra[k] = v
80 80 return extra
81 81
82 82
83 83 def encodeextra(d):
84 84 # keys must be sorted to produce a deterministic changelog entry
85 85 items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
86 86 return b"\0".join(items)
87 87
88 88
89 89 def stripdesc(desc):
90 90 """strip trailing whitespace and leading and trailing empty lines"""
91 91 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
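# Editor's illustration (not part of the diff): a quick worked example of
# stripdesc's normalization:
#   stripdesc(b'\n\n  hello \nworld  \n\n') == b'  hello\nworld'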
92 92
93 93
94 94 class appender(object):
95 95 '''the changelog index must be updated last on disk, so we use this class
96 96 to delay writes to it'''
97 97
98 98 def __init__(self, vfs, name, mode, buf):
99 99 self.data = buf
100 100 fp = vfs(name, mode)
101 101 self.fp = fp
102 102 self.offset = fp.tell()
103 103 self.size = vfs.fstat(fp).st_size
104 104 self._end = self.size
105 105
106 106 def end(self):
107 107 return self._end
108 108
109 109 def tell(self):
110 110 return self.offset
111 111
112 112 def flush(self):
113 113 pass
114 114
115 115 @property
116 116 def closed(self):
117 117 return self.fp.closed
118 118
119 119 def close(self):
120 120 self.fp.close()
121 121
122 122 def seek(self, offset, whence=0):
123 123 '''virtual file offset spans real file and data'''
124 124 if whence == 0:
125 125 self.offset = offset
126 126 elif whence == 1:
127 127 self.offset += offset
128 128 elif whence == 2:
129 129 self.offset = self.end() + offset
130 130 if self.offset < self.size:
131 131 self.fp.seek(self.offset)
132 132
133 133 def read(self, count=-1):
134 134 '''only trick here is reads that span real file and data'''
135 135 ret = b""
136 136 if self.offset < self.size:
137 137 s = self.fp.read(count)
138 138 ret = s
139 139 self.offset += len(s)
140 140 if count > 0:
141 141 count -= len(s)
142 142 if count != 0:
143 143 doff = self.offset - self.size
144 144 self.data.insert(0, b"".join(self.data))
145 145 del self.data[1:]
146 146 s = self.data[0][doff : doff + count]
147 147 self.offset += len(s)
148 148 ret += s
149 149 return ret
150 150
151 151 def write(self, s):
152 152 self.data.append(bytes(s))
153 153 self.offset += len(s)
154 154 self._end += len(s)
155 155
156 156 def __enter__(self):
157 157 self.fp.__enter__()
158 158 return self
159 159
160 160 def __exit__(self, *args):
161 161 return self.fp.__exit__(*args)
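# Editor's sketch of the virtual-offset behavior (vfs assumed; the file holds
# 10 bytes on disk): reads transparently span the real file and the buffer.
#   buf = []
#   a = appender(vfs, b'00changelog.i', b'a+b', buf)
#   a.write(b'pending')   # buffered only: a.end() == 17, file still 10 bytes
#   a.seek(14)            # virtual offset landing inside the buffer
#   a.read(3)             # -> b'ing', served from buf rather than the file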
162 162
163 163
164 164 class _divertopener(object):
165 165 def __init__(self, opener, target):
166 166 self._opener = opener
167 167 self._target = target
168 168
169 169 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
170 170 if name != self._target:
171 171 return self._opener(name, mode, **kwargs)
172 172 return self._opener(name + b".a", mode, **kwargs)
173 173
174 174 def __getattr__(self, attr):
175 175 return getattr(self._opener, attr)
176 176
177 177
178 178 def _delayopener(opener, target, buf):
179 179 """build an opener that stores chunks in 'buf' instead of 'target'"""
180 180
181 181 def _delay(name, mode=b'r', checkambig=False, **kwargs):
182 182 if name != target:
183 183 return opener(name, mode, **kwargs)
184 184 assert not kwargs
185 185 return appender(opener, name, mode, buf)
186 186
187 187 return _delay
188 188
189 189
190 190 @attr.s
191 191 class _changelogrevision(object):
192 192 # Extensions might modify _defaultextra, so let the constructor below pass
193 193 # it in
194 194 extra = attr.ib()
195 195 manifest = attr.ib(default=nullid)
196 196 user = attr.ib(default=b'')
197 197 date = attr.ib(default=(0, 0))
198 198 files = attr.ib(default=attr.Factory(list))
199 199 filesadded = attr.ib(default=None)
200 200 filesremoved = attr.ib(default=None)
201 201 p1copies = attr.ib(default=None)
202 202 p2copies = attr.ib(default=None)
203 203 description = attr.ib(default=b'')
204 204
205 205
206 206 class changelogrevision(object):
207 207 """Holds results of a parsed changelog revision.
208 208
209 209 Changelog revisions consist of multiple pieces of data, including
210 210 the manifest node, user, and date. This object exposes a view into
211 211 the parsed object.
212 212 """
213 213
214 214 __slots__ = (
215 215 '_offsets',
216 216 '_text',
217 217 '_sidedata',
218 218 '_cpsd',
219 219 )
220 220
221 221 def __new__(cls, text, sidedata, cpsd):
222 222 if not text:
223 223 return _changelogrevision(extra=_defaultextra)
224 224
225 225 self = super(changelogrevision, cls).__new__(cls)
226 226 # We could return here and implement the following as an __init__.
227 227 # But doing it here is equivalent and saves an extra function call.
228 228
229 229 # format used:
230 230 # nodeid\n : manifest node in ascii
231 231 # user\n : user, no \n or \r allowed
232 232 # time tz extra\n : date (time is int or float, timezone is int)
233 233 # : extra is metadata, encoded and separated by '\0'
234 234 # : older versions ignore it
235 235 # files\n\n : files modified by the cset, no \n or \r allowed
236 236 # (.*) : comment (free text, ideally utf-8)
237 237 #
238 238 # changelog v0 doesn't use extra
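# an illustrative entry (editor's example, manifest node shortened):
#   9bd1f25ec6dc\n
#   Alice <alice@example.com>\n
#   1583522787 -3600 branch:stable\n
#   a.txt\n
#   b/b.txt\n
#   \n
#   commit message body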
239 239
240 240 nl1 = text.index(b'\n')
241 241 nl2 = text.index(b'\n', nl1 + 1)
242 242 nl3 = text.index(b'\n', nl2 + 1)
243 243
244 244 # The list of files may be empty, in which case nl3 is the first of the
245 245 # double newline that precedes the description.
246 246 if text[nl3 + 1 : nl3 + 2] == b'\n':
247 247 doublenl = nl3
248 248 else:
249 249 doublenl = text.index(b'\n\n', nl3 + 1)
250 250
251 251 self._offsets = (nl1, nl2, nl3, doublenl)
252 252 self._text = text
253 253 self._sidedata = sidedata
254 254 self._cpsd = cpsd
255 255
256 256 return self
257 257
258 258 @property
259 259 def manifest(self):
260 260 return bin(self._text[0 : self._offsets[0]])
261 261
262 262 @property
263 263 def user(self):
264 264 off = self._offsets
265 265 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
266 266
267 267 @property
268 268 def _rawdate(self):
269 269 off = self._offsets
270 270 dateextra = self._text[off[1] + 1 : off[2]]
271 271 return dateextra.split(b' ', 2)[0:2]
272 272
273 273 @property
274 274 def _rawextra(self):
275 275 off = self._offsets
276 276 dateextra = self._text[off[1] + 1 : off[2]]
277 277 fields = dateextra.split(b' ', 2)
278 278 if len(fields) != 3:
279 279 return None
280 280
281 281 return fields[2]
282 282
283 283 @property
284 284 def date(self):
285 285 raw = self._rawdate
286 286 time = float(raw[0])
287 287 # Various tools did silly things with the timezone.
288 288 try:
289 289 timezone = int(raw[1])
290 290 except ValueError:
291 291 timezone = 0
292 292
293 293 return time, timezone
294 294
295 295 @property
296 296 def extra(self):
297 297 raw = self._rawextra
298 298 if raw is None:
299 299 return _defaultextra
300 300
301 301 return decodeextra(raw)
302 302
303 303 @property
304 304 def files(self):
305 305 off = self._offsets
306 306 if off[2] == off[3]:
307 307 return []
308 308
309 309 return self._text[off[2] + 1 : off[3]].split(b'\n')
310 310
311 311 @property
312 312 def filesadded(self):
313 313 if self._cpsd:
314 314 rawindices = self._sidedata.get(sidedatamod.SD_FILESADDED)
315 315 if not rawindices:
316 316 return []
317 317 else:
318 318 rawindices = self.extra.get(b'filesadded')
319 319 if rawindices is None:
320 320 return None
321 321 return copies.decodefileindices(self.files, rawindices)
322 322
323 323 @property
324 324 def filesremoved(self):
325 325 if self._cpsd:
326 326 rawindices = self._sidedata.get(sidedatamod.SD_FILESREMOVED)
327 327 if not rawindices:
328 328 return []
329 329 else:
330 330 rawindices = self.extra.get(b'filesremoved')
331 331 if rawindices is None:
332 332 return None
333 333 return copies.decodefileindices(self.files, rawindices)
334 334
335 335 @property
336 336 def p1copies(self):
337 337 if self._cpsd:
338 338 rawcopies = self._sidedata.get(sidedatamod.SD_P1COPIES)
339 339 if not rawcopies:
340 340 return {}
341 341 else:
342 342 rawcopies = self.extra.get(b'p1copies')
343 343 if rawcopies is None:
344 344 return None
345 345 return copies.decodecopies(self.files, rawcopies)
346 346
347 347 @property
348 348 def p2copies(self):
349 349 if self._cpsd:
350 350 rawcopies = self._sidedata.get(sidedatamod.SD_P2COPIES)
351 351 if not rawcopies:
352 352 return {}
353 353 else:
354 354 rawcopies = self.extra.get(b'p2copies')
355 355 if rawcopies is None:
356 356 return None
357 357 return copies.decodecopies(self.files, rawcopies)
358 358
359 359 @property
360 360 def description(self):
361 361 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
362 362
363 363
364 364 class changelog(revlog.revlog):
365 365 def __init__(self, opener, trypending=False):
366 366 """Load a changelog revlog using an opener.
367 367
368 368 If ``trypending`` is true, we attempt to load the index from a
369 369 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
370 370 The ``00changelog.i.a`` file contains index (and possibly inline
371 371 revision) data for a transaction that hasn't been finalized yet.
372 372 It exists in a separate file to facilitate readers (such as
373 373 # hook processes) accessing data before a transaction is finalized.
374 374 """
375 375 if trypending and opener.exists(b'00changelog.i.a'):
376 376 indexfile = b'00changelog.i.a'
377 377 else:
378 378 indexfile = b'00changelog.i'
379 379
380 380 datafile = b'00changelog.d'
381 381 revlog.revlog.__init__(
382 382 self,
383 383 opener,
384 384 indexfile,
385 385 datafile=datafile,
386 386 checkambig=True,
387 387 mmaplargeindex=True,
388 persistentnodemap=opener.options.get(
389 b'exp-persistent-nodemap', False
390 ),
388 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
391 389 )
392 390
393 391 if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
394 392 # changelogs don't benefit from generaldelta.
395 393
396 394 self.version &= ~revlog.FLAG_GENERALDELTA
397 395 self._generaldelta = False
398 396
399 397 # Delta chains for changelogs tend to be very small because entries
400 398 # tend to be small and don't delta well with each other. So disable
401 399 # delta chains.
402 400 self._storedeltachains = False
403 401
404 402 self._realopener = opener
405 403 self._delayed = False
406 404 self._delaybuf = None
407 405 self._divert = False
408 406 self.filteredrevs = frozenset()
409 407 self._copiesstorage = opener.options.get(b'copies-storage')
410 408
411 409 def delayupdate(self, tr):
412 410 """delay visibility of index updates to other readers"""
413 411
414 412 if not self._delayed:
415 413 if len(self) == 0:
416 414 self._divert = True
417 415 if self._realopener.exists(self.indexfile + b'.a'):
418 416 self._realopener.unlink(self.indexfile + b'.a')
419 417 self.opener = _divertopener(self._realopener, self.indexfile)
420 418 else:
421 419 self._delaybuf = []
422 420 self.opener = _delayopener(
423 421 self._realopener, self.indexfile, self._delaybuf
424 422 )
425 423 self._delayed = True
426 424 tr.addpending(b'cl-%i' % id(self), self._writepending)
427 425 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
428 426
429 427 def _finalize(self, tr):
430 428 """finalize index updates"""
431 429 self._delayed = False
432 430 self.opener = self._realopener
433 431 # move redirected index data back into place
434 432 if self._divert:
435 433 assert not self._delaybuf
436 434 tmpname = self.indexfile + b".a"
437 435 nfile = self.opener.open(tmpname)
438 436 nfile.close()
439 437 self.opener.rename(tmpname, self.indexfile, checkambig=True)
440 438 elif self._delaybuf:
441 439 fp = self.opener(self.indexfile, b'a', checkambig=True)
442 440 fp.write(b"".join(self._delaybuf))
443 441 fp.close()
444 442 self._delaybuf = None
445 443 self._divert = False
446 444 # split when we're done
447 445 self._enforceinlinesize(tr)
448 446
449 447 def _writepending(self, tr):
450 448 """create a file containing the unfinalized state for
451 449 pretxnchangegroup"""
452 450 if self._delaybuf:
453 451 # make a temporary copy of the index
454 452 fp1 = self._realopener(self.indexfile)
455 453 pendingfilename = self.indexfile + b".a"
456 454 # register as a temp file to ensure cleanup on failure
457 455 tr.registertmp(pendingfilename)
458 456 # write existing data
459 457 fp2 = self._realopener(pendingfilename, b"w")
460 458 fp2.write(fp1.read())
461 459 # add pending data
462 460 fp2.write(b"".join(self._delaybuf))
463 461 fp2.close()
464 462 # switch modes so finalize can simply rename
465 463 self._delaybuf = None
466 464 self._divert = True
467 465 self.opener = _divertopener(self._realopener, self.indexfile)
468 466
469 467 if self._divert:
470 468 return True
471 469
472 470 return False
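# Editor's sketch of the delay lifecycle (tr is an active transaction):
#   cl.delayupdate(tr)   # index writes diverted (empty repo) or buffered
#   ...add revisions...
#   tr.writepending()    # runs _writepending -> 00changelog.i.a for hooks
#   tr.close()           # runs _finalize -> buffered data moved into place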
473 471
474 472 def _enforceinlinesize(self, tr, fp=None):
475 473 if not self._delayed:
476 474 revlog.revlog._enforceinlinesize(self, tr, fp)
477 475
478 476 def read(self, node):
479 477 """Obtain data from a parsed changelog revision.
480 478
481 479 Returns a 6-tuple of:
482 480
483 481 - manifest node in binary
484 482 - author/user as a localstr
485 483 - date as a 2-tuple of (time, timezone)
486 484 - list of files
487 485 - commit message as a localstr
488 486 - dict of extra metadata
489 487
490 488 Unless you need to access all fields, consider calling
491 489 ``changelogrevision`` instead, as it is faster for partial object
492 490 access.
493 491 """
494 492 d, s = self._revisiondata(node)
495 493 c = changelogrevision(
496 494 d, s, self._copiesstorage == b'changeset-sidedata'
497 495 )
498 496 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
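# Editor's note: for partial access, the pattern suggested by the docstring is
#   clrev = cl.changelogrevision(node)
#   clrev.user, clrev.date   # only these fields get parsed
# whereas read() above touches all six fields.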
499 497
500 498 def changelogrevision(self, nodeorrev):
501 499 """Obtain a ``changelogrevision`` for a node or revision."""
502 500 text, sidedata = self._revisiondata(nodeorrev)
503 501 return changelogrevision(
504 502 text, sidedata, self._copiesstorage == b'changeset-sidedata'
505 503 )
506 504
507 505 def readfiles(self, node):
508 506 """
509 507 short version of read that only returns the files modified by the cset
510 508 """
511 509 text = self.revision(node)
512 510 if not text:
513 511 return []
514 512 last = text.index(b"\n\n")
515 513 l = text[:last].split(b'\n')
516 514 return l[3:]
517 515
518 516 def add(
519 517 self,
520 518 manifest,
521 519 files,
522 520 desc,
523 521 transaction,
524 522 p1,
525 523 p2,
526 524 user,
527 525 date=None,
528 526 extra=None,
529 527 p1copies=None,
530 528 p2copies=None,
531 529 filesadded=None,
532 530 filesremoved=None,
533 531 ):
534 532 # Convert to UTF-8 encoded bytestrings as the very first
535 533 # thing: calling any method on a localstr object will turn it
536 534 # into a str object and the cached UTF-8 string is thus lost.
537 535 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
538 536
539 537 user = user.strip()
540 538 # An empty username or a username with a "\n" will make the
541 539 # revision text contain two "\n\n" sequences -> corrupt
542 540 # repository since read cannot unpack the revision.
543 541 if not user:
544 542 raise error.StorageError(_(b"empty username"))
545 543 if b"\n" in user:
546 544 raise error.StorageError(
547 545 _(b"username %r contains a newline") % pycompat.bytestr(user)
548 546 )
549 547
550 548 desc = stripdesc(desc)
551 549
552 550 if date:
553 551 parseddate = b"%d %d" % dateutil.parsedate(date)
554 552 else:
555 553 parseddate = b"%d %d" % dateutil.makedate()
556 554 if extra:
557 555 branch = extra.get(b"branch")
558 556 if branch in (b"default", b""):
559 557 del extra[b"branch"]
560 558 elif branch in (b".", b"null", b"tip"):
561 559 raise error.StorageError(
562 560 _(b'the name \'%s\' is reserved') % branch
563 561 )
564 562 sortedfiles = sorted(files)
565 563 sidedata = None
566 564 if extra is not None:
567 565 for name in (
568 566 b'p1copies',
569 567 b'p2copies',
570 568 b'filesadded',
571 569 b'filesremoved',
572 570 ):
573 571 extra.pop(name, None)
574 572 if p1copies is not None:
575 573 p1copies = copies.encodecopies(sortedfiles, p1copies)
576 574 if p2copies is not None:
577 575 p2copies = copies.encodecopies(sortedfiles, p2copies)
578 576 if filesadded is not None:
579 577 filesadded = copies.encodefileindices(sortedfiles, filesadded)
580 578 if filesremoved is not None:
581 579 filesremoved = copies.encodefileindices(sortedfiles, filesremoved)
582 580 if self._copiesstorage == b'extra':
583 581 extrasentries = p1copies, p2copies, filesadded, filesremoved
584 582 if extra is None and any(x is not None for x in extrasentries):
585 583 extra = {}
586 584 if p1copies is not None:
587 585 extra[b'p1copies'] = p1copies
588 586 if p2copies is not None:
589 587 extra[b'p2copies'] = p2copies
590 588 if filesadded is not None:
591 589 extra[b'filesadded'] = filesadded
592 590 if filesremoved is not None:
593 591 extra[b'filesremoved'] = filesremoved
594 592 elif self._copiesstorage == b'changeset-sidedata':
595 593 sidedata = {}
596 594 if p1copies:
597 595 sidedata[sidedatamod.SD_P1COPIES] = p1copies
598 596 if p2copies:
599 597 sidedata[sidedatamod.SD_P2COPIES] = p2copies
600 598 if filesadded:
601 599 sidedata[sidedatamod.SD_FILESADDED] = filesadded
602 600 if filesremoved:
603 601 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
604 602 if not sidedata:
605 603 sidedata = None
606 604
607 605 if extra:
608 606 extra = encodeextra(extra)
609 607 parseddate = b"%s %s" % (parseddate, extra)
610 608 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
611 609 text = b"\n".join(l)
612 610 return self.addrevision(
613 611 text, transaction, len(self), p1, p2, sidedata=sidedata
614 612 )
615 613
616 614 def branchinfo(self, rev):
617 615 """return the branch name and open/close state of a revision
618 616
619 617 This function exists because creating a changectx object
620 618 just to access this is costly."""
621 619 extra = self.read(rev)[5]
622 620 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
623 621
624 622 def _nodeduplicatecallback(self, transaction, node):
625 623 # keep track of revisions that got "re-added", e.g. unbundle of a known rev.
626 624 #
627 625 # We track them in a list to preserve their order from the source bundle
628 626 duplicates = transaction.changes.setdefault(b'revduplicates', [])
629 627 duplicates.append(self.rev(node))
@@ -1,3828 +1,3828 @@ mercurial/localrepo.py
1 1 # localrepo.py - read/write repository class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import os
12 12 import random
13 13 import sys
14 14 import time
15 15 import weakref
16 16
17 17 from .i18n import _
18 18 from .node import (
19 19 bin,
20 20 hex,
21 21 nullid,
22 22 nullrev,
23 23 short,
24 24 )
25 25 from .pycompat import (
26 26 delattr,
27 27 getattr,
28 28 )
29 29 from . import (
30 30 bookmarks,
31 31 branchmap,
32 32 bundle2,
33 33 changegroup,
34 34 color,
35 35 context,
36 36 dirstate,
37 37 dirstateguard,
38 38 discovery,
39 39 encoding,
40 40 error,
41 41 exchange,
42 42 extensions,
43 43 filelog,
44 44 hook,
45 45 lock as lockmod,
46 46 match as matchmod,
47 47 merge as mergemod,
48 48 mergeutil,
49 49 namespaces,
50 50 narrowspec,
51 51 obsolete,
52 52 pathutil,
53 53 phases,
54 54 pushkey,
55 55 pycompat,
56 56 rcutil,
57 57 repoview,
58 58 revset,
59 59 revsetlang,
60 60 scmutil,
61 61 sparse,
62 62 store as storemod,
63 63 subrepoutil,
64 64 tags as tagsmod,
65 65 transaction,
66 66 txnutil,
67 67 util,
68 68 vfs as vfsmod,
69 69 )
70 70
71 71 from .interfaces import (
72 72 repository,
73 73 util as interfaceutil,
74 74 )
75 75
76 76 from .utils import (
77 77 hashutil,
78 78 procutil,
79 79 stringutil,
80 80 )
81 81
82 82 from .revlogutils import constants as revlogconst
83 83
84 84 release = lockmod.release
85 85 urlerr = util.urlerr
86 86 urlreq = util.urlreq
87 87
88 88 # set of (path, vfs-location) tuples. vfs-location is:
89 89 # - 'plain' for vfs relative paths
90 90 # - '' for svfs relative paths
91 91 _cachedfiles = set()
92 92
93 93
94 94 class _basefilecache(scmutil.filecache):
95 95 """All filecache usage on repo are done for logic that should be unfiltered
96 96 """
97 97
98 98 def __get__(self, repo, type=None):
99 99 if repo is None:
100 100 return self
101 101 # proxy to unfiltered __dict__ since filtered repo has no entry
102 102 unfi = repo.unfiltered()
103 103 try:
104 104 return unfi.__dict__[self.sname]
105 105 except KeyError:
106 106 pass
107 107 return super(_basefilecache, self).__get__(unfi, type)
108 108
109 109 def set(self, repo, value):
110 110 return super(_basefilecache, self).set(repo.unfiltered(), value)
111 111
112 112
113 113 class repofilecache(_basefilecache):
114 114 """filecache for files in .hg but outside of .hg/store"""
115 115
116 116 def __init__(self, *paths):
117 117 super(repofilecache, self).__init__(*paths)
118 118 for path in paths:
119 119 _cachedfiles.add((path, b'plain'))
120 120
121 121 def join(self, obj, fname):
122 122 return obj.vfs.join(fname)
123 123
124 124
125 125 class storecache(_basefilecache):
126 126 """filecache for files in the store"""
127 127
128 128 def __init__(self, *paths):
129 129 super(storecache, self).__init__(*paths)
130 130 for path in paths:
131 131 _cachedfiles.add((path, b''))
132 132
133 133 def join(self, obj, fname):
134 134 return obj.sjoin(fname)
135 135
136 136
137 137 class mixedrepostorecache(_basefilecache):
138 138 """filecache for a mix files in .hg/store and outside"""
139 139
140 140 def __init__(self, *pathsandlocations):
141 141 # scmutil.filecache only uses the path for passing back into our
142 142 # join(), so we can safely pass a list of paths and locations
143 143 super(mixedrepostorecache, self).__init__(*pathsandlocations)
144 144 _cachedfiles.update(pathsandlocations)
145 145
146 146 def join(self, obj, fnameandlocation):
147 147 fname, location = fnameandlocation
148 148 if location == b'plain':
149 149 return obj.vfs.join(fname)
150 150 else:
151 151 if location != b'':
152 152 raise error.ProgrammingError(
153 153 b'unexpected location: %s' % location
154 154 )
155 155 return obj.sjoin(fname)
156 156
157 157
158 158 def isfilecached(repo, name):
159 159 """check if a repo has already cached "name" filecache-ed property
160 160
161 161 This returns (cachedobj-or-None, iscached) tuple.
162 162 """
163 163 cacheentry = repo.unfiltered()._filecache.get(name, None)
164 164 if not cacheentry:
165 165 return None, False
166 166 return cacheentry.obj, True
167 167
168 168
169 169 class unfilteredpropertycache(util.propertycache):
170 170 """propertycache that apply to unfiltered repo only"""
171 171
172 172 def __get__(self, repo, type=None):
173 173 unfi = repo.unfiltered()
174 174 if unfi is repo:
175 175 return super(unfilteredpropertycache, self).__get__(unfi)
176 176 return getattr(unfi, self.name)
177 177
178 178
179 179 class filteredpropertycache(util.propertycache):
180 180 """propertycache that must take filtering in account"""
181 181
182 182 def cachevalue(self, obj, value):
183 183 object.__setattr__(obj, self.name, value)
184 184
185 185
186 186 def hasunfilteredcache(repo, name):
187 187 """check if a repo has an unfilteredpropertycache value for <name>"""
188 188 return name in vars(repo.unfiltered())
189 189
190 190
191 191 def unfilteredmethod(orig):
192 192 """decorate method that always need to be run on unfiltered version"""
193 193
194 194 def wrapper(repo, *args, **kwargs):
195 195 return orig(repo.unfiltered(), *args, **kwargs)
196 196
197 197 return wrapper
198 198
199 199
200 200 moderncaps = {
201 201 b'lookup',
202 202 b'branchmap',
203 203 b'pushkey',
204 204 b'known',
205 205 b'getbundle',
206 206 b'unbundle',
207 207 }
208 208 legacycaps = moderncaps.union({b'changegroupsubset'})
209 209
210 210
211 211 @interfaceutil.implementer(repository.ipeercommandexecutor)
212 212 class localcommandexecutor(object):
213 213 def __init__(self, peer):
214 214 self._peer = peer
215 215 self._sent = False
216 216 self._closed = False
217 217
218 218 def __enter__(self):
219 219 return self
220 220
221 221 def __exit__(self, exctype, excvalue, exctb):
222 222 self.close()
223 223
224 224 def callcommand(self, command, args):
225 225 if self._sent:
226 226 raise error.ProgrammingError(
227 227 b'callcommand() cannot be used after sendcommands()'
228 228 )
229 229
230 230 if self._closed:
231 231 raise error.ProgrammingError(
232 232 b'callcommand() cannot be used after close()'
233 233 )
234 234
235 235 # We don't need to support anything fancy. Just call the named
236 236 # method on the peer and return a resolved future.
237 237 fn = getattr(self._peer, pycompat.sysstr(command))
238 238
239 239 f = pycompat.futures.Future()
240 240
241 241 try:
242 242 result = fn(**pycompat.strkwargs(args))
243 243 except Exception:
244 244 pycompat.future_set_exception_info(f, sys.exc_info()[1:])
245 245 else:
246 246 f.set_result(result)
247 247
248 248 return f
249 249
250 250 def sendcommands(self):
251 251 self._sent = True
252 252
253 253 def close(self):
254 254 self._closed = True
255 255
256 256
257 257 @interfaceutil.implementer(repository.ipeercommands)
258 258 class localpeer(repository.peer):
259 259 '''peer for a local repo; reflects only the most recent API'''
260 260
261 261 def __init__(self, repo, caps=None):
262 262 super(localpeer, self).__init__()
263 263
264 264 if caps is None:
265 265 caps = moderncaps.copy()
266 266 self._repo = repo.filtered(b'served')
267 267 self.ui = repo.ui
268 268 self._caps = repo._restrictcapabilities(caps)
269 269
270 270 # Begin of _basepeer interface.
271 271
272 272 def url(self):
273 273 return self._repo.url()
274 274
275 275 def local(self):
276 276 return self._repo
277 277
278 278 def peer(self):
279 279 return self
280 280
281 281 def canpush(self):
282 282 return True
283 283
284 284 def close(self):
285 285 self._repo.close()
286 286
287 287 # End of _basepeer interface.
288 288
289 289 # Begin of _basewirecommands interface.
290 290
291 291 def branchmap(self):
292 292 return self._repo.branchmap()
293 293
294 294 def capabilities(self):
295 295 return self._caps
296 296
297 297 def clonebundles(self):
298 298 return self._repo.tryread(b'clonebundles.manifest')
299 299
300 300 def debugwireargs(self, one, two, three=None, four=None, five=None):
301 301 """Used to test argument passing over the wire"""
302 302 return b"%s %s %s %s %s" % (
303 303 one,
304 304 two,
305 305 pycompat.bytestr(three),
306 306 pycompat.bytestr(four),
307 307 pycompat.bytestr(five),
308 308 )
309 309
310 310 def getbundle(
311 311 self, source, heads=None, common=None, bundlecaps=None, **kwargs
312 312 ):
313 313 chunks = exchange.getbundlechunks(
314 314 self._repo,
315 315 source,
316 316 heads=heads,
317 317 common=common,
318 318 bundlecaps=bundlecaps,
319 319 **kwargs
320 320 )[1]
321 321 cb = util.chunkbuffer(chunks)
322 322
323 323 if exchange.bundle2requested(bundlecaps):
324 324 # When requesting a bundle2, getbundle returns a stream to make the
325 325 # wire level function happier. We need to build a proper object
326 326 # from it in local peer.
327 327 return bundle2.getunbundler(self.ui, cb)
328 328 else:
329 329 return changegroup.getunbundler(b'01', cb, None)
330 330
331 331 def heads(self):
332 332 return self._repo.heads()
333 333
334 334 def known(self, nodes):
335 335 return self._repo.known(nodes)
336 336
337 337 def listkeys(self, namespace):
338 338 return self._repo.listkeys(namespace)
339 339
340 340 def lookup(self, key):
341 341 return self._repo.lookup(key)
342 342
343 343 def pushkey(self, namespace, key, old, new):
344 344 return self._repo.pushkey(namespace, key, old, new)
345 345
346 346 def stream_out(self):
347 347 raise error.Abort(_(b'cannot perform stream clone against local peer'))
348 348
349 349 def unbundle(self, bundle, heads, url):
350 350 """apply a bundle on a repo
351 351
352 352 This function handles the repo locking itself."""
353 353 try:
354 354 try:
355 355 bundle = exchange.readbundle(self.ui, bundle, None)
356 356 ret = exchange.unbundle(self._repo, bundle, heads, b'push', url)
357 357 if util.safehasattr(ret, b'getchunks'):
358 358 # This is a bundle20 object, turn it into an unbundler.
359 359 # This little dance should be dropped eventually when the
360 360 # API is finally improved.
361 361 stream = util.chunkbuffer(ret.getchunks())
362 362 ret = bundle2.getunbundler(self.ui, stream)
363 363 return ret
364 364 except Exception as exc:
365 365 # If the exception contains output salvaged from a bundle2
366 366 # reply, we need to make sure it is printed before continuing
367 367 # to fail. So we build a bundle2 with such output and consume
368 368 # it directly.
369 369 #
370 370 # This is not very elegant but allows a "simple" solution for
371 371 # issue4594
372 372 output = getattr(exc, '_bundle2salvagedoutput', ())
373 373 if output:
374 374 bundler = bundle2.bundle20(self._repo.ui)
375 375 for out in output:
376 376 bundler.addpart(out)
377 377 stream = util.chunkbuffer(bundler.getchunks())
378 378 b = bundle2.getunbundler(self.ui, stream)
379 379 bundle2.processbundle(self._repo, b)
380 380 raise
381 381 except error.PushRaced as exc:
382 382 raise error.ResponseError(
383 383 _(b'push failed:'), stringutil.forcebytestr(exc)
384 384 )
385 385
386 386 # End of _basewirecommands interface.
387 387
388 388 # Begin of peer interface.
389 389
390 390 def commandexecutor(self):
391 391 return localcommandexecutor(self)
392 392
393 393 # End of peer interface.
394 394
395 395
396 396 @interfaceutil.implementer(repository.ipeerlegacycommands)
397 397 class locallegacypeer(localpeer):
398 398 '''peer extension which implements legacy methods too; used for tests with
399 399 restricted capabilities'''
400 400
401 401 def __init__(self, repo):
402 402 super(locallegacypeer, self).__init__(repo, caps=legacycaps)
403 403
404 404 # Begin of baselegacywirecommands interface.
405 405
406 406 def between(self, pairs):
407 407 return self._repo.between(pairs)
408 408
409 409 def branches(self, nodes):
410 410 return self._repo.branches(nodes)
411 411
412 412 def changegroup(self, nodes, source):
413 413 outgoing = discovery.outgoing(
414 414 self._repo, missingroots=nodes, missingheads=self._repo.heads()
415 415 )
416 416 return changegroup.makechangegroup(self._repo, outgoing, b'01', source)
417 417
418 418 def changegroupsubset(self, bases, heads, source):
419 419 outgoing = discovery.outgoing(
420 420 self._repo, missingroots=bases, missingheads=heads
421 421 )
422 422 return changegroup.makechangegroup(self._repo, outgoing, b'01', source)
423 423
424 424 # End of baselegacywirecommands interface.
425 425
426 426
427 427 # Increment the sub-version when the revlog v2 format changes to lock out old
428 428 # clients.
429 429 REVLOGV2_REQUIREMENT = b'exp-revlogv2.1'
430 430
431 431 # A repository with the sparserevlog feature will have delta chains that
432 432 # can spread over a larger span. Sparse reading cuts these large spans into
433 433 # pieces, so that each piece isn't too big.
434 434 # Without the sparserevlog capability, reading from the repository could use
435 435 # huge amounts of memory, because the whole span would be read at once,
436 436 # including all the intermediate revisions that aren't pertinent for the chain.
437 437 # This is why once a repository has enabled sparse-read, it becomes required.
438 438 SPARSEREVLOG_REQUIREMENT = b'sparserevlog'
439 439
440 440 # A repository with the sidedataflag requirement will allow storing extra
441 441 # information for revisions without altering their original hashes.
442 442 SIDEDATA_REQUIREMENT = b'exp-sidedata-flag'
443 443
444 444 # A repository with the copies-sidedata-changeset requirement will store
445 445 # copies related information in changeset's sidedata.
446 446 COPIESSDC_REQUIREMENT = b'exp-copies-sidedata-changeset'
447 447
448 448 # The repository use persistent nodemap for the changelog and the manifest.
449 449 NODEMAP_REQUIREMENT = b'persistent-nodemap'
450 450
451 451 # Functions receiving (ui, features) that extensions can register to impact
452 452 # the ability to load repositories with custom requirements. Only
453 453 # functions defined in loaded extensions are called.
454 454 #
455 455 # The function receives a set of requirement strings that the repository
456 456 # is capable of opening. Functions will typically add elements to the
457 457 # set to reflect that the extension knows how to handle those requirements.
458 458 featuresetupfuncs = set()
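# Illustrative registration from an extension (editor's sketch; the
# requirement name 'exp-myfeature' is hypothetical):
#   def featuresetup(ui, supported):
#       supported.add(b'exp-myfeature')
#   localrepo.featuresetupfuncs.add(featuresetup)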
459 459
460 460
461 461 def makelocalrepository(baseui, path, intents=None):
462 462 """Create a local repository object.
463 463
464 464 Given arguments needed to construct a local repository, this function
465 465 performs various early repository loading functionality (such as
466 466 reading the ``.hg/requires`` and ``.hg/hgrc`` files), validates that
467 467 the repository can be opened, derives a type suitable for representing
468 468 that repository, and returns an instance of it.
469 469
470 470 The returned object conforms to the ``repository.completelocalrepository``
471 471 interface.
472 472
473 473 The repository type is derived by calling a series of factory functions
474 474 for each aspect/interface of the final repository. These are defined by
475 475 ``REPO_INTERFACES``.
476 476
477 477 Each factory function is called to produce a type implementing a specific
478 478 interface. The cumulative list of returned types will be combined into a
479 479 new type and that type will be instantiated to represent the local
480 480 repository.
481 481
482 482 The factory functions each receive various state that may be consulted
483 483 as part of deriving a type.
484 484
485 485 Extensions should wrap these factory functions to customize repository type
486 486 creation. Note that an extension's wrapped function may be called even if
487 487 that extension is not loaded for the repo being constructed. Extensions
488 488 should check if their ``__name__`` appears in the
489 489 ``extensionmodulenames`` set passed to the factory function and no-op if
490 490 not.
491 491 """
492 492 ui = baseui.copy()
493 493 # Prevent copying repo configuration.
494 494 ui.copy = baseui.copy
495 495
496 496 # Working directory VFS rooted at repository root.
497 497 wdirvfs = vfsmod.vfs(path, expandpath=True, realpath=True)
498 498
499 499 # Main VFS for .hg/ directory.
500 500 hgpath = wdirvfs.join(b'.hg')
501 501 hgvfs = vfsmod.vfs(hgpath, cacheaudited=True)
502 502
503 503 # The .hg/ path should exist and should be a directory. All other
504 504 # cases are errors.
505 505 if not hgvfs.isdir():
506 506 try:
507 507 hgvfs.stat()
508 508 except OSError as e:
509 509 if e.errno != errno.ENOENT:
510 510 raise
511 511
512 512 raise error.RepoError(_(b'repository %s not found') % path)
513 513
514 514 # .hg/requires file contains a newline-delimited list of
515 515 # features/capabilities the opener (us) must have in order to use
516 516 # the repository. This file was introduced in Mercurial 0.9.2,
517 517 # which means very old repositories may not have one. We assume
518 518 # a missing file translates to no requirements.
519 519 try:
520 520 requirements = set(hgvfs.read(b'requires').splitlines())
521 521 except IOError as e:
522 522 if e.errno != errno.ENOENT:
523 523 raise
524 524 requirements = set()
525 525
526 526 # The .hg/hgrc file may load extensions or contain config options
527 527 # that influence repository construction. Attempt to load it and
528 528 # process any new extensions that it may have pulled in.
529 529 if loadhgrc(ui, wdirvfs, hgvfs, requirements):
530 530 afterhgrcload(ui, wdirvfs, hgvfs, requirements)
531 531 extensions.loadall(ui)
532 532 extensions.populateui(ui)
533 533
534 534 # Set of module names of extensions loaded for this repository.
535 535 extensionmodulenames = {m.__name__ for n, m in extensions.extensions(ui)}
536 536
537 537 supportedrequirements = gathersupportedrequirements(ui)
538 538
539 539 # We first validate the requirements are known.
540 540 ensurerequirementsrecognized(requirements, supportedrequirements)
541 541
542 542 # Then we validate that the known set is reasonable to use together.
543 543 ensurerequirementscompatible(ui, requirements)
544 544
545 545 # TODO there are unhandled edge cases related to opening repositories with
546 546 # shared storage. If storage is shared, we should also test for requirements
547 547 # compatibility in the pointed-to repo. This entails loading the .hg/hgrc in
548 548 # that repo, as that repo may load extensions needed to open it. This is a
549 549 # bit complicated because we don't want the other hgrc to overwrite settings
550 550 # in this hgrc.
551 551 #
552 552 # This bug is somewhat mitigated by the fact that we copy the .hg/requires
553 553 # file when sharing repos. But if a requirement is added after the share is
554 554 # performed, thereby introducing a new requirement for the opener, we may
555 555 # will not see that and could encounter a run-time error interacting with
556 556 # that shared store since it has an unknown-to-us requirement.
557 557
558 558 # At this point, we know we should be capable of opening the repository.
559 559 # Now get on with doing that.
560 560
561 561 features = set()
562 562
563 563 # The "store" part of the repository holds versioned data. How it is
564 564 # accessed is determined by various requirements. The ``shared`` or
565 565 # ``relshared`` requirements indicate the store lives in the path contained
566 566 # in the ``.hg/sharedpath`` file. This is an absolute path for
567 567 # ``shared`` and relative to ``.hg/`` for ``relshared``.
568 568 if b'shared' in requirements or b'relshared' in requirements:
569 569 sharedpath = hgvfs.read(b'sharedpath').rstrip(b'\n')
570 570 if b'relshared' in requirements:
571 571 sharedpath = hgvfs.join(sharedpath)
572 572
573 573 sharedvfs = vfsmod.vfs(sharedpath, realpath=True)
574 574
575 575 if not sharedvfs.exists():
576 576 raise error.RepoError(
577 577 _(b'.hg/sharedpath points to nonexistent directory %s')
578 578 % sharedvfs.base
579 579 )
580 580
581 581 features.add(repository.REPO_FEATURE_SHARED_STORAGE)
582 582
583 583 storebasepath = sharedvfs.base
584 584 cachepath = sharedvfs.join(b'cache')
585 585 else:
586 586 storebasepath = hgvfs.base
587 587 cachepath = hgvfs.join(b'cache')
588 588 wcachepath = hgvfs.join(b'wcache')
589 589
590 590 # The store has changed over time and the exact layout is dictated by
591 591 # requirements. The store interface abstracts differences across all
592 592 # of them.
593 593 store = makestore(
594 594 requirements,
595 595 storebasepath,
596 596 lambda base: vfsmod.vfs(base, cacheaudited=True),
597 597 )
598 598 hgvfs.createmode = store.createmode
599 599
600 600 storevfs = store.vfs
601 601 storevfs.options = resolvestorevfsoptions(ui, requirements, features)
602 602
603 603 # The cache vfs is used to manage cache files.
604 604 cachevfs = vfsmod.vfs(cachepath, cacheaudited=True)
605 605 cachevfs.createmode = store.createmode
606 606 # The cache vfs is used to manage cache files related to the working copy
607 607 wcachevfs = vfsmod.vfs(wcachepath, cacheaudited=True)
608 608 wcachevfs.createmode = store.createmode
609 609
610 610 # Now resolve the type for the repository object. We do this by repeatedly
611 611 # calling a factory function to produce types for specific aspects of the
612 612 # repo's operation. The aggregate returned types are used as base classes
613 613 # for a dynamically-derived type, which will represent our new repository.
614 614
615 615 bases = []
616 616 extrastate = {}
617 617
618 618 for iface, fn in REPO_INTERFACES:
619 619 # We pass all potentially useful state to give extensions tons of
620 620 # flexibility.
621 621 typ = fn()(
622 622 ui=ui,
623 623 intents=intents,
624 624 requirements=requirements,
625 625 features=features,
626 626 wdirvfs=wdirvfs,
627 627 hgvfs=hgvfs,
628 628 store=store,
629 629 storevfs=storevfs,
630 630 storeoptions=storevfs.options,
631 631 cachevfs=cachevfs,
632 632 wcachevfs=wcachevfs,
633 633 extensionmodulenames=extensionmodulenames,
634 634 extrastate=extrastate,
635 635 baseclasses=bases,
636 636 )
637 637
638 638 if not isinstance(typ, type):
639 639 raise error.ProgrammingError(
640 640 b'unable to construct type for %s' % iface
641 641 )
642 642
643 643 bases.append(typ)
644 644
645 645 # type() allows you to use characters in type names that wouldn't be
646 646 # recognized as Python symbols in source code. We abuse that to add
647 647 # rich information about our constructed repo.
648 648 name = pycompat.sysstr(
649 649 b'derivedrepo:%s<%s>' % (wdirvfs.base, b','.join(sorted(requirements)))
650 650 )
651 651
652 652 cls = type(name, tuple(bases), {})
653 653
654 654 return cls(
655 655 baseui=baseui,
656 656 ui=ui,
657 657 origroot=path,
658 658 wdirvfs=wdirvfs,
659 659 hgvfs=hgvfs,
660 660 requirements=requirements,
661 661 supportedrequirements=supportedrequirements,
662 662 sharedpath=storebasepath,
663 663 store=store,
664 664 cachevfs=cachevfs,
665 665 wcachevfs=wcachevfs,
666 666 features=features,
667 667 intents=intents,
668 668 )
669 669
670 670
671 671 def loadhgrc(ui, wdirvfs, hgvfs, requirements):
672 672 """Load hgrc files/content into a ui instance.
673 673
674 674 This is called during repository opening to load any additional
675 675 config files or settings relevant to the current repository.
676 676
677 677 Returns a bool indicating whether any additional configs were loaded.
678 678
679 679 Extensions should monkeypatch this function to modify how per-repo
680 680 configs are loaded. For example, an extension may wish to pull in
681 681 configs from alternate files or sources.
682 682 """
683 683 if not rcutil.use_repo_hgrc():
684 684 return False
685 685 try:
686 686 ui.readconfig(hgvfs.join(b'hgrc'), root=wdirvfs.base)
687 687 return True
688 688 except IOError:
689 689 return False
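# Editor's sketch of an extension monkeypatching loadhgrc as suggested above
# (the extra file name is hypothetical):
#   def _loadhgrc(orig, ui, wdirvfs, hgvfs, requirements):
#       loaded = orig(ui, wdirvfs, hgvfs, requirements)
#       try:
#           ui.readconfig(hgvfs.join(b'hgrc-extra'), root=wdirvfs.base)
#           return True
#       except IOError:
#           return loaded
#   extensions.wrapfunction(localrepo, 'loadhgrc', _loadhgrc)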
690 690
691 691
692 692 def afterhgrcload(ui, wdirvfs, hgvfs, requirements):
693 693 """Perform additional actions after .hg/hgrc is loaded.
694 694
695 695 This function is called during repository loading immediately after
696 696 the .hg/hgrc file is loaded and before per-repo extensions are loaded.
697 697
698 698 The function can be used to validate configs, automatically add
699 699 options (including extensions) based on requirements, etc.
700 700 """
701 701
702 702 # Map of requirements to list of extensions to load automatically when
703 703 # requirement is present.
704 704 autoextensions = {
705 705 b'git': [b'git'],
706 706 b'largefiles': [b'largefiles'],
707 707 b'lfs': [b'lfs'],
708 708 }
709 709
710 710 for requirement, names in sorted(autoextensions.items()):
711 711 if requirement not in requirements:
712 712 continue
713 713
714 714 for name in names:
715 715 if not ui.hasconfig(b'extensions', name):
716 716 ui.setconfig(b'extensions', name, b'', source=b'autoload')
717 717
718 718
719 719 def gathersupportedrequirements(ui):
720 720 """Determine the complete set of recognized requirements."""
721 721 # Start with all requirements supported by this file.
722 722 supported = set(localrepository._basesupported)
723 723
724 724 # Execute ``featuresetupfuncs`` entries if they belong to an extension
725 725 # relevant to this ui instance.
726 726 modules = {m.__name__ for n, m in extensions.extensions(ui)}
727 727
728 728 for fn in featuresetupfuncs:
729 729 if fn.__module__ in modules:
730 730 fn(ui, supported)
731 731
732 732 # Add derived requirements from registered compression engines.
733 733 for name in util.compengines:
734 734 engine = util.compengines[name]
735 735 if engine.available() and engine.revlogheader():
736 736 supported.add(b'exp-compression-%s' % name)
737 737 if engine.name() == b'zstd':
738 738 supported.add(b'revlog-compression-zstd')
739 739
740 740 return supported
741 741
742 742
743 743 def ensurerequirementsrecognized(requirements, supported):
744 744 """Validate that a set of local requirements is recognized.
745 745
746 746 Receives a set of requirements. Raises an ``error.RepoError`` if there
747 747 exists any requirement in that set that currently loaded code doesn't
748 748 recognize.
749 749
750 750 Returns a set of supported requirements.
751 751 """
752 752 missing = set()
753 753
754 754 for requirement in requirements:
755 755 if requirement in supported:
756 756 continue
757 757
758 758 if not requirement or not requirement[0:1].isalnum():
759 759 raise error.RequirementError(_(b'.hg/requires file is corrupt'))
760 760
761 761 missing.add(requirement)
762 762
763 763 if missing:
764 764 raise error.RequirementError(
765 765 _(b'repository requires features unknown to this Mercurial: %s')
766 766 % b' '.join(sorted(missing)),
767 767 hint=_(
768 768 b'see https://mercurial-scm.org/wiki/MissingRequirement '
769 769 b'for more information'
770 770 ),
771 771 )
772 772
773 773
774 774 def ensurerequirementscompatible(ui, requirements):
775 775 """Validates that a set of recognized requirements is mutually compatible.
776 776
777 777 Some requirements may not be compatible with others or require
778 778 config options that aren't enabled. This function is called during
779 779 repository opening to ensure that the set of requirements needed
780 780 to open a repository is sane and compatible with config options.
781 781
782 782 Extensions can monkeypatch this function to perform additional
783 783 checking.
784 784
785 785 ``error.RepoError`` should be raised on failure.
786 786 """
787 787 if b'exp-sparse' in requirements and not sparse.enabled:
788 788 raise error.RepoError(
789 789 _(
790 790 b'repository is using sparse feature but '
791 791 b'sparse is not enabled; enable the '
792 792 b'"sparse" extensions to access'
793 793 )
794 794 )
795 795
796 796
797 797 def makestore(requirements, path, vfstype):
798 798 """Construct a storage object for a repository."""
799 799 if b'store' in requirements:
800 800 if b'fncache' in requirements:
801 801 return storemod.fncachestore(
802 802 path, vfstype, b'dotencode' in requirements
803 803 )
804 804
805 805 return storemod.encodedstore(path, vfstype)
806 806
807 807 return storemod.basicstore(path, vfstype)
808 808
809 809
810 810 def resolvestorevfsoptions(ui, requirements, features):
811 811 """Resolve the options to pass to the store vfs opener.
812 812
813 813 The returned dict is used to influence behavior of the storage layer.
814 814 """
815 815 options = {}
816 816
817 817 if b'treemanifest' in requirements:
818 818 options[b'treemanifest'] = True
819 819
820 820 # experimental config: format.manifestcachesize
821 821 manifestcachesize = ui.configint(b'format', b'manifestcachesize')
822 822 if manifestcachesize is not None:
823 823 options[b'manifestcachesize'] = manifestcachesize
824 824
825 825 # In the absence of another requirement superseding a revlog-related
826 826 # requirement, we have to assume the repo is using revlog version 0.
827 827 # This revlog format is super old and we don't bother trying to parse
828 828 # opener options for it because those options wouldn't do anything
829 829 # meaningful on such old repos.
830 830 if b'revlogv1' in requirements or REVLOGV2_REQUIREMENT in requirements:
831 831 options.update(resolverevlogstorevfsoptions(ui, requirements, features))
832 832 else: # explicitly mark repo as using revlogv0
833 833 options[b'revlogv0'] = True
834 834
835 835 if COPIESSDC_REQUIREMENT in requirements:
836 836 options[b'copies-storage'] = b'changeset-sidedata'
837 837 else:
838 838 writecopiesto = ui.config(b'experimental', b'copies.write-to')
839 839 copiesextramode = (b'changeset-only', b'compatibility')
840 840 if writecopiesto in copiesextramode:
841 841 options[b'copies-storage'] = b'extra'
842 842
843 843 return options
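# Editor's note, summarizing the branches above:
#   COPIESSDC_REQUIREMENT present           -> copies-storage = changeset-sidedata
#   experimental.copies.write-to set to
#     'changeset-only' or 'compatibility'   -> copies-storage = extra
#   otherwise                               -> key absent (copies stay in filelogs)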
844 844
845 845
846 846 def resolverevlogstorevfsoptions(ui, requirements, features):
847 847 """Resolve opener options specific to revlogs."""
848 848
849 849 options = {}
850 850 options[b'flagprocessors'] = {}
851 851
852 852 if b'revlogv1' in requirements:
853 853 options[b'revlogv1'] = True
854 854 if REVLOGV2_REQUIREMENT in requirements:
855 855 options[b'revlogv2'] = True
856 856
857 857 if b'generaldelta' in requirements:
858 858 options[b'generaldelta'] = True
859 859
860 860 # experimental config: format.chunkcachesize
861 861 chunkcachesize = ui.configint(b'format', b'chunkcachesize')
862 862 if chunkcachesize is not None:
863 863 options[b'chunkcachesize'] = chunkcachesize
864 864
865 865 deltabothparents = ui.configbool(
866 866 b'storage', b'revlog.optimize-delta-parent-choice'
867 867 )
868 868 options[b'deltabothparents'] = deltabothparents
869 869
870 870 lazydelta = ui.configbool(b'storage', b'revlog.reuse-external-delta')
871 871 lazydeltabase = False
872 872 if lazydelta:
873 873 lazydeltabase = ui.configbool(
874 874 b'storage', b'revlog.reuse-external-delta-parent'
875 875 )
876 876 if lazydeltabase is None:
877 877 lazydeltabase = not scmutil.gddeltaconfig(ui)
878 878 options[b'lazydelta'] = lazydelta
879 879 options[b'lazydeltabase'] = lazydeltabase
880 880
881 881 chainspan = ui.configbytes(b'experimental', b'maxdeltachainspan')
882 882 if 0 <= chainspan:
883 883 options[b'maxdeltachainspan'] = chainspan
884 884
885 885 mmapindexthreshold = ui.configbytes(b'experimental', b'mmapindexthreshold')
886 886 if mmapindexthreshold is not None:
887 887 options[b'mmapindexthreshold'] = mmapindexthreshold
888 888
889 889 withsparseread = ui.configbool(b'experimental', b'sparse-read')
890 890 srdensitythres = float(
891 891 ui.config(b'experimental', b'sparse-read.density-threshold')
892 892 )
893 893 srmingapsize = ui.configbytes(b'experimental', b'sparse-read.min-gap-size')
894 894 options[b'with-sparse-read'] = withsparseread
895 895 options[b'sparse-read-density-threshold'] = srdensitythres
896 896 options[b'sparse-read-min-gap-size'] = srmingapsize
897 897
898 898 sparserevlog = SPARSEREVLOG_REQUIREMENT in requirements
899 899 options[b'sparse-revlog'] = sparserevlog
900 900 if sparserevlog:
901 901 options[b'generaldelta'] = True
902 902
903 903 sidedata = SIDEDATA_REQUIREMENT in requirements
904 904 options[b'side-data'] = sidedata
905 905
906 906 maxchainlen = None
907 907 if sparserevlog:
908 908 maxchainlen = revlogconst.SPARSE_REVLOG_MAX_CHAIN_LENGTH
909 909 # experimental config: format.maxchainlen
910 910 maxchainlen = ui.configint(b'format', b'maxchainlen', maxchainlen)
911 911 if maxchainlen is not None:
912 912 options[b'maxchainlen'] = maxchainlen
913 913
914 914 for r in requirements:
915 915 # we allow multiple compression engine requirements to co-exist because
916 916 # strictly speaking, revlog seems to support mixed compression styles.
917 917 #
918 918 # The compression used for new entries will be "the last one"
919 919 prefix = r.startswith
920 920 if prefix(b'revlog-compression-') or prefix(b'exp-compression-'):
921 921 options[b'compengine'] = r.split(b'-', 2)[2]
922 922
923 923 options[b'zlib.level'] = ui.configint(b'storage', b'revlog.zlib.level')
924 924 if options[b'zlib.level'] is not None:
925 925 if not (0 <= options[b'zlib.level'] <= 9):
926 926 msg = _(b'invalid value for `storage.revlog.zlib.level` config: %d')
927 927 raise error.Abort(msg % options[b'zlib.level'])
928 928 options[b'zstd.level'] = ui.configint(b'storage', b'revlog.zstd.level')
929 929 if options[b'zstd.level'] is not None:
930 930 if not (0 <= options[b'zstd.level'] <= 22):
931 931 msg = _(b'invalid value for `storage.revlog.zstd.level` config: %d')
932 932 raise error.Abort(msg % options[b'zstd.level'])
933 933
934 934 if repository.NARROW_REQUIREMENT in requirements:
935 935 options[b'enableellipsis'] = True
936 936
937 937 if ui.configbool(b'experimental', b'rust.index'):
938 938 options[b'rust.index'] = True
939 939 if NODEMAP_REQUIREMENT in requirements:
940 options[b'exp-persistent-nodemap'] = True
940 options[b'persistent-nodemap'] = True
941 941 if ui.configbool(b'experimental', b'exp-persistent-nodemap.mmap'):
942 options[b'exp-persistent-nodemap.mmap'] = True
942 options[b'persistent-nodemap.mmap'] = True
943 943 epnm = ui.config(b'experimental', b'exp-persistent-nodemap.mode')
944 options[b'exp-persistent-nodemap.mode'] = epnm
944 options[b'persistent-nodemap.mode'] = epnm
945 945 if ui.configbool(b'devel', b'persistent-nodemap'):
946 946 options[b'devel-force-nodemap'] = True
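# Illustrative hgrc using only the config keys read above (editor's note):
#   [experimental]
#   rust.index = yes
#   exp-persistent-nodemap.mmap = yes
#   [devel]
#   persistent-nodemap = yes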
947 947
948 948 return options
949 949
950 950
951 951 def makemain(**kwargs):
952 952 """Produce a type conforming to ``ilocalrepositorymain``."""
953 953 return localrepository
954 954
955 955
956 956 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
957 957 class revlogfilestorage(object):
958 958 """File storage when using revlogs."""
959 959
960 960 def file(self, path):
961 961 if path[0] == b'/':
962 962 path = path[1:]
963 963
964 964 return filelog.filelog(self.svfs, path)
965 965
966 966
967 967 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
968 968 class revlognarrowfilestorage(object):
969 969 """File storage when using revlogs and narrow files."""
970 970
971 971 def file(self, path):
972 972 if path[0] == b'/':
973 973 path = path[1:]
974 974
975 975 return filelog.narrowfilelog(self.svfs, path, self._storenarrowmatch)
976 976
977 977
978 978 def makefilestorage(requirements, features, **kwargs):
979 979 """Produce a type conforming to ``ilocalrepositoryfilestorage``."""
980 980 features.add(repository.REPO_FEATURE_REVLOG_FILE_STORAGE)
981 981 features.add(repository.REPO_FEATURE_STREAM_CLONE)
982 982
983 983 if repository.NARROW_REQUIREMENT in requirements:
984 984 return revlognarrowfilestorage
985 985 else:
986 986 return revlogfilestorage
987 987
988 988
989 989 # List of repository interfaces and factory functions for them. Each
990 990 # will be called in order during ``makelocalrepository()`` to iteratively
991 991 # derive the final type for a local repository instance. We capture the
992 992 # function as a lambda so we don't hold a reference and the module-level
993 993 # functions can be wrapped.
994 994 REPO_INTERFACES = [
995 995 (repository.ilocalrepositorymain, lambda: makemain),
996 996 (repository.ilocalrepositoryfilestorage, lambda: makefilestorage),
997 997 ]
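# Editor's sketch: because the factories are captured lazily via lambdas,
# wrapping the module-level function is enough (wrapper name assumed):
#   def makefilestorage_wrapped(orig, requirements, features, **kwargs):
#       features.add(b'my-feature')  # hypothetical feature flag
#       return orig(requirements, features, **kwargs)
#   extensions.wrapfunction(localrepo, 'makefilestorage', makefilestorage_wrapped)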
998 998
999 999
1000 1000 @interfaceutil.implementer(repository.ilocalrepositorymain)
1001 1001 class localrepository(object):
1002 1002 """Main class for representing local repositories.
1003 1003
1004 1004 All local repositories are instances of this class.
1005 1005
1006 1006 Constructed on its own, instances of this class are not usable as
1007 1007 repository objects. To obtain a usable repository object, call
1008 1008 ``hg.repository()``, ``localrepo.instance()``, or
1009 1009 ``localrepo.makelocalrepository()``. The latter is the lowest-level.
1010 1010 ``instance()`` adds support for creating new repositories.
1011 1011 ``hg.repository()`` adds more extension integration, including calling
1012 1012 ``reposetup()``. Generally speaking, ``hg.repository()`` should be
1013 1013 used.
1014 1014 """
1015 1015
1016 1016 # obsolete experimental requirements:
1017 1017 # - manifestv2: An experimental new manifest format that allowed
1018 1018 # for stem compression of long paths. Experiment ended up not
1019 1019 # being successful (repository sizes went up due to worse delta
1020 1020 # chains), and the code was deleted in 4.6.
1021 1021 supportedformats = {
1022 1022 b'revlogv1',
1023 1023 b'generaldelta',
1024 1024 b'treemanifest',
1025 1025 COPIESSDC_REQUIREMENT,
1026 1026 REVLOGV2_REQUIREMENT,
1027 1027 SIDEDATA_REQUIREMENT,
1028 1028 SPARSEREVLOG_REQUIREMENT,
1029 1029 NODEMAP_REQUIREMENT,
1030 1030 bookmarks.BOOKMARKS_IN_STORE_REQUIREMENT,
1031 1031 }
1032 1032 _basesupported = supportedformats | {
1033 1033 b'store',
1034 1034 b'fncache',
1035 1035 b'shared',
1036 1036 b'relshared',
1037 1037 b'dotencode',
1038 1038 b'exp-sparse',
1039 1039 b'internal-phase',
1040 1040 }
1041 1041
1042 1042 # list of prefixes for files which can be written without 'wlock'
1043 1043 # Extensions should extend this list when needed; see the sketch below
1044 1044 _wlockfreeprefix = {
1045 1045 # We might consider requiring 'wlock' for the next
1046 1046 # two, but pretty much all the existing code assumes
1047 1047 # wlock is not needed, so we keep them excluded for
1048 1048 # now.
1049 1049 b'hgrc',
1050 1050 b'requires',
1051 1051 # XXX cache is a complicated business; someone
1052 1052 # should investigate this in depth at some point
1053 1053 b'cache/',
1054 1054 # XXX shouldn't dirstate be covered by the wlock?
1055 1055 b'dirstate',
1056 1056 # XXX bisect was still a bit too messy at the time
1057 1057 # this changeset was introduced. Someone should fix
1058 1058 # the remaining bit and drop this line
1059 1059 b'bisect.state',
1060 1060 }
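# A sketch of how an extension might extend the set above (prefix name
# hypothetical)::
#
#   localrepo.localrepository._wlockfreeprefix.add(b'myext-state')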
1061 1061
1062 1062 def __init__(
1063 1063 self,
1064 1064 baseui,
1065 1065 ui,
1066 1066 origroot,
1067 1067 wdirvfs,
1068 1068 hgvfs,
1069 1069 requirements,
1070 1070 supportedrequirements,
1071 1071 sharedpath,
1072 1072 store,
1073 1073 cachevfs,
1074 1074 wcachevfs,
1075 1075 features,
1076 1076 intents=None,
1077 1077 ):
1078 1078 """Create a new local repository instance.
1079 1079
1080 1080 Most callers should use ``hg.repository()``, ``localrepo.instance()``,
1081 1081 or ``localrepo.makelocalrepository()`` for obtaining a new repository
1082 1082 object.
1083 1083
1084 1084 Arguments:
1085 1085
1086 1086 baseui
1087 1087 ``ui.ui`` instance that ``ui`` argument was based off of.
1088 1088
1089 1089 ui
1090 1090 ``ui.ui`` instance for use by the repository.
1091 1091
1092 1092 origroot
1093 1093 ``bytes`` path to working directory root of this repository.
1094 1094
1095 1095 wdirvfs
1096 1096 ``vfs.vfs`` rooted at the working directory.
1097 1097
1098 1098 hgvfs
1099 1099 ``vfs.vfs`` rooted at .hg/
1100 1100
1101 1101 requirements
1102 1102 ``set`` of bytestrings representing repository opening requirements.
1103 1103
1104 1104 supportedrequirements
1105 1105 ``set`` of bytestrings representing repository requirements that we
1106 1106 know how to open. May be a superset of ``requirements``.
1107 1107
1108 1108 sharedpath
1109 1109 ``bytes`` defining the path to the storage base directory. Points to a
1110 1110 ``.hg/`` directory somewhere.
1111 1111
1112 1112 store
1113 1113 ``store.basicstore`` (or derived) instance providing access to
1114 1114 versioned storage.
1115 1115
1116 1116 cachevfs
1117 1117 ``vfs.vfs`` used for cache files.
1118 1118
1119 1119 wcachevfs
1120 1120 ``vfs.vfs`` used for cache files related to the working copy.
1121 1121
1122 1122 features
1123 1123 ``set`` of bytestrings defining features/capabilities of this
1124 1124 instance.
1125 1125
1126 1126 intents
1127 1127 ``set`` of system strings indicating what this repo will be used
1128 1128 for.
1129 1129 """
1130 1130 self.baseui = baseui
1131 1131 self.ui = ui
1132 1132 self.origroot = origroot
1133 1133 # vfs rooted at working directory.
1134 1134 self.wvfs = wdirvfs
1135 1135 self.root = wdirvfs.base
1136 1136 # vfs rooted at .hg/. Used to access most non-store paths.
1137 1137 self.vfs = hgvfs
1138 1138 self.path = hgvfs.base
1139 1139 self.requirements = requirements
1140 1140 self.supported = supportedrequirements
1141 1141 self.sharedpath = sharedpath
1142 1142 self.store = store
1143 1143 self.cachevfs = cachevfs
1144 1144 self.wcachevfs = wcachevfs
1145 1145 self.features = features
1146 1146
1147 1147 self.filtername = None
1148 1148
1149 1149 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
1150 1150 b'devel', b'check-locks'
1151 1151 ):
1152 1152 self.vfs.audit = self._getvfsward(self.vfs.audit)
1153 1153 # A list of callbacks to shape the phase if no data were found.
1154 1154 # Callbacks are in the form: func(repo, roots) --> processed root.
1155 1155 # This list is to be filled by extensions during repo setup.
1156 1156 self._phasedefaults = []
1157 1157
1158 1158 color.setup(self.ui)
1159 1159
1160 1160 self.spath = self.store.path
1161 1161 self.svfs = self.store.vfs
1162 1162 self.sjoin = self.store.join
1163 1163 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
1164 1164 b'devel', b'check-locks'
1165 1165 ):
1166 1166 if util.safehasattr(self.svfs, b'vfs'): # this is filtervfs
1167 1167 self.svfs.vfs.audit = self._getsvfsward(self.svfs.vfs.audit)
1168 1168 else: # standard vfs
1169 1169 self.svfs.audit = self._getsvfsward(self.svfs.audit)
1170 1170
1171 1171 self._dirstatevalidatewarned = False
1172 1172
1173 1173 self._branchcaches = branchmap.BranchMapCache()
1174 1174 self._revbranchcache = None
1175 1175 self._filterpats = {}
1176 1176 self._datafilters = {}
1177 1177 self._transref = self._lockref = self._wlockref = None
1178 1178
1179 1179 # A cache for various files under .hg/ that tracks file changes,
1180 1180 # (used by the filecache decorator)
1181 1181 #
1182 1182 # Maps a property name to its util.filecacheentry
1183 1183 self._filecache = {}
1184 1184
1185 1185 # hold sets of revision to be filtered
1186 1186 # should be cleared when something might have changed the filter value:
1187 1187 # - new changesets,
1188 1188 # - phase change,
1189 1189 # - new obsolescence marker,
1190 1190 # - working directory parent change,
1191 1191 # - bookmark changes
1192 1192 self.filteredrevcache = {}
1193 1193
1194 1194 # post-dirstate-status hooks
1195 1195 self._postdsstatus = []
1196 1196
1197 1197 # generic mapping between names and nodes
1198 1198 self.names = namespaces.namespaces()
1199 1199
1200 1200 # Key to signature value.
1201 1201 self._sparsesignaturecache = {}
1202 1202 # Signature to cached matcher instance.
1203 1203 self._sparsematchercache = {}
1204 1204
1205 1205 self._extrafilterid = repoview.extrafilter(ui)
1206 1206
1207 1207 self.filecopiesmode = None
1208 1208 if COPIESSDC_REQUIREMENT in self.requirements:
1209 1209 self.filecopiesmode = b'changeset-sidedata'
1210 1210
1211 1211 def _getvfsward(self, origfunc):
1212 1212 """build a ward for self.vfs"""
1213 1213 rref = weakref.ref(self)
1214 1214
1215 1215 def checkvfs(path, mode=None):
1216 1216 ret = origfunc(path, mode=mode)
1217 1217 repo = rref()
1218 1218 if (
1219 1219 repo is None
1220 1220 or not util.safehasattr(repo, b'_wlockref')
1221 1221 or not util.safehasattr(repo, b'_lockref')
1222 1222 ):
1223 1223 return
1224 1224 if mode in (None, b'r', b'rb'):
1225 1225 return
1226 1226 if path.startswith(repo.path):
1227 1227 # truncate name relative to the repository (.hg)
1228 1228 path = path[len(repo.path) + 1 :]
1229 1229 if path.startswith(b'cache/'):
1230 1230 msg = b'accessing cache with vfs instead of cachevfs: "%s"'
1231 1231 repo.ui.develwarn(msg % path, stacklevel=3, config=b"cache-vfs")
1232 1232 if path.startswith(b'journal.') or path.startswith(b'undo.'):
1233 1233 # journal is covered by 'lock'
1234 1234 if repo._currentlock(repo._lockref) is None:
1235 1235 repo.ui.develwarn(
1236 1236 b'write with no lock: "%s"' % path,
1237 1237 stacklevel=3,
1238 1238 config=b'check-locks',
1239 1239 )
1240 1240 elif repo._currentlock(repo._wlockref) is None:
1241 1241 # rest of vfs files are covered by 'wlock'
1242 1242 #
1243 1243 # exclude special files
1244 1244 for prefix in self._wlockfreeprefix:
1245 1245 if path.startswith(prefix):
1246 1246 return
1247 1247 repo.ui.develwarn(
1248 1248 b'write with no wlock: "%s"' % path,
1249 1249 stacklevel=3,
1250 1250 config=b'check-locks',
1251 1251 )
1252 1252 return ret
1253 1253
1254 1254 return checkvfs
1255 1255
1256 1256 def _getsvfsward(self, origfunc):
1257 1257 """build a ward for self.svfs"""
1258 1258 rref = weakref.ref(self)
1259 1259
1260 1260 def checksvfs(path, mode=None):
1261 1261 ret = origfunc(path, mode=mode)
1262 1262 repo = rref()
1263 1263 if repo is None or not util.safehasattr(repo, b'_lockref'):
1264 1264 return
1265 1265 if mode in (None, b'r', b'rb'):
1266 1266 return
1267 1267 if path.startswith(repo.sharedpath):
1268 1268 # truncate name relative to the repository (.hg)
1269 1269 path = path[len(repo.sharedpath) + 1 :]
1270 1270 if repo._currentlock(repo._lockref) is None:
1271 1271 repo.ui.develwarn(
1272 1272 b'write with no lock: "%s"' % path, stacklevel=4
1273 1273 )
1274 1274 return ret
1275 1275
1276 1276 return checksvfs
1277 1277
1278 1278 def close(self):
1279 1279 self._writecaches()
1280 1280
1281 1281 def _writecaches(self):
1282 1282 if self._revbranchcache:
1283 1283 self._revbranchcache.write()
1284 1284
1285 1285 def _restrictcapabilities(self, caps):
1286 1286 if self.ui.configbool(b'experimental', b'bundle2-advertise'):
1287 1287 caps = set(caps)
1288 1288 capsblob = bundle2.encodecaps(
1289 1289 bundle2.getrepocaps(self, role=b'client')
1290 1290 )
1291 1291 caps.add(b'bundle2=' + urlreq.quote(capsblob))
1292 1292 return caps
1293 1293
1294 1294 def _writerequirements(self):
1295 1295 scmutil.writerequires(self.vfs, self.requirements)
1296 1296
1297 1297 # Don't cache auditor/nofsauditor, or you'll end up with reference cycle:
1298 1298 # self -> auditor -> self._checknested -> self
1299 1299
1300 1300 @property
1301 1301 def auditor(self):
1302 1302 # This is only used by context.workingctx.match in order to
1303 1303 # detect files in subrepos.
1304 1304 return pathutil.pathauditor(self.root, callback=self._checknested)
1305 1305
1306 1306 @property
1307 1307 def nofsauditor(self):
1308 1308 # This is only used by context.basectx.match in order to detect
1309 1309 # files in subrepos.
1310 1310 return pathutil.pathauditor(
1311 1311 self.root, callback=self._checknested, realfs=False, cached=True
1312 1312 )
1313 1313
1314 1314 def _checknested(self, path):
1315 1315 """Determine if path is a legal nested repository."""
1316 1316 if not path.startswith(self.root):
1317 1317 return False
1318 1318 subpath = path[len(self.root) + 1 :]
1319 1319 normsubpath = util.pconvert(subpath)
1320 1320
1321 1321 # XXX: Checking against the current working copy is wrong in
1322 1322 # the sense that it can reject things like
1323 1323 #
1324 1324 # $ hg cat -r 10 sub/x.txt
1325 1325 #
1326 1326 # if sub/ is no longer a subrepository in the working copy
1327 1327 # parent revision.
1328 1328 #
1329 1329 # However, it can of course also allow things that would have
1330 1330 # been rejected before, such as the above cat command if sub/
1331 1331 # is a subrepository now, but was a normal directory before.
1332 1332 # The old path auditor would have rejected by mistake since it
1333 1333 # panics when it sees sub/.hg/.
1334 1334 #
1335 1335 # All in all, checking against the working copy seems sensible
1336 1336 # since we want to prevent access to nested repositories on
1337 1337 # the filesystem *now*.
1338 1338 ctx = self[None]
1339 1339 parts = util.splitpath(subpath)
1340 1340 while parts:
1341 1341 prefix = b'/'.join(parts)
1342 1342 if prefix in ctx.substate:
1343 1343 if prefix == normsubpath:
1344 1344 return True
1345 1345 else:
1346 1346 sub = ctx.sub(prefix)
1347 1347 return sub.checknested(subpath[len(prefix) + 1 :])
1348 1348 else:
1349 1349 parts.pop()
1350 1350 return False
1351 1351
1352 1352 def peer(self):
1353 1353 return localpeer(self) # not cached to avoid reference cycle
1354 1354
1355 1355 def unfiltered(self):
1356 1356 """Return unfiltered version of the repository
1357 1357
1358 1358 Intended to be overwritten by filtered repo."""
1359 1359 return self
1360 1360
1361 1361 def filtered(self, name, visibilityexceptions=None):
1362 1362 """Return a filtered version of a repository
1363 1363
1364 1364 The `name` parameter is the identifier of the requested view. This
1365 1365 will return a repoview object set "exactly" to the specified view.
1366 1366
1367 1367 This function does not apply recursive filtering to a repository. For
1368 1368 example calling `repo.filtered("served")` will return a repoview using
1369 1369 the "served" view, regardless of the initial view used by `repo`.
1370 1370
1371 1371 In other words, there is always only one level of `repoview` "filtering".
1372 1372 """
1373 1373 if self._extrafilterid is not None and b'%' not in name:
1374 1374 name = name + b'%' + self._extrafilterid
1375 1375
1376 1376 cls = repoview.newtype(self.unfiltered().__class__)
1377 1377 return cls(self, name, visibilityexceptions)
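# Usage sketch: because filtering is non-recursive, both of the calls
# below yield a repoview using the "served" view::
#
#   served = repo.filtered(b'served')
#   same_view = repo.filtered(b'visible').filtered(b'served')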
1378 1378
1379 1379 @mixedrepostorecache(
1380 1380 (b'bookmarks', b'plain'),
1381 1381 (b'bookmarks.current', b'plain'),
1382 1382 (b'bookmarks', b''),
1383 1383 (b'00changelog.i', b''),
1384 1384 )
1385 1385 def _bookmarks(self):
1386 1386 # Since the multiple files involved in the transaction cannot be
1387 1387 # written atomically (with current repository format), there is a race
1388 1388 # condition here.
1389 1389 #
1390 1390 # 1) changelog content A is read
1391 1391 # 2) outside transaction update changelog to content B
1392 1392 # 3) outside transaction update bookmark file referring to content B
1393 1393 # 4) bookmarks file content is read and filtered against changelog-A
1394 1394 #
1395 1395 # When this happens, bookmarks against nodes missing from A are dropped.
1396 1396 #
1397 1397 # Having this happen during a read is not great, but it becomes worse
1398 1398 # when it happens during a write, because the bookmarks to the "unknown"
1399 1399 # nodes will be dropped for good. However, writes happen within locks.
1400 1400 # This locking makes it possible to have a race-free consistent read.
1401 1401 # For this purpose, data read from disk before locking is
1402 1402 # "invalidated" right after the locks are taken. These invalidations are
1403 1403 # "light"; the `filecache` mechanism keeps the data in memory and will
1404 1404 # reuse it if the underlying files did not change. Not parsing the
1405 1405 # same data multiple times helps performance.
1406 1406 #
1407 1407 # Unfortunately in the case described above, the files tracked by the
1408 1408 # bookmarks file cache might not have changed, but the in-memory
1409 1409 # content is still "wrong" because we used an older changelog content
1410 1410 # to process the on-disk data. So after locking, the changelog would be
1411 1411 # refreshed but `_bookmarks` would be preserved.
1412 1412 # Adding `00changelog.i` to the list of tracked files is not
1413 1413 # enough, because at the time we build the content for `_bookmarks` in
1414 1414 # (4), the changelog file has already diverged from the content used
1415 1415 # for loading `changelog` in (1)
1416 1416 #
1417 1417 # To prevent the issue, we force the changelog to be explicitly
1418 1418 # reloaded while computing `_bookmarks`. The data race can still happen
1419 1419 # without the lock (with a narrower window), but it would no longer go
1420 1420 # undetected during the lock time refresh.
1421 1421 #
1422 1422 # The new schedule is as follows:
1423 1423 #
1424 1424 # 1) filecache logic detect that `_bookmarks` needs to be computed
1425 1425 # 2) cachestat for `bookmarks` and `changelog` are captured (for book)
1426 1426 # 3) We force `changelog` filecache to be tested
1427 1427 # 4) cachestat for `changelog` are captured (for changelog)
1428 1428 # 5) `_bookmarks` is computed and cached
1429 1429 #
1430 1430 # The step in (3) ensures we have a changelog at least as recent as the
1431 1431 # cache stat computed in (1). As a result, at locking time:
1432 1432 # * if the changelog did not change since (1) -> we can reuse the data
1433 1433 # * otherwise -> the bookmarks get refreshed.
1434 1434 self._refreshchangelog()
1435 1435 return bookmarks.bmstore(self)
1436 1436
1437 1437 def _refreshchangelog(self):
1438 1438 """make sure the in memory changelog match the on-disk one"""
1439 1439 if 'changelog' in vars(self) and self.currenttransaction() is None:
1440 1440 del self.changelog
1441 1441
1442 1442 @property
1443 1443 def _activebookmark(self):
1444 1444 return self._bookmarks.active
1445 1445
1446 1446 # _phasesets depend on changelog. what we need is to call
1447 1447 # _phasecache.invalidate() if '00changelog.i' was changed, but it
1448 1448 # can't be easily expressed in filecache mechanism.
1449 1449 @storecache(b'phaseroots', b'00changelog.i')
1450 1450 def _phasecache(self):
1451 1451 return phases.phasecache(self, self._phasedefaults)
1452 1452
1453 1453 @storecache(b'obsstore')
1454 1454 def obsstore(self):
1455 1455 return obsolete.makestore(self.ui, self)
1456 1456
1457 1457 @storecache(b'00changelog.i')
1458 1458 def changelog(self):
1459 1459 return self.store.changelog(txnutil.mayhavepending(self.root))
1460 1460
1461 1461 @storecache(b'00manifest.i')
1462 1462 def manifestlog(self):
1463 1463 return self.store.manifestlog(self, self._storenarrowmatch)
1464 1464
1465 1465 @repofilecache(b'dirstate')
1466 1466 def dirstate(self):
1467 1467 return self._makedirstate()
1468 1468
1469 1469 def _makedirstate(self):
1470 1470 """Extension point for wrapping the dirstate per-repo."""
1471 1471 sparsematchfn = lambda: sparse.matcher(self)
1472 1472
1473 1473 return dirstate.dirstate(
1474 1474 self.vfs, self.ui, self.root, self._dirstatevalidate, sparsematchfn
1475 1475 )
1476 1476
1477 1477 def _dirstatevalidate(self, node):
1478 1478 try:
1479 1479 self.changelog.rev(node)
1480 1480 return node
1481 1481 except error.LookupError:
1482 1482 if not self._dirstatevalidatewarned:
1483 1483 self._dirstatevalidatewarned = True
1484 1484 self.ui.warn(
1485 1485 _(b"warning: ignoring unknown working parent %s!\n")
1486 1486 % short(node)
1487 1487 )
1488 1488 return nullid
1489 1489
1490 1490 @storecache(narrowspec.FILENAME)
1491 1491 def narrowpats(self):
1492 1492 """matcher patterns for this repository's narrowspec
1493 1493
1494 1494 A tuple of (includes, excludes).
1495 1495 """
1496 1496 return narrowspec.load(self)
1497 1497
1498 1498 @storecache(narrowspec.FILENAME)
1499 1499 def _storenarrowmatch(self):
1500 1500 if repository.NARROW_REQUIREMENT not in self.requirements:
1501 1501 return matchmod.always()
1502 1502 include, exclude = self.narrowpats
1503 1503 return narrowspec.match(self.root, include=include, exclude=exclude)
1504 1504
1505 1505 @storecache(narrowspec.FILENAME)
1506 1506 def _narrowmatch(self):
1507 1507 if repository.NARROW_REQUIREMENT not in self.requirements:
1508 1508 return matchmod.always()
1509 1509 narrowspec.checkworkingcopynarrowspec(self)
1510 1510 include, exclude = self.narrowpats
1511 1511 return narrowspec.match(self.root, include=include, exclude=exclude)
1512 1512
1513 1513 def narrowmatch(self, match=None, includeexact=False):
1514 1514 """matcher corresponding the the repo's narrowspec
1515 1515
1516 1516 If `match` is given, then that will be intersected with the narrow
1517 1517 matcher.
1518 1518
1519 1519 If `includeexact` is True, then any exact matches from `match` will
1520 1520 be included even if they're outside the narrowspec.
1521 1521 """
1522 1522 if match:
1523 1523 if includeexact and not self._narrowmatch.always():
1524 1524 # do not exclude explicitly-specified paths so that they can
1525 1525 # be warned later on
1526 1526 em = matchmod.exact(match.files())
1527 1527 nm = matchmod.unionmatcher([self._narrowmatch, em])
1528 1528 return matchmod.intersectmatchers(match, nm)
1529 1529 return matchmod.intersectmatchers(match, self._narrowmatch)
1530 1530 return self._narrowmatch
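# A sketch of intersecting a caller-supplied matcher with the narrowspec
# (pattern hypothetical)::
#
#   m = matchmod.match(repo.root, b'', [b'glob:src/**'])
#   nm = repo.narrowmatch(m, includeexact=True)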
1531 1531
1532 1532 def setnarrowpats(self, newincludes, newexcludes):
1533 1533 narrowspec.save(self, newincludes, newexcludes)
1534 1534 self.invalidate(clearfilecache=True)
1535 1535
1536 1536 @unfilteredpropertycache
1537 1537 def _quick_access_changeid_null(self):
1538 1538 return {
1539 1539 b'null': (nullrev, nullid),
1540 1540 nullrev: (nullrev, nullid),
1541 1541 nullid: (nullrev, nullid),
1542 1542 }
1543 1543
1544 1544 @unfilteredpropertycache
1545 1545 def _quick_access_changeid_wc(self):
1546 1546 # also fast path access to the working copy parents
1547 1547 # however, only do it for filters that ensure the wc is visible.
1548 1548 quick = {}
1549 1549 cl = self.unfiltered().changelog
1550 1550 for node in self.dirstate.parents():
1551 1551 if node == nullid:
1552 1552 continue
1553 1553 rev = cl.index.get_rev(node)
1554 1554 if rev is None:
1555 1555 # unknown working copy parent case:
1556 1556 #
1557 1557 # skip the fast path and let higher code deal with it
1558 1558 continue
1559 1559 pair = (rev, node)
1560 1560 quick[rev] = pair
1561 1561 quick[node] = pair
1562 1562 # also add the parents of the parents
1563 1563 for r in cl.parentrevs(rev):
1564 1564 if r == nullrev:
1565 1565 continue
1566 1566 n = cl.node(r)
1567 1567 pair = (r, n)
1568 1568 quick[r] = pair
1569 1569 quick[n] = pair
1570 1570 p1node = self.dirstate.p1()
1571 1571 if p1node != nullid:
1572 1572 quick[b'.'] = quick[p1node]
1573 1573 return quick
1574 1574
1575 1575 @unfilteredmethod
1576 1576 def _quick_access_changeid_invalidate(self):
1577 1577 if '_quick_access_changeid_wc' in vars(self):
1578 1578 del self.__dict__['_quick_access_changeid_wc']
1579 1579
1580 1580 @property
1581 1581 def _quick_access_changeid(self):
1582 1582 """an helper dictionnary for __getitem__ calls
1583 1583
1584 1584 This contains a list of symbols we can recognize right away without
1585 1585 further processing.
1586 1586 """
1587 1587 mapping = self._quick_access_changeid_null
1588 1588 if self.filtername in repoview.filter_has_wc:
1589 1589 mapping = mapping.copy()
1590 1590 mapping.update(self._quick_access_changeid_wc)
1591 1591 return mapping
1592 1592
1593 1593 def __getitem__(self, changeid):
1594 1594 # dealing with special cases
1595 1595 if changeid is None:
1596 1596 return context.workingctx(self)
1597 1597 if isinstance(changeid, context.basectx):
1598 1598 return changeid
1599 1599
1600 1600 # dealing with multiple revisions
1601 1601 if isinstance(changeid, slice):
1602 1602 # wdirrev isn't contiguous so the slice shouldn't include it
1603 1603 return [
1604 1604 self[i]
1605 1605 for i in pycompat.xrange(*changeid.indices(len(self)))
1606 1606 if i not in self.changelog.filteredrevs
1607 1607 ]
1608 1608
1609 1609 # dealing with some special values
1610 1610 quick_access = self._quick_access_changeid.get(changeid)
1611 1611 if quick_access is not None:
1612 1612 rev, node = quick_access
1613 1613 return context.changectx(self, rev, node, maybe_filtered=False)
1614 1614 if changeid == b'tip':
1615 1615 node = self.changelog.tip()
1616 1616 rev = self.changelog.rev(node)
1617 1617 return context.changectx(self, rev, node)
1618 1618
1619 1619 # dealing with arbitrary values
1620 1620 try:
1621 1621 if isinstance(changeid, int):
1622 1622 node = self.changelog.node(changeid)
1623 1623 rev = changeid
1624 1624 elif changeid == b'.':
1625 1625 # this is a hack to delay/avoid loading obsmarkers
1626 1626 # when we know that '.' won't be hidden
1627 1627 node = self.dirstate.p1()
1628 1628 rev = self.unfiltered().changelog.rev(node)
1629 1629 elif len(changeid) == 20:
1630 1630 try:
1631 1631 node = changeid
1632 1632 rev = self.changelog.rev(changeid)
1633 1633 except error.FilteredLookupError:
1634 1634 changeid = hex(changeid) # for the error message
1635 1635 raise
1636 1636 except LookupError:
1637 1637 # check if it might have come from damaged dirstate
1638 1638 #
1639 1639 # XXX we could avoid the unfiltered if we had a recognizable
1640 1640 # exception for filtered changeset access
1641 1641 if (
1642 1642 self.local()
1643 1643 and changeid in self.unfiltered().dirstate.parents()
1644 1644 ):
1645 1645 msg = _(b"working directory has unknown parent '%s'!")
1646 1646 raise error.Abort(msg % short(changeid))
1647 1647 changeid = hex(changeid) # for the error message
1648 1648 raise
1649 1649
1650 1650 elif len(changeid) == 40:
1651 1651 node = bin(changeid)
1652 1652 rev = self.changelog.rev(node)
1653 1653 else:
1654 1654 raise error.ProgrammingError(
1655 1655 b"unsupported changeid '%s' of type %s"
1656 1656 % (changeid, pycompat.bytestr(type(changeid)))
1657 1657 )
1658 1658
1659 1659 return context.changectx(self, rev, node)
1660 1660
1661 1661 except (error.FilteredIndexError, error.FilteredLookupError):
1662 1662 raise error.FilteredRepoLookupError(
1663 1663 _(b"filtered revision '%s'") % pycompat.bytestr(changeid)
1664 1664 )
1665 1665 except (IndexError, LookupError):
1666 1666 raise error.RepoLookupError(
1667 1667 _(b"unknown revision '%s'") % pycompat.bytestr(changeid)
1668 1668 )
1669 1669 except error.WdirUnsupported:
1670 1670 return context.workingctx(self)
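# The changeid forms accepted above, summarized (values hypothetical)::
#
#   repo[None]     # working directory context
#   repo[0]        # by integer revision
#   repo[b'.']     # working directory parent
#   repo[b'tip']   # tip changeset
#   repo[node]     # 20-byte binary node or 40-character hex string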
1671 1671
1672 1672 def __contains__(self, changeid):
1673 1673 """True if the given changeid exists
1674 1674
1675 1675 error.AmbiguousPrefixLookupError is raised if an ambiguous node
1676 1676 prefix is specified.
1677 1677 """
1678 1678 try:
1679 1679 self[changeid]
1680 1680 return True
1681 1681 except error.RepoLookupError:
1682 1682 return False
1683 1683
1684 1684 def __nonzero__(self):
1685 1685 return True
1686 1686
1687 1687 __bool__ = __nonzero__
1688 1688
1689 1689 def __len__(self):
1690 1690 # no need to pay the cost of repoview.changelog
1691 1691 unfi = self.unfiltered()
1692 1692 return len(unfi.changelog)
1693 1693
1694 1694 def __iter__(self):
1695 1695 return iter(self.changelog)
1696 1696
1697 1697 def revs(self, expr, *args):
1698 1698 '''Find revisions matching a revset.
1699 1699
1700 1700 The revset is specified as a string ``expr`` that may contain
1701 1701 %-formatting to escape certain types. See ``revsetlang.formatspec``.
1702 1702
1703 1703 Revset aliases from the configuration are not expanded. To expand
1704 1704 user aliases, consider calling ``scmutil.revrange()`` or
1705 1705 ``repo.anyrevs([expr], user=True)``.
1706 1706
1707 1707 Returns a smartset.abstractsmartset, which is a list-like interface
1708 1708 that contains integer revisions.
1709 1709 '''
1710 1710 tree = revsetlang.spectree(expr, *args)
1711 1711 return revset.makematcher(tree)(self)
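# e.g., with %-formatting expanded by ``revsetlang.formatspec``
# (arguments hypothetical)::
#
#   repo.revs(b'heads(%ld)', [1, 2, 3])
#   repo.revs(b'ancestors(%n)', somenode)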
1712 1712
1713 1713 def set(self, expr, *args):
1714 1714 '''Find revisions matching a revset and emit changectx instances.
1715 1715
1716 1716 This is a convenience wrapper around ``revs()`` that iterates the
1717 1717 result and is a generator of changectx instances.
1718 1718
1719 1719 Revset aliases from the configuration are not expanded. To expand
1720 1720 user aliases, consider calling ``scmutil.revrange()``.
1721 1721 '''
1722 1722 for r in self.revs(expr, *args):
1723 1723 yield self[r]
1724 1724
1725 1725 def anyrevs(self, specs, user=False, localalias=None):
1726 1726 '''Find revisions matching one of the given revsets.
1727 1727
1728 1728 Revset aliases from the configuration are not expanded by default. To
1729 1729 expand user aliases, specify ``user=True``. To provide some local
1730 1730 definitions overriding user aliases, set ``localalias`` to
1731 1731 ``{name: definitionstring}``.
1732 1732 '''
1733 1733 if specs == [b'null']:
1734 1734 return revset.baseset([nullrev])
1735 1735 if specs == [b'.']:
1736 1736 quick_data = self._quick_access_changeid.get(b'.')
1737 1737 if quick_data is not None:
1738 1738 return revset.baseset([quick_data[0]])
1739 1739 if user:
1740 1740 m = revset.matchany(
1741 1741 self.ui,
1742 1742 specs,
1743 1743 lookup=revset.lookupfn(self),
1744 1744 localalias=localalias,
1745 1745 )
1746 1746 else:
1747 1747 m = revset.matchany(None, specs, localalias=localalias)
1748 1748 return m(self)
1749 1749
1750 1750 def url(self):
1751 1751 return b'file:' + self.root
1752 1752
1753 1753 def hook(self, name, throw=False, **args):
1754 1754 """Call a hook, passing this repo instance.
1755 1755
1756 1756 This is a convenience method to aid invoking hooks. Extensions likely
1757 1757 won't call this unless they have registered a custom hook or are
1758 1758 replacing code that is expected to call a hook.
1759 1759 """
1760 1760 return hook.hook(self.ui, self, name, throw, **args)
1761 1761
1762 1762 @filteredpropertycache
1763 1763 def _tagscache(self):
1764 1764 '''Returns a tagscache object that contains various tags-related
1765 1765 caches.'''
1766 1766
1767 1767 # This simplifies its cache management by having one decorated
1768 1768 # function (this one) and the rest simply fetch things from it.
1769 1769 class tagscache(object):
1770 1770 def __init__(self):
1771 1771 # These two define the set of tags for this repository. tags
1772 1772 # maps tag name to node; tagtypes maps tag name to 'global' or
1773 1773 # 'local'. (Global tags are defined by .hgtags across all
1774 1774 # heads, and local tags are defined in .hg/localtags.)
1775 1775 # They constitute the in-memory cache of tags.
1776 1776 self.tags = self.tagtypes = None
1777 1777
1778 1778 self.nodetagscache = self.tagslist = None
1779 1779
1780 1780 cache = tagscache()
1781 1781 cache.tags, cache.tagtypes = self._findtags()
1782 1782
1783 1783 return cache
1784 1784
1785 1785 def tags(self):
1786 1786 '''return a mapping of tag to node'''
1787 1787 t = {}
1788 1788 if self.changelog.filteredrevs:
1789 1789 tags, tt = self._findtags()
1790 1790 else:
1791 1791 tags = self._tagscache.tags
1792 1792 rev = self.changelog.rev
1793 1793 for k, v in pycompat.iteritems(tags):
1794 1794 try:
1795 1795 # ignore tags to unknown nodes
1796 1796 rev(v)
1797 1797 t[k] = v
1798 1798 except (error.LookupError, ValueError):
1799 1799 pass
1800 1800 return t
1801 1801
1802 1802 def _findtags(self):
1803 1803 '''Do the hard work of finding tags. Return a pair of dicts
1804 1804 (tags, tagtypes) where tags maps tag name to node, and tagtypes
1805 1805 maps tag name to a string like \'global\' or \'local\'.
1806 1806 Subclasses or extensions are free to add their own tags, but
1807 1807 should be aware that the returned dicts will be retained for the
1808 1808 duration of the localrepo object.'''
1809 1809
1810 1810 # XXX what tagtype should subclasses/extensions use? Currently
1811 1811 # mq and bookmarks add tags, but do not set the tagtype at all.
1812 1812 # Should each extension invent its own tag type? Should there
1813 1813 # be one tagtype for all such "virtual" tags? Or is the status
1814 1814 # quo fine?
1815 1815
1816 1816 # map tag name to (node, hist)
1817 1817 alltags = tagsmod.findglobaltags(self.ui, self)
1818 1818 # map tag name to tag type
1819 1819 tagtypes = {tag: b'global' for tag in alltags}
1820 1820
1821 1821 tagsmod.readlocaltags(self.ui, self, alltags, tagtypes)
1822 1822
1823 1823 # Build the return dicts. Have to re-encode tag names because
1824 1824 # the tags module always uses UTF-8 (in order not to lose info
1825 1825 # writing to the cache), but the rest of Mercurial wants them in
1826 1826 # local encoding.
1827 1827 tags = {}
1828 1828 for (name, (node, hist)) in pycompat.iteritems(alltags):
1829 1829 if node != nullid:
1830 1830 tags[encoding.tolocal(name)] = node
1831 1831 tags[b'tip'] = self.changelog.tip()
1832 1832 tagtypes = {
1833 1833 encoding.tolocal(name): value
1834 1834 for (name, value) in pycompat.iteritems(tagtypes)
1835 1835 }
1836 1836 return (tags, tagtypes)
1837 1837
1838 1838 def tagtype(self, tagname):
1839 1839 '''
1840 1840 return the type of the given tag. result can be:
1841 1841
1842 1842 'local' : a local tag
1843 1843 'global' : a global tag
1844 1844 None : tag does not exist
1845 1845 '''
1846 1846
1847 1847 return self._tagscache.tagtypes.get(tagname)
1848 1848
1849 1849 def tagslist(self):
1850 1850 '''return a list of tags ordered by revision'''
1851 1851 if not self._tagscache.tagslist:
1852 1852 l = []
1853 1853 for t, n in pycompat.iteritems(self.tags()):
1854 1854 l.append((self.changelog.rev(n), t, n))
1855 1855 self._tagscache.tagslist = [(t, n) for r, t, n in sorted(l)]
1856 1856
1857 1857 return self._tagscache.tagslist
1858 1858
1859 1859 def nodetags(self, node):
1860 1860 '''return the tags associated with a node'''
1861 1861 if not self._tagscache.nodetagscache:
1862 1862 nodetagscache = {}
1863 1863 for t, n in pycompat.iteritems(self._tagscache.tags):
1864 1864 nodetagscache.setdefault(n, []).append(t)
1865 1865 for tags in pycompat.itervalues(nodetagscache):
1866 1866 tags.sort()
1867 1867 self._tagscache.nodetagscache = nodetagscache
1868 1868 return self._tagscache.nodetagscache.get(node, [])
1869 1869
1870 1870 def nodebookmarks(self, node):
1871 1871 """return the list of bookmarks pointing to the specified node"""
1872 1872 return self._bookmarks.names(node)
1873 1873
1874 1874 def branchmap(self):
1875 1875 '''returns a dictionary {branch: [branchheads]} with branchheads
1876 1876 ordered by increasing revision number'''
1877 1877 return self._branchcaches[self]
1878 1878
1879 1879 @unfilteredmethod
1880 1880 def revbranchcache(self):
1881 1881 if not self._revbranchcache:
1882 1882 self._revbranchcache = branchmap.revbranchcache(self.unfiltered())
1883 1883 return self._revbranchcache
1884 1884
1885 1885 def branchtip(self, branch, ignoremissing=False):
1886 1886 '''return the tip node for a given branch
1887 1887
1888 1888 If ignoremissing is True, then this method will not raise an error.
1889 1889 This is helpful for callers that only expect None for a missing branch
1890 1890 (e.g. namespace).
1891 1891
1892 1892 '''
1893 1893 try:
1894 1894 return self.branchmap().branchtip(branch)
1895 1895 except KeyError:
1896 1896 if not ignoremissing:
1897 1897 raise error.RepoLookupError(_(b"unknown branch '%s'") % branch)
1898 1898 else:
1899 1899 pass
1900 1900
1901 1901 def lookup(self, key):
1902 1902 node = scmutil.revsymbol(self, key).node()
1903 1903 if node is None:
1904 1904 raise error.RepoLookupError(_(b"unknown revision '%s'") % key)
1905 1905 return node
1906 1906
1907 1907 def lookupbranch(self, key):
1908 1908 if self.branchmap().hasbranch(key):
1909 1909 return key
1910 1910
1911 1911 return scmutil.revsymbol(self, key).branch()
1912 1912
1913 1913 def known(self, nodes):
1914 1914 cl = self.changelog
1915 1915 get_rev = cl.index.get_rev
1916 1916 filtered = cl.filteredrevs
1917 1917 result = []
1918 1918 for n in nodes:
1919 1919 r = get_rev(n)
1920 1920 resp = not (r is None or r in filtered)
1921 1921 result.append(resp)
1922 1922 return result
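# e.g. (nodes hypothetical): for one node present in the filtered
# changelog and one that is not, ``repo.known([n1, n2])`` returns
# ``[True, False]``.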
1923 1923
1924 1924 def local(self):
1925 1925 return self
1926 1926
1927 1927 def publishing(self):
1928 1928 # it's safe (and desirable) to trust the publish flag unconditionally
1929 1929 # so that we don't finalize changes shared between users via ssh or nfs
1930 1930 return self.ui.configbool(b'phases', b'publish', untrusted=True)
1931 1931
1932 1932 def cancopy(self):
1933 1933 # so statichttprepo's override of local() works
1934 1934 if not self.local():
1935 1935 return False
1936 1936 if not self.publishing():
1937 1937 return True
1938 1938 # if publishing we can't copy if there is filtered content
1939 1939 return not self.filtered(b'visible').changelog.filteredrevs
1940 1940
1941 1941 def shared(self):
1942 1942 '''the type of shared repository (None if not shared)'''
1943 1943 if self.sharedpath != self.path:
1944 1944 return b'store'
1945 1945 return None
1946 1946
1947 1947 def wjoin(self, f, *insidef):
1948 1948 return self.vfs.reljoin(self.root, f, *insidef)
1949 1949
1950 1950 def setparents(self, p1, p2=nullid):
1951 1951 self[None].setparents(p1, p2)
1952 1952 self._quick_access_changeid_invalidate()
1953 1953
1954 1954 def filectx(self, path, changeid=None, fileid=None, changectx=None):
1955 1955 """changeid must be a changeset revision, if specified.
1956 1956 fileid can be a file revision or node."""
1957 1957 return context.filectx(
1958 1958 self, path, changeid, fileid, changectx=changectx
1959 1959 )
1960 1960
1961 1961 def getcwd(self):
1962 1962 return self.dirstate.getcwd()
1963 1963
1964 1964 def pathto(self, f, cwd=None):
1965 1965 return self.dirstate.pathto(f, cwd)
1966 1966
1967 1967 def _loadfilter(self, filter):
1968 1968 if filter not in self._filterpats:
1969 1969 l = []
1970 1970 for pat, cmd in self.ui.configitems(filter):
1971 1971 if cmd == b'!':
1972 1972 continue
1973 1973 mf = matchmod.match(self.root, b'', [pat])
1974 1974 fn = None
1975 1975 params = cmd
1976 1976 for name, filterfn in pycompat.iteritems(self._datafilters):
1977 1977 if cmd.startswith(name):
1978 1978 fn = filterfn
1979 1979 params = cmd[len(name) :].lstrip()
1980 1980 break
1981 1981 if not fn:
1982 1982 fn = lambda s, c, **kwargs: procutil.filter(s, c)
1983 1983 fn.__name__ = 'commandfilter'
1984 1984 # Wrap old filters not supporting keyword arguments
1985 1985 if not pycompat.getargspec(fn)[2]:
1986 1986 oldfn = fn
1987 1987 fn = lambda s, c, oldfn=oldfn, **kwargs: oldfn(s, c)
1988 1988 fn.__name__ = 'compat-' + oldfn.__name__
1989 1989 l.append((mf, fn, params))
1990 1990 self._filterpats[filter] = l
1991 1991 return self._filterpats[filter]
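# A hypothetical hgrc wiring such filters through the [encode]/[decode]
# sections read above (loosely following the examples in
# ``hg help config``)::
#
#   [encode]
#   *.gz = pipe: gunzip
#   [decode]
#   *.gz = pipe: gzip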
1992 1992
1993 1993 def _filter(self, filterpats, filename, data):
1994 1994 for mf, fn, cmd in filterpats:
1995 1995 if mf(filename):
1996 1996 self.ui.debug(
1997 1997 b"filtering %s through %s\n"
1998 1998 % (filename, cmd or pycompat.sysbytes(fn.__name__))
1999 1999 )
2000 2000 data = fn(data, cmd, ui=self.ui, repo=self, filename=filename)
2001 2001 break
2002 2002
2003 2003 return data
2004 2004
2005 2005 @unfilteredpropertycache
2006 2006 def _encodefilterpats(self):
2007 2007 return self._loadfilter(b'encode')
2008 2008
2009 2009 @unfilteredpropertycache
2010 2010 def _decodefilterpats(self):
2011 2011 return self._loadfilter(b'decode')
2012 2012
2013 2013 def adddatafilter(self, name, filter):
2014 2014 self._datafilters[name] = filter
2015 2015
2016 2016 def wread(self, filename):
2017 2017 if self.wvfs.islink(filename):
2018 2018 data = self.wvfs.readlink(filename)
2019 2019 else:
2020 2020 data = self.wvfs.read(filename)
2021 2021 return self._filter(self._encodefilterpats, filename, data)
2022 2022
2023 2023 def wwrite(self, filename, data, flags, backgroundclose=False, **kwargs):
2024 2024 """write ``data`` into ``filename`` in the working directory
2025 2025
2026 2026 This returns the length of the written (maybe decoded) data.
2027 2027 """
2028 2028 data = self._filter(self._decodefilterpats, filename, data)
2029 2029 if b'l' in flags:
2030 2030 self.wvfs.symlink(data, filename)
2031 2031 else:
2032 2032 self.wvfs.write(
2033 2033 filename, data, backgroundclose=backgroundclose, **kwargs
2034 2034 )
2035 2035 if b'x' in flags:
2036 2036 self.wvfs.setflags(filename, False, True)
2037 2037 else:
2038 2038 self.wvfs.setflags(filename, False, False)
2039 2039 return len(data)
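# e.g. (names hypothetical): ``repo.wwrite(b'a.txt', b'data\n', b'')``
# writes a regular file, flags b'l' write a symlink instead, and b'x'
# additionally sets the executable bit.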
2040 2040
2041 2041 def wwritedata(self, filename, data):
2042 2042 return self._filter(self._decodefilterpats, filename, data)
2043 2043
2044 2044 def currenttransaction(self):
2045 2045 """return the current transaction or None if non exists"""
2046 2046 if self._transref:
2047 2047 tr = self._transref()
2048 2048 else:
2049 2049 tr = None
2050 2050
2051 2051 if tr and tr.running():
2052 2052 return tr
2053 2053 return None
2054 2054
2055 2055 def transaction(self, desc, report=None):
2056 2056 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
2057 2057 b'devel', b'check-locks'
2058 2058 ):
2059 2059 if self._currentlock(self._lockref) is None:
2060 2060 raise error.ProgrammingError(b'transaction requires locking')
2061 2061 tr = self.currenttransaction()
2062 2062 if tr is not None:
2063 2063 return tr.nest(name=desc)
2064 2064
2065 2065 # abort here if the journal already exists
2066 2066 if self.svfs.exists(b"journal"):
2067 2067 raise error.RepoError(
2068 2068 _(b"abandoned transaction found"),
2069 2069 hint=_(b"run 'hg recover' to clean up transaction"),
2070 2070 )
2071 2071
2072 2072 idbase = b"%.40f#%f" % (random.random(), time.time())
2073 2073 ha = hex(hashutil.sha1(idbase).digest())
2074 2074 txnid = b'TXN:' + ha
2075 2075 self.hook(b'pretxnopen', throw=True, txnname=desc, txnid=txnid)
2076 2076
2077 2077 self._writejournal(desc)
2078 2078 renames = [(vfs, x, undoname(x)) for vfs, x in self._journalfiles()]
2079 2079 if report:
2080 2080 rp = report
2081 2081 else:
2082 2082 rp = self.ui.warn
2083 2083 vfsmap = {b'plain': self.vfs, b'store': self.svfs} # root of .hg/
2084 2084 # we must avoid cyclic reference between repo and transaction.
2085 2085 reporef = weakref.ref(self)
2086 2086 # Code to track tag movement
2087 2087 #
2088 2088 # Since tags are all handled as file content, it is actually quite hard
2089 2089 # to track these movements from a code perspective, so we fall back to
2090 2090 # tracking at the repository level. One could envision tracking changes
2091 2091 # to the '.hgtags' file through changegroup application, but that fails to
2092 2092 # cope with cases where a transaction exposes new heads without a
2093 2093 # changegroup being involved (eg: phase movement).
2094 2094 #
2095 2095 # For now, we gate the feature behind a flag since this likely comes
2096 2096 # with performance impacts. The current code runs more often than needed
2097 2097 # and does not use caches as much as it could. The current focus is on
2098 2098 # the behavior of the feature, so we disable it by default. The flag
2099 2099 # will be removed when we are happy with the performance impact.
2100 2100 #
2101 2101 # Once this feature is no longer experimental move the following
2102 2102 # documentation to the appropriate help section:
2103 2103 #
2104 2104 # The ``HG_TAG_MOVED`` variable will be set if the transaction touched
2105 2105 # tags (new or changed or deleted tags). In addition the details of
2106 2106 # these changes are made available in a file at:
2107 2107 # ``REPOROOT/.hg/changes/tags.changes``.
2108 2108 # Make sure you check for HG_TAG_MOVED before reading that file as it
2109 2109 # might exist from a previous transaction even if no tags were touched
2110 2110 # in this one. Changes are recorded in a line-based format::
2111 2111 #
2112 2112 # <action> <hex-node> <tag-name>\n
2113 2113 #
2114 2114 # Actions are defined as follows:
2115 2115 # "-R": tag is removed,
2116 2116 # "+A": tag is added,
2117 2117 # "-M": tag is moved (old value),
2118 2118 # "+M": tag is moved (new value),
2119 2119 tracktags = lambda x: None
2120 2120 # experimental config: experimental.hook-track-tags
2121 2121 shouldtracktags = self.ui.configbool(
2122 2122 b'experimental', b'hook-track-tags'
2123 2123 )
2124 2124 if desc != b'strip' and shouldtracktags:
2125 2125 oldheads = self.changelog.headrevs()
2126 2126
2127 2127 def tracktags(tr2):
2128 2128 repo = reporef()
2129 2129 oldfnodes = tagsmod.fnoderevs(repo.ui, repo, oldheads)
2130 2130 newheads = repo.changelog.headrevs()
2131 2131 newfnodes = tagsmod.fnoderevs(repo.ui, repo, newheads)
2132 2132 # notes: we compare lists here.
2133 2133 # As we do it only once, building a set would not be cheaper
2134 2134 changes = tagsmod.difftags(repo.ui, repo, oldfnodes, newfnodes)
2135 2135 if changes:
2136 2136 tr2.hookargs[b'tag_moved'] = b'1'
2137 2137 with repo.vfs(
2138 2138 b'changes/tags.changes', b'w', atomictemp=True
2139 2139 ) as changesfile:
2140 2140 # note: we do not register the file with the transaction
2141 2141 # because we need it to still exist when the transaction
2142 2142 # is closed (for txnclose hooks)
2143 2143 tagsmod.writediff(changesfile, changes)
2144 2144
2145 2145 def validate(tr2):
2146 2146 """will run pre-closing hooks"""
2147 2147 # XXX the transaction API is a bit lacking here so we take a hacky
2148 2148 # path for now
2149 2149 #
2150 2150 # We cannot add this as a "pending" hook since the 'tr.hookargs'
2151 2151 # dict is copied before these run. In addition, we need the data
2152 2152 # available to in-memory hooks too.
2153 2153 #
2154 2154 # Moreover, we also need to make sure this runs before txnclose
2155 2155 # hooks and there is no "pending" mechanism that would execute
2156 2156 # logic only if hooks are about to run.
2157 2157 #
2158 2158 # Fixing this limitation of the transaction is also needed to track
2159 2159 # other families of changes (bookmarks, phases, obsolescence).
2160 2160 #
2161 2161 # This will have to be fixed before we remove the experimental
2162 2162 # gating.
2163 2163 tracktags(tr2)
2164 2164 repo = reporef()
2165 2165
2166 2166 singleheadopt = (b'experimental', b'single-head-per-branch')
2167 2167 singlehead = repo.ui.configbool(*singleheadopt)
2168 2168 if singlehead:
2169 2169 singleheadsub = repo.ui.configsuboptions(*singleheadopt)[1]
2170 2170 accountclosed = singleheadsub.get(
2171 2171 b"account-closed-heads", False
2172 2172 )
2173 2173 scmutil.enforcesinglehead(repo, tr2, desc, accountclosed)
2174 2174 if hook.hashook(repo.ui, b'pretxnclose-bookmark'):
2175 2175 for name, (old, new) in sorted(
2176 2176 tr.changes[b'bookmarks'].items()
2177 2177 ):
2178 2178 args = tr.hookargs.copy()
2179 2179 args.update(bookmarks.preparehookargs(name, old, new))
2180 2180 repo.hook(
2181 2181 b'pretxnclose-bookmark',
2182 2182 throw=True,
2183 2183 **pycompat.strkwargs(args)
2184 2184 )
2185 2185 if hook.hashook(repo.ui, b'pretxnclose-phase'):
2186 2186 cl = repo.unfiltered().changelog
2187 2187 for revs, (old, new) in tr.changes[b'phases']:
2188 2188 for rev in revs:
2189 2189 args = tr.hookargs.copy()
2190 2190 node = hex(cl.node(rev))
2191 2191 args.update(phases.preparehookargs(node, old, new))
2192 2192 repo.hook(
2193 2193 b'pretxnclose-phase',
2194 2194 throw=True,
2195 2195 **pycompat.strkwargs(args)
2196 2196 )
2197 2197
2198 2198 repo.hook(
2199 2199 b'pretxnclose', throw=True, **pycompat.strkwargs(tr.hookargs)
2200 2200 )
2201 2201
2202 2202 def releasefn(tr, success):
2203 2203 repo = reporef()
2204 2204 if repo is None:
2205 2205 # If the repo has been GC'd (and this release function is being
2206 2206 # called from transaction.__del__), there's not much we can do,
2207 2207 # so just leave the unfinished transaction there and let the
2208 2208 # user run `hg recover`.
2209 2209 return
2210 2210 if success:
2211 2211 # this should be explicitly invoked here, because
2212 2212 # in-memory changes aren't written out when closing
2213 2213 # the transaction, if tr.addfilegenerator (via
2214 2214 # dirstate.write or so) isn't invoked while the
2215 2215 # transaction is running
2216 2216 repo.dirstate.write(None)
2217 2217 else:
2218 2218 # discard all changes (including ones already written
2219 2219 # out) in this transaction
2220 2220 narrowspec.restorebackup(self, b'journal.narrowspec')
2221 2221 narrowspec.restorewcbackup(self, b'journal.narrowspec.dirstate')
2222 2222 repo.dirstate.restorebackup(None, b'journal.dirstate')
2223 2223
2224 2224 repo.invalidate(clearfilecache=True)
2225 2225
2226 2226 tr = transaction.transaction(
2227 2227 rp,
2228 2228 self.svfs,
2229 2229 vfsmap,
2230 2230 b"journal",
2231 2231 b"undo",
2232 2232 aftertrans(renames),
2233 2233 self.store.createmode,
2234 2234 validator=validate,
2235 2235 releasefn=releasefn,
2236 2236 checkambigfiles=_cachedfiles,
2237 2237 name=desc,
2238 2238 )
2239 2239 tr.changes[b'origrepolen'] = len(self)
2240 2240 tr.changes[b'obsmarkers'] = set()
2241 2241 tr.changes[b'phases'] = []
2242 2242 tr.changes[b'bookmarks'] = {}
2243 2243
2244 2244 tr.hookargs[b'txnid'] = txnid
2245 2245 tr.hookargs[b'txnname'] = desc
2246 2246 # note: writing the fncache only during finalize means that the file is
2247 2247 # outdated when running hooks. As fncache is used for streaming clone,
2248 2248 # this is not expected to break anything that happens during the hooks.
2249 2249 tr.addfinalize(b'flush-fncache', self.store.write)
2250 2250
2251 2251 def txnclosehook(tr2):
2252 2252 """To be run if transaction is successful, will schedule a hook run
2253 2253 """
2254 2254 # Don't reference tr2 in hook() so we don't hold a reference.
2255 2255 # This reduces memory consumption when there are multiple
2256 2256 # transactions per lock. This can likely go away if issue5045
2257 2257 # fixes the function accumulation.
2258 2258 hookargs = tr2.hookargs
2259 2259
2260 2260 def hookfunc(unused_success):
2261 2261 repo = reporef()
2262 2262 if hook.hashook(repo.ui, b'txnclose-bookmark'):
2263 2263 bmchanges = sorted(tr.changes[b'bookmarks'].items())
2264 2264 for name, (old, new) in bmchanges:
2265 2265 args = tr.hookargs.copy()
2266 2266 args.update(bookmarks.preparehookargs(name, old, new))
2267 2267 repo.hook(
2268 2268 b'txnclose-bookmark',
2269 2269 throw=False,
2270 2270 **pycompat.strkwargs(args)
2271 2271 )
2272 2272
2273 2273 if hook.hashook(repo.ui, b'txnclose-phase'):
2274 2274 cl = repo.unfiltered().changelog
2275 2275 phasemv = sorted(
2276 2276 tr.changes[b'phases'], key=lambda r: r[0][0]
2277 2277 )
2278 2278 for revs, (old, new) in phasemv:
2279 2279 for rev in revs:
2280 2280 args = tr.hookargs.copy()
2281 2281 node = hex(cl.node(rev))
2282 2282 args.update(phases.preparehookargs(node, old, new))
2283 2283 repo.hook(
2284 2284 b'txnclose-phase',
2285 2285 throw=False,
2286 2286 **pycompat.strkwargs(args)
2287 2287 )
2288 2288
2289 2289 repo.hook(
2290 2290 b'txnclose', throw=False, **pycompat.strkwargs(hookargs)
2291 2291 )
2292 2292
2293 2293 reporef()._afterlock(hookfunc)
2294 2294
2295 2295 tr.addfinalize(b'txnclose-hook', txnclosehook)
2296 2296 # Include a leading "-" to make it happen before the transaction summary
2297 2297 # reports registered via scmutil.registersummarycallback() whose names
2298 2298 # are 00-txnreport etc. That way, the caches will be warm when the
2299 2299 # callbacks run.
2300 2300 tr.addpostclose(b'-warm-cache', self._buildcacheupdater(tr))
2301 2301
2302 2302 def txnaborthook(tr2):
2303 2303 """To be run if transaction is aborted
2304 2304 """
2305 2305 reporef().hook(
2306 2306 b'txnabort', throw=False, **pycompat.strkwargs(tr2.hookargs)
2307 2307 )
2308 2308
2309 2309 tr.addabort(b'txnabort-hook', txnaborthook)
2310 2310 # avoid eager cache invalidation. in-memory data should be identical
2311 2311 # to stored data if the transaction has no error.
2312 2312 tr.addpostclose(b'refresh-filecachestats', self._refreshfilecachestats)
2313 2313 self._transref = weakref.ref(tr)
2314 2314 scmutil.registersummarycallback(self, tr, desc)
2315 2315 return tr
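# A minimal usage sketch (description hypothetical); a lock must be held,
# as enforced by the check-locks devel warning above::
#
#   with repo.lock():
#       with repo.transaction(b'my-operation') as tr:
#           ...  # writes registered with tr are committed on success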
2316 2316
2317 2317 def _journalfiles(self):
2318 2318 return (
2319 2319 (self.svfs, b'journal'),
2320 2320 (self.svfs, b'journal.narrowspec'),
2321 2321 (self.vfs, b'journal.narrowspec.dirstate'),
2322 2322 (self.vfs, b'journal.dirstate'),
2323 2323 (self.vfs, b'journal.branch'),
2324 2324 (self.vfs, b'journal.desc'),
2325 2325 (bookmarks.bookmarksvfs(self), b'journal.bookmarks'),
2326 2326 (self.svfs, b'journal.phaseroots'),
2327 2327 )
2328 2328
2329 2329 def undofiles(self):
2330 2330 return [(vfs, undoname(x)) for vfs, x in self._journalfiles()]
2331 2331
2332 2332 @unfilteredmethod
2333 2333 def _writejournal(self, desc):
2334 2334 self.dirstate.savebackup(None, b'journal.dirstate')
2335 2335 narrowspec.savewcbackup(self, b'journal.narrowspec.dirstate')
2336 2336 narrowspec.savebackup(self, b'journal.narrowspec')
2337 2337 self.vfs.write(
2338 2338 b"journal.branch", encoding.fromlocal(self.dirstate.branch())
2339 2339 )
2340 2340 self.vfs.write(b"journal.desc", b"%d\n%s\n" % (len(self), desc))
2341 2341 bookmarksvfs = bookmarks.bookmarksvfs(self)
2342 2342 bookmarksvfs.write(
2343 2343 b"journal.bookmarks", bookmarksvfs.tryread(b"bookmarks")
2344 2344 )
2345 2345 self.svfs.write(b"journal.phaseroots", self.svfs.tryread(b"phaseroots"))
2346 2346
2347 2347 def recover(self):
2348 2348 with self.lock():
2349 2349 if self.svfs.exists(b"journal"):
2350 2350 self.ui.status(_(b"rolling back interrupted transaction\n"))
2351 2351 vfsmap = {
2352 2352 b'': self.svfs,
2353 2353 b'plain': self.vfs,
2354 2354 }
2355 2355 transaction.rollback(
2356 2356 self.svfs,
2357 2357 vfsmap,
2358 2358 b"journal",
2359 2359 self.ui.warn,
2360 2360 checkambigfiles=_cachedfiles,
2361 2361 )
2362 2362 self.invalidate()
2363 2363 return True
2364 2364 else:
2365 2365 self.ui.warn(_(b"no interrupted transaction available\n"))
2366 2366 return False
2367 2367
2368 2368 def rollback(self, dryrun=False, force=False):
2369 2369 wlock = lock = dsguard = None
2370 2370 try:
2371 2371 wlock = self.wlock()
2372 2372 lock = self.lock()
2373 2373 if self.svfs.exists(b"undo"):
2374 2374 dsguard = dirstateguard.dirstateguard(self, b'rollback')
2375 2375
2376 2376 return self._rollback(dryrun, force, dsguard)
2377 2377 else:
2378 2378 self.ui.warn(_(b"no rollback information available\n"))
2379 2379 return 1
2380 2380 finally:
2381 2381 release(dsguard, lock, wlock)
2382 2382
2383 2383 @unfilteredmethod # Until we get smarter cache management
2384 2384 def _rollback(self, dryrun, force, dsguard):
2385 2385 ui = self.ui
2386 2386 try:
2387 2387 args = self.vfs.read(b'undo.desc').splitlines()
2388 2388 (oldlen, desc, detail) = (int(args[0]), args[1], None)
2389 2389 if len(args) >= 3:
2390 2390 detail = args[2]
2391 2391 oldtip = oldlen - 1
2392 2392
2393 2393 if detail and ui.verbose:
2394 2394 msg = _(
2395 2395 b'repository tip rolled back to revision %d'
2396 2396 b' (undo %s: %s)\n'
2397 2397 ) % (oldtip, desc, detail)
2398 2398 else:
2399 2399 msg = _(
2400 2400 b'repository tip rolled back to revision %d (undo %s)\n'
2401 2401 ) % (oldtip, desc)
2402 2402 except IOError:
2403 2403 msg = _(b'rolling back unknown transaction\n')
2404 2404 desc = None
2405 2405
2406 2406 if not force and self[b'.'] != self[b'tip'] and desc == b'commit':
2407 2407 raise error.Abort(
2408 2408 _(
2409 2409 b'rollback of last commit while not checked out '
2410 2410 b'may lose data'
2411 2411 ),
2412 2412 hint=_(b'use -f to force'),
2413 2413 )
2414 2414
2415 2415 ui.status(msg)
2416 2416 if dryrun:
2417 2417 return 0
2418 2418
2419 2419 parents = self.dirstate.parents()
2420 2420 self.destroying()
2421 2421 vfsmap = {b'plain': self.vfs, b'': self.svfs}
2422 2422 transaction.rollback(
2423 2423 self.svfs, vfsmap, b'undo', ui.warn, checkambigfiles=_cachedfiles
2424 2424 )
2425 2425 bookmarksvfs = bookmarks.bookmarksvfs(self)
2426 2426 if bookmarksvfs.exists(b'undo.bookmarks'):
2427 2427 bookmarksvfs.rename(
2428 2428 b'undo.bookmarks', b'bookmarks', checkambig=True
2429 2429 )
2430 2430 if self.svfs.exists(b'undo.phaseroots'):
2431 2431 self.svfs.rename(b'undo.phaseroots', b'phaseroots', checkambig=True)
2432 2432 self.invalidate()
2433 2433
2434 2434 has_node = self.changelog.index.has_node
2435 2435 parentgone = any(not has_node(p) for p in parents)
2436 2436 if parentgone:
2437 2437 # prevent dirstateguard from overwriting already restored one
2438 2438 dsguard.close()
2439 2439
2440 2440 narrowspec.restorebackup(self, b'undo.narrowspec')
2441 2441 narrowspec.restorewcbackup(self, b'undo.narrowspec.dirstate')
2442 2442 self.dirstate.restorebackup(None, b'undo.dirstate')
2443 2443 try:
2444 2444 branch = self.vfs.read(b'undo.branch')
2445 2445 self.dirstate.setbranch(encoding.tolocal(branch))
2446 2446 except IOError:
2447 2447 ui.warn(
2448 2448 _(
2449 2449 b'named branch could not be reset: '
2450 2450 b'current branch is still \'%s\'\n'
2451 2451 )
2452 2452 % self.dirstate.branch()
2453 2453 )
2454 2454
2455 2455 parents = tuple([p.rev() for p in self[None].parents()])
2456 2456 if len(parents) > 1:
2457 2457 ui.status(
2458 2458 _(
2459 2459 b'working directory now based on '
2460 2460 b'revisions %d and %d\n'
2461 2461 )
2462 2462 % parents
2463 2463 )
2464 2464 else:
2465 2465 ui.status(
2466 2466 _(b'working directory now based on revision %d\n') % parents
2467 2467 )
2468 2468 mergemod.mergestate.clean(self, self[b'.'].node())
2469 2469
2470 2470 # TODO: if we know which new heads may result from this rollback, pass
2471 2471 # them to destroy(), which will prevent the branchhead cache from being
2472 2472 # invalidated.
2473 2473 self.destroyed()
2474 2474 return 0
2475 2475
2476 2476 def _buildcacheupdater(self, newtransaction):
2477 2477 """called during transaction to build the callback updating cache
2478 2478
2479 2479 Lives on the repository to help extension who might want to augment
2480 2480 this logic. For this purpose, the created transaction is passed to the
2481 2481 method.
2482 2482 """
2483 2483 # we must avoid a cyclic reference between the repo and the transaction.
2484 2484 reporef = weakref.ref(self)
2485 2485
2486 2486 def updater(tr):
2487 2487 repo = reporef()
2488 2488 repo.updatecaches(tr)
2489 2489
2490 2490 return updater
2491 2491
2492 2492 @unfilteredmethod
2493 2493 def updatecaches(self, tr=None, full=False):
2494 2494 """warm appropriate caches
2495 2495
2496 2496 If this function is called after a transaction has closed, the transaction
2497 2497 will be available in the 'tr' argument. This can be used to selectively
2498 2498 update caches relevant to the changes in that transaction.
2499 2499
2500 2500 If 'full' is set, make sure all caches the function knows about have
2501 2501 up-to-date data, even the ones usually loaded more lazily.
2502 2502 """
2503 2503 if tr is not None and tr.hookargs.get(b'source') == b'strip':
2504 2504 # During strip, many caches are invalid, but a
2505 2505 # later call to `destroyed` will refresh them.
2506 2506 return
2507 2507
2508 2508 if tr is None or tr.changes[b'origrepolen'] < len(self):
2509 2509 # accessing the 'served' branchmap should refresh all the others.
2510 2510 self.ui.debug(b'updating the branch cache\n')
2511 2511 self.filtered(b'served').branchmap()
2512 2512 self.filtered(b'served.hidden').branchmap()
2513 2513
2514 2514 if full:
2515 2515 unfi = self.unfiltered()
2516 2516
2517 2517 self.changelog.update_caches(transaction=tr)
2518 2518 self.manifestlog.update_caches(transaction=tr)
2519 2519
2520 2520 rbc = unfi.revbranchcache()
2521 2521 for r in unfi.changelog:
2522 2522 rbc.branchinfo(r)
2523 2523 rbc.write()
2524 2524
2525 2525 # ensure the working copy parents are in the manifestfulltextcache
2526 2526 for ctx in self[b'.'].parents():
2527 2527 ctx.manifest() # accessing the manifest is enough
2528 2528
2529 2529 # accessing fnode cache warms the cache
2530 2530 tagsmod.fnoderevs(self.ui, unfi, unfi.changelog.revs())
2531 2531 # accessing tags warms the cache
2532 2532 self.tags()
2533 2533 self.filtered(b'served').tags()
2534 2534
2535 2535 # The `full` arg is documented as updating even the lazily-loaded
2536 2536 # caches immediately, so we're forcing a write to cause these caches
2537 2537 # to be warmed up even if they haven't explicitly been requested
2538 2538 # yet (if they've never been used by hg, they won't ever have been
2539 2539 # written, even if they're a subset of another kind of cache that
2540 2540 # *has* been used).
2541 2541 for filt in repoview.filtertable.keys():
2542 2542 filtered = self.filtered(filt)
2543 2543 filtered.branchmap().write(filtered)
2544 2544
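# Hedged usage note, added for illustration: most callers reach this
# method through the updater built by _buildcacheupdater() above, but a
# full warm-up can also be requested directly, e.g.:
#
#     with repo.wlock(), repo.lock():
#         repo.updatecaches(full=True)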
2545 2545 def invalidatecaches(self):
2546 2546
2547 2547 if '_tagscache' in vars(self):
2548 2548 # can't use delattr on proxy
2549 2549 del self.__dict__['_tagscache']
2550 2550
2551 2551 self._branchcaches.clear()
2552 2552 self.invalidatevolatilesets()
2553 2553 self._sparsesignaturecache.clear()
2554 2554
2555 2555 def invalidatevolatilesets(self):
2556 2556 self.filteredrevcache.clear()
2557 2557 obsolete.clearobscaches(self)
2558 2558 self._quick_access_changeid_invalidate()
2559 2559
2560 2560 def invalidatedirstate(self):
2561 2561 '''Invalidates the dirstate, causing the next call to dirstate
2562 2562 to check if it was modified since the last time it was read,
2563 2563 rereading it if it has changed.
2564 2564
2565 2565 This is different from dirstate.invalidate() in that it doesn't always
2566 2566 reread the dirstate. Use dirstate.invalidate() if you want to
2567 2567 explicitly read the dirstate again (i.e. restoring it to a previous
2568 2568 known good state).'''
2569 2569 if hasunfilteredcache(self, 'dirstate'):
2570 2570 for k in self.dirstate._filecache:
2571 2571 try:
2572 2572 delattr(self.dirstate, k)
2573 2573 except AttributeError:
2574 2574 pass
2575 2575 delattr(self.unfiltered(), 'dirstate')
2576 2576
2577 2577 def invalidate(self, clearfilecache=False):
2578 2578 '''Invalidates both store and non-store parts other than dirstate
2579 2579
2580 2580 If a transaction is running, invalidation of store is omitted,
2581 2581 because discarding in-memory changes might cause inconsistency
2582 2582 (e.g. an incomplete fncache causes unintentional failures, but
2583 2583 a redundant one doesn't).
2584 2584 '''
2585 2585 unfiltered = self.unfiltered() # all file caches are stored unfiltered
2586 2586 for k in list(self._filecache.keys()):
2587 2587 # dirstate is invalidated separately in invalidatedirstate()
2588 2588 if k == b'dirstate':
2589 2589 continue
2590 2590 if (
2591 2591 k == b'changelog'
2592 2592 and self.currenttransaction()
2593 2593 and self.changelog._delayed
2594 2594 ):
2595 2595 # The changelog object may store unwritten revisions. We don't
2596 2596 # want to lose them.
2597 2597 # TODO: Solve the problem instead of working around it.
2598 2598 continue
2599 2599
2600 2600 if clearfilecache:
2601 2601 del self._filecache[k]
2602 2602 try:
2603 2603 delattr(unfiltered, k)
2604 2604 except AttributeError:
2605 2605 pass
2606 2606 self.invalidatecaches()
2607 2607 if not self.currenttransaction():
2608 2608 # TODO: Changing contents of store outside transaction
2609 2609 # causes inconsistency. We should make in-memory store
2610 2610 # changes detectable, and abort if changed.
2611 2611 self.store.invalidatecaches()
2612 2612
2613 2613 def invalidateall(self):
2614 2614 '''Fully invalidates both store and non-store parts, causing the
2615 2615 subsequent operation to reread any outside changes.'''
2616 2616 # extension should hook this to invalidate its caches
2617 2617 self.invalidate()
2618 2618 self.invalidatedirstate()
2619 2619
2620 2620 @unfilteredmethod
2621 2621 def _refreshfilecachestats(self, tr):
2622 2622 """Reload stats of cached files so that they are flagged as valid"""
2623 2623 for k, ce in self._filecache.items():
2624 2624 k = pycompat.sysstr(k)
2625 2625 if k == 'dirstate' or k not in self.__dict__:
2626 2626 continue
2627 2627 ce.refresh()
2628 2628
2629 2629 def _lock(
2630 2630 self,
2631 2631 vfs,
2632 2632 lockname,
2633 2633 wait,
2634 2634 releasefn,
2635 2635 acquirefn,
2636 2636 desc,
2637 2637 inheritchecker=None,
2638 2638 parentenvvar=None,
2639 2639 ):
2640 2640 parentlock = None
2641 2641 # the contents of parentenvvar are used by the underlying lock to
2642 2642 # determine whether it can be inherited
2643 2643 if parentenvvar is not None:
2644 2644 parentlock = encoding.environ.get(parentenvvar)
2645 2645
2646 2646 timeout = 0
2647 2647 warntimeout = 0
2648 2648 if wait:
2649 2649 timeout = self.ui.configint(b"ui", b"timeout")
2650 2650 warntimeout = self.ui.configint(b"ui", b"timeout.warn")
2651 2651 # internal config: ui.signal-safe-lock
2652 2652 signalsafe = self.ui.configbool(b'ui', b'signal-safe-lock')
2653 2653
2654 2654 l = lockmod.trylock(
2655 2655 self.ui,
2656 2656 vfs,
2657 2657 lockname,
2658 2658 timeout,
2659 2659 warntimeout,
2660 2660 releasefn=releasefn,
2661 2661 acquirefn=acquirefn,
2662 2662 desc=desc,
2663 2663 inheritchecker=inheritchecker,
2664 2664 parentlock=parentlock,
2665 2665 signalsafe=signalsafe,
2666 2666 )
2667 2667 return l
2668 2668
2669 2669 def _afterlock(self, callback):
2670 2670 """add a callback to be run when the repository is fully unlocked
2671 2671
2672 2672 The callback will be executed when the outermost lock is released
2673 2673 (with wlock being higher level than 'lock')."""
2674 2674 for ref in (self._wlockref, self._lockref):
2675 2675 l = ref and ref()
2676 2676 if l and l.held:
2677 2677 l.postrelease.append(callback)
2678 2678 break
2679 2679 else: # no lock has been found.
2680 2680 callback(True)
2681 2681
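# Illustrative sketch, not part of the original source: an extension
# could use _afterlock to defer work until the repository is fully
# unlocked. The callback receives a single success flag:
#
#     def _notify(success):
#         if success:
#             repo.ui.status(b'all locks released\n')
#
#     repo._afterlock(_notify)
#
# The commithook and runhook helpers defined later in this file follow
# the same pattern.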
2682 2682 def lock(self, wait=True):
2683 2683 '''Lock the repository store (.hg/store) and return a weak reference
2684 2684 to the lock. Use this before modifying the store (e.g. committing or
2685 2685 stripping). If you are opening a transaction, get a lock as well.
2686 2686
2687 2687 If both 'lock' and 'wlock' must be acquired, ensure you always acquire
2688 2688 'wlock' first to avoid a dead-lock hazard.'''
2689 2689 l = self._currentlock(self._lockref)
2690 2690 if l is not None:
2691 2691 l.lock()
2692 2692 return l
2693 2693
2694 2694 l = self._lock(
2695 2695 vfs=self.svfs,
2696 2696 lockname=b"lock",
2697 2697 wait=wait,
2698 2698 releasefn=None,
2699 2699 acquirefn=self.invalidate,
2700 2700 desc=_(b'repository %s') % self.origroot,
2701 2701 )
2702 2702 self._lockref = weakref.ref(l)
2703 2703 return l
2704 2704
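# Illustrative note, added: to honor the ordering contract documented
# above, callers that need both locks take 'wlock' first:
#
#     with repo.wlock(), repo.lock():
#         ...  # safe to modify both the store and the working copy
#
# commit() below follows exactly this pattern.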
2705 2705 def _wlockchecktransaction(self):
2706 2706 if self.currenttransaction() is not None:
2707 2707 raise error.LockInheritanceContractViolation(
2708 2708 b'wlock cannot be inherited in the middle of a transaction'
2709 2709 )
2710 2710
2711 2711 def wlock(self, wait=True):
2712 2712 '''Lock the non-store parts of the repository (everything under
2713 2713 .hg except .hg/store) and return a weak reference to the lock.
2714 2714
2715 2715 Use this before modifying files in .hg.
2716 2716
2717 2717 If both 'lock' and 'wlock' must be acquired, ensure you always acquire
2718 2718 'wlock' first to avoid a dead-lock hazard.'''
2719 2719 l = self._wlockref and self._wlockref()
2720 2720 if l is not None and l.held:
2721 2721 l.lock()
2722 2722 return l
2723 2723
2724 2724 # We do not need to check for non-waiting lock acquisition. Such
2725 2725 # an acquisition would not cause a dead-lock; it would just fail.
2726 2726 if wait and (
2727 2727 self.ui.configbool(b'devel', b'all-warnings')
2728 2728 or self.ui.configbool(b'devel', b'check-locks')
2729 2729 ):
2730 2730 if self._currentlock(self._lockref) is not None:
2731 2731 self.ui.develwarn(b'"wlock" acquired after "lock"')
2732 2732
2733 2733 def unlock():
2734 2734 if self.dirstate.pendingparentchange():
2735 2735 self.dirstate.invalidate()
2736 2736 else:
2737 2737 self.dirstate.write(None)
2738 2738
2739 2739 self._filecache[b'dirstate'].refresh()
2740 2740
2741 2741 l = self._lock(
2742 2742 self.vfs,
2743 2743 b"wlock",
2744 2744 wait,
2745 2745 unlock,
2746 2746 self.invalidatedirstate,
2747 2747 _(b'working directory of %s') % self.origroot,
2748 2748 inheritchecker=self._wlockchecktransaction,
2749 2749 parentenvvar=b'HG_WLOCK_LOCKER',
2750 2750 )
2751 2751 self._wlockref = weakref.ref(l)
2752 2752 return l
2753 2753
2754 2754 def _currentlock(self, lockref):
2755 2755 """Returns the lock if it's held, or None if it's not."""
2756 2756 if lockref is None:
2757 2757 return None
2758 2758 l = lockref()
2759 2759 if l is None or not l.held:
2760 2760 return None
2761 2761 return l
2762 2762
2763 2763 def currentwlock(self):
2764 2764 """Returns the wlock if it's held, or None if it's not."""
2765 2765 return self._currentlock(self._wlockref)
2766 2766
2767 2767 def _filecommit(
2768 2768 self,
2769 2769 fctx,
2770 2770 manifest1,
2771 2771 manifest2,
2772 2772 linkrev,
2773 2773 tr,
2774 2774 changelist,
2775 2775 includecopymeta,
2776 2776 ):
2777 2777 """
2778 2778 commit an individual file as part of a larger transaction
2779 2779 """
2780 2780
2781 2781 fname = fctx.path()
2782 2782 fparent1 = manifest1.get(fname, nullid)
2783 2783 fparent2 = manifest2.get(fname, nullid)
2784 2784 if isinstance(fctx, context.filectx):
2785 2785 node = fctx.filenode()
2786 2786 if node in [fparent1, fparent2]:
2787 2787 self.ui.debug(b'reusing %s filelog entry\n' % fname)
2788 2788 if (
2789 2789 fparent1 != nullid
2790 2790 and manifest1.flags(fname) != fctx.flags()
2791 2791 ) or (
2792 2792 fparent2 != nullid
2793 2793 and manifest2.flags(fname) != fctx.flags()
2794 2794 ):
2795 2795 changelist.append(fname)
2796 2796 return node
2797 2797
2798 2798 flog = self.file(fname)
2799 2799 meta = {}
2800 2800 cfname = fctx.copysource()
2801 2801 if cfname and cfname != fname:
2802 2802 # Mark the new revision of this file as a copy of another
2803 2803 # file. This copy data will effectively act as a parent
2804 2804 # of this new revision. If this is a merge, the first
2805 2805 # parent will be the nullid (meaning "look up the copy data")
2806 2806 # and the second one will be the other parent. For example:
2807 2807 #
2808 2808 # 0 --- 1 --- 3 rev1 changes file foo
2809 2809 # \ / rev2 renames foo to bar and changes it
2810 2810 # \- 2 -/ rev3 should have bar with all changes and
2811 2811 # should record that bar descends from
2812 2812 # bar in rev2 and foo in rev1
2813 2813 #
2814 2814 # this allows this merge to succeed:
2815 2815 #
2816 2816 # 0 --- 1 --- 3 rev4 reverts the content change from rev2
2817 2817 # \ / merging rev3 and rev4 should use bar@rev2
2818 2818 # \- 2 --- 4 as the merge base
2819 2819 #
2820 2820
2821 2821 cnode = manifest1.get(cfname)
2822 2822 newfparent = fparent2
2823 2823
2824 2824 if manifest2: # branch merge
2825 2825 if fparent2 == nullid or cnode is None: # copied on remote side
2826 2826 if cfname in manifest2:
2827 2827 cnode = manifest2[cfname]
2828 2828 newfparent = fparent1
2829 2829
2830 2830 # Here, we used to search backwards through history to try to find
2831 2831 # where the file copy came from if the source of a copy was not in
2832 2832 # the parent directory. However, this doesn't actually make sense to
2833 2833 # do (what does a copy from something not in your working copy even
2834 2834 # mean?) and it causes bugs (eg, issue4476). Instead, we will warn
2835 2835 # the user that copy information was dropped, so if they didn't
2836 2836 # expect this outcome it can be fixed, but this is the correct
2837 2837 # behavior in this circumstance.
2838 2838
2839 2839 if cnode:
2840 2840 self.ui.debug(
2841 2841 b" %s: copy %s:%s\n" % (fname, cfname, hex(cnode))
2842 2842 )
2843 2843 if includecopymeta:
2844 2844 meta[b"copy"] = cfname
2845 2845 meta[b"copyrev"] = hex(cnode)
2846 2846 fparent1, fparent2 = nullid, newfparent
2847 2847 else:
2848 2848 self.ui.warn(
2849 2849 _(
2850 2850 b"warning: can't find ancestor for '%s' "
2851 2851 b"copied from '%s'!\n"
2852 2852 )
2853 2853 % (fname, cfname)
2854 2854 )
2855 2855
2856 2856 elif fparent1 == nullid:
2857 2857 fparent1, fparent2 = fparent2, nullid
2858 2858 elif fparent2 != nullid:
2859 2859 # is one parent an ancestor of the other?
2860 2860 fparentancestors = flog.commonancestorsheads(fparent1, fparent2)
2861 2861 if fparent1 in fparentancestors:
2862 2862 fparent1, fparent2 = fparent2, nullid
2863 2863 elif fparent2 in fparentancestors:
2864 2864 fparent2 = nullid
2865 2865 elif not fparentancestors:
2866 2866 # TODO: this whole if-else might be simplified much more
2867 2867 ms = mergemod.mergestate.read(self)
2868 2868 if (
2869 2869 fname in ms
2870 2870 and ms[fname] == mergemod.MERGE_RECORD_MERGED_OTHER
2871 2871 ):
2872 2872 fparent1, fparent2 = fparent2, nullid
2873 2873
2874 2874 # is the file changed?
2875 2875 text = fctx.data()
2876 2876 if fparent2 != nullid or flog.cmp(fparent1, text) or meta:
2877 2877 changelist.append(fname)
2878 2878 return flog.add(text, meta, tr, linkrev, fparent1, fparent2)
2879 2879 # are just the flags changed during merge?
2880 2880 elif fname in manifest1 and manifest1.flags(fname) != fctx.flags():
2881 2881 changelist.append(fname)
2882 2882
2883 2883 return fparent1
2884 2884
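# Illustrative note, added: when includecopymeta is true, the meta dict
# built above is stored as a filelog metadata header conceptually
# equivalent to:
#
#     \1\ncopy: <source path>\ncopyrev: <hex node>\n\1\n
#
# Setting fparent1 to nullid is what tells readers to consult that
# header (the "look up the copy data" convention described above).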
2885 2885 def checkcommitpatterns(self, wctx, match, status, fail):
2886 2886 """check for commit arguments that aren't committable"""
2887 2887 if match.isexact() or match.prefix():
2888 2888 matched = set(status.modified + status.added + status.removed)
2889 2889
2890 2890 for f in match.files():
2891 2891 f = self.dirstate.normalize(f)
2892 2892 if f == b'.' or f in matched or f in wctx.substate:
2893 2893 continue
2894 2894 if f in status.deleted:
2895 2895 fail(f, _(b'file not found!'))
2896 2896 # Is it a directory that exists or used to exist?
2897 2897 if self.wvfs.isdir(f) or wctx.p1().hasdir(f):
2898 2898 d = f + b'/'
2899 2899 for mf in matched:
2900 2900 if mf.startswith(d):
2901 2901 break
2902 2902 else:
2903 2903 fail(f, _(b"no match under directory!"))
2904 2904 elif f not in self.dirstate:
2905 2905 fail(f, _(b"file not tracked!"))
2906 2906
2907 2907 @unfilteredmethod
2908 2908 def commit(
2909 2909 self,
2910 2910 text=b"",
2911 2911 user=None,
2912 2912 date=None,
2913 2913 match=None,
2914 2914 force=False,
2915 2915 editor=None,
2916 2916 extra=None,
2917 2917 ):
2918 2918 """Add a new revision to current repository.
2919 2919
2920 2920 Revision information is gathered from the working directory,
2921 2921 match can be used to filter the committed files. If editor is
2922 2922 supplied, it is called to get a commit message.
2923 2923 """
2924 2924 if extra is None:
2925 2925 extra = {}
2926 2926
2927 2927 def fail(f, msg):
2928 2928 raise error.Abort(b'%s: %s' % (f, msg))
2929 2929
2930 2930 if not match:
2931 2931 match = matchmod.always()
2932 2932
2933 2933 if not force:
2934 2934 match.bad = fail
2935 2935
2936 2936 # lock() for recent changelog (see issue4368)
2937 2937 with self.wlock(), self.lock():
2938 2938 wctx = self[None]
2939 2939 merge = len(wctx.parents()) > 1
2940 2940
2941 2941 if not force and merge and not match.always():
2942 2942 raise error.Abort(
2943 2943 _(
2944 2944 b'cannot partially commit a merge '
2945 2945 b'(do not specify files or patterns)'
2946 2946 )
2947 2947 )
2948 2948
2949 2949 status = self.status(match=match, clean=force)
2950 2950 if force:
2951 2951 status.modified.extend(
2952 2952 status.clean
2953 2953 ) # mq may commit clean files
2954 2954
2955 2955 # check subrepos
2956 2956 subs, commitsubs, newstate = subrepoutil.precommit(
2957 2957 self.ui, wctx, status, match, force=force
2958 2958 )
2959 2959
2960 2960 # make sure all explicit patterns are matched
2961 2961 if not force:
2962 2962 self.checkcommitpatterns(wctx, match, status, fail)
2963 2963
2964 2964 cctx = context.workingcommitctx(
2965 2965 self, status, text, user, date, extra
2966 2966 )
2967 2967
2968 2968 ms = mergemod.mergestate.read(self)
2969 2969 mergeutil.checkunresolved(ms)
2970 2970
2971 2971 # internal config: ui.allowemptycommit
2972 2972 allowemptycommit = (
2973 2973 wctx.branch() != wctx.p1().branch()
2974 2974 or extra.get(b'close')
2975 2975 or merge
2976 2976 or cctx.files()
2977 2977 or self.ui.configbool(b'ui', b'allowemptycommit')
2978 2978 )
2979 2979 if not allowemptycommit:
2980 2980 self.ui.debug(b'nothing to commit, clearing merge state\n')
2981 2981 ms.reset()
2982 2982 return None
2983 2983
2984 2984 if merge and cctx.deleted():
2985 2985 raise error.Abort(_(b"cannot commit merge with missing files"))
2986 2986
2987 2987 if editor:
2988 2988 cctx._text = editor(self, cctx, subs)
2989 2989 edited = text != cctx._text
2990 2990
2991 2991 # Save commit message in case this transaction gets rolled back
2992 2992 # (e.g. by a pretxncommit hook). Leave the content alone on
2993 2993 # the assumption that the user will use the same editor again.
2994 2994 msgfn = self.savecommitmessage(cctx._text)
2995 2995
2996 2996 # commit subs and write new state
2997 2997 if subs:
2998 2998 uipathfn = scmutil.getuipathfn(self)
2999 2999 for s in sorted(commitsubs):
3000 3000 sub = wctx.sub(s)
3001 3001 self.ui.status(
3002 3002 _(b'committing subrepository %s\n')
3003 3003 % uipathfn(subrepoutil.subrelpath(sub))
3004 3004 )
3005 3005 sr = sub.commit(cctx._text, user, date)
3006 3006 newstate[s] = (newstate[s][0], sr)
3007 3007 subrepoutil.writestate(self, newstate)
3008 3008
3009 3009 p1, p2 = self.dirstate.parents()
3010 3010 hookp1, hookp2 = hex(p1), (p2 != nullid and hex(p2) or b'')
3011 3011 try:
3012 3012 self.hook(
3013 3013 b"precommit", throw=True, parent1=hookp1, parent2=hookp2
3014 3014 )
3015 3015 with self.transaction(b'commit'):
3016 3016 ret = self.commitctx(cctx, True)
3017 3017 # update bookmarks, dirstate and mergestate
3018 3018 bookmarks.update(self, [p1, p2], ret)
3019 3019 cctx.markcommitted(ret)
3020 3020 ms.reset()
3021 3021 except: # re-raises
3022 3022 if edited:
3023 3023 self.ui.write(
3024 3024 _(b'note: commit message saved in %s\n') % msgfn
3025 3025 )
3026 3026 self.ui.write(
3027 3027 _(
3028 3028 b"note: use 'hg commit --logfile "
3029 3029 b".hg/last-message.txt --edit' to reuse it\n"
3030 3030 )
3031 3031 )
3032 3032 raise
3033 3033
3034 3034 def commithook(unused_success):
3035 3035 # hack for commands that use a temporary commit (e.g. histedit):
3036 3036 # the temporary commit may have been stripped before the hook runs
3037 3037 if self.changelog.hasnode(ret):
3038 3038 self.hook(
3039 3039 b"commit", node=hex(ret), parent1=hookp1, parent2=hookp2
3040 3040 )
3041 3041
3042 3042 self._afterlock(commithook)
3043 3043 return ret
3044 3044
3045 3045 @unfilteredmethod
3046 3046 def commitctx(self, ctx, error=False, origctx=None):
3047 3047 """Add a new revision to current repository.
3048 3048 Revision information is passed via the context argument.
3049 3049
3050 3050 ctx.files() should list all files involved in this commit, i.e.
3051 3051 modified/added/removed files. On merge, it may be wider than the
3052 3052 ctx.files() to be committed, since any file nodes derived directly
3053 3053 from p1 or p2 are excluded from the committed ctx.files().
3054 3054
3055 3055 origctx is for convert to work around the problem that bug
3056 3056 fixes to the files list in changesets change hashes. For
3057 3057 convert to be the identity, it can pass an origctx and this
3058 3058 function will use the same files list when it makes sense to
3059 3059 do so.
3060 3060 """
3061 3061
3062 3062 p1, p2 = ctx.p1(), ctx.p2()
3063 3063 user = ctx.user()
3064 3064
3065 3065 if self.filecopiesmode == b'changeset-sidedata':
3066 3066 writechangesetcopy = True
3067 3067 writefilecopymeta = True
3068 3068 writecopiesto = None
3069 3069 else:
3070 3070 writecopiesto = self.ui.config(b'experimental', b'copies.write-to')
3071 3071 writefilecopymeta = writecopiesto != b'changeset-only'
3072 3072 writechangesetcopy = writecopiesto in (
3073 3073 b'changeset-only',
3074 3074 b'compatibility',
3075 3075 )
3076 3076 p1copies, p2copies = None, None
3077 3077 if writechangesetcopy:
3078 3078 p1copies = ctx.p1copies()
3079 3079 p2copies = ctx.p2copies()
3080 3080 filesadded, filesremoved = None, None
3081 3081 with self.lock(), self.transaction(b"commit") as tr:
3082 3082 trp = weakref.proxy(tr)
3083 3083
3084 3084 if ctx.manifestnode():
3085 3085 # reuse an existing manifest revision
3086 3086 self.ui.debug(b'reusing known manifest\n')
3087 3087 mn = ctx.manifestnode()
3088 3088 files = ctx.files()
3089 3089 if writechangesetcopy:
3090 3090 filesadded = ctx.filesadded()
3091 3091 filesremoved = ctx.filesremoved()
3092 3092 elif ctx.files():
3093 3093 m1ctx = p1.manifestctx()
3094 3094 m2ctx = p2.manifestctx()
3095 3095 mctx = m1ctx.copy()
3096 3096
3097 3097 m = mctx.read()
3098 3098 m1 = m1ctx.read()
3099 3099 m2 = m2ctx.read()
3100 3100
3101 3101 # check in files
3102 3102 added = []
3103 3103 changed = []
3104 3104 removed = list(ctx.removed())
3105 3105 linkrev = len(self)
3106 3106 self.ui.note(_(b"committing files:\n"))
3107 3107 uipathfn = scmutil.getuipathfn(self)
3108 3108 for f in sorted(ctx.modified() + ctx.added()):
3109 3109 self.ui.note(uipathfn(f) + b"\n")
3110 3110 try:
3111 3111 fctx = ctx[f]
3112 3112 if fctx is None:
3113 3113 removed.append(f)
3114 3114 else:
3115 3115 added.append(f)
3116 3116 m[f] = self._filecommit(
3117 3117 fctx,
3118 3118 m1,
3119 3119 m2,
3120 3120 linkrev,
3121 3121 trp,
3122 3122 changed,
3123 3123 writefilecopymeta,
3124 3124 )
3125 3125 m.setflag(f, fctx.flags())
3126 3126 except OSError:
3127 3127 self.ui.warn(
3128 3128 _(b"trouble committing %s!\n") % uipathfn(f)
3129 3129 )
3130 3130 raise
3131 3131 except IOError as inst:
3132 3132 errcode = getattr(inst, 'errno', errno.ENOENT)
3133 3133 if error or errcode and errcode != errno.ENOENT:
3134 3134 self.ui.warn(
3135 3135 _(b"trouble committing %s!\n") % uipathfn(f)
3136 3136 )
3137 3137 raise
3138 3138
3139 3139 # update manifest
3140 3140 removed = [f for f in removed if f in m1 or f in m2]
3141 3141 drop = sorted([f for f in removed if f in m])
3142 3142 for f in drop:
3143 3143 del m[f]
3144 3144 if p2.rev() != nullrev:
3145 3145
3146 3146 @util.cachefunc
3147 3147 def mas():
3148 3148 p1n = p1.node()
3149 3149 p2n = p2.node()
3150 3150 cahs = self.changelog.commonancestorsheads(p1n, p2n)
3151 3151 if not cahs:
3152 3152 cahs = [nullrev]
3153 3153 return [self[r].manifest() for r in cahs]
3154 3154
3155 3155 def deletionfromparent(f):
3156 3156 # When a file is removed relative to p1 in a merge, this
3157 3157 # function determines whether the absence is due to a
3158 3158 # deletion from a parent, or whether the merge commit
3159 3159 # itself deletes the file. We decide this by doing a
3160 3160 # simplified three way merge of the manifest entry for
3161 3161 # the file. There are two ways we decide the merge
3162 3162 # itself didn't delete a file:
3163 3163 # - neither parent (nor the merge) contain the file
3164 3164 # - exactly one parent contains the file, and that
3165 3165 # parent has the same filelog entry as the merge
3166 3166 # ancestor (or all of them if there are two). In other
3167 3167 # words, that parent left the file unchanged while the
3168 3168 # other one deleted it.
3169 3169 # One way to think about this is that deleting a file is
3170 3170 # similar to emptying it, so the list of changed files
3171 3171 # should be similar either way. The computation
3172 3172 # described above is not done directly in _filecommit
3173 3173 # when creating the list of changed files, however
3174 3174 # it does something very similar by comparing filelog
3175 3175 # nodes.
3176 3176 if f in m1:
3177 3177 return f not in m2 and all(
3178 3178 f in ma and ma.find(f) == m1.find(f)
3179 3179 for ma in mas()
3180 3180 )
3181 3181 elif f in m2:
3182 3182 return all(
3183 3183 f in ma and ma.find(f) == m2.find(f)
3184 3184 for ma in mas()
3185 3185 )
3186 3186 else:
3187 3187 return True
3188 3188
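# Illustrative example, added: if 'f' is present in m1 and in every
# merge ancestor with the same filelog entry, but absent from m2, the
# absence is attributed to p2's side of the merge; deletionfromparent()
# returns True and 'f' is filtered out of 'removed' below.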
3189 3189 removed = [f for f in removed if not deletionfromparent(f)]
3190 3190
3191 3191 files = changed + removed
3192 3192 md = None
3193 3193 if not files:
3194 3194 # if no "files" actually changed in terms of the changelog,
3195 3195 # try hard to detect unmodified manifest entry so that the
3196 3196 # exact same commit can be reproduced later on convert.
3197 3197 md = m1.diff(m, scmutil.matchfiles(self, ctx.files()))
3198 3198 if not files and md:
3199 3199 self.ui.debug(
3200 3200 b'not reusing manifest (no file change in '
3201 3201 b'changelog, but manifest differs)\n'
3202 3202 )
3203 3203 if files or md:
3204 3204 self.ui.note(_(b"committing manifest\n"))
3205 3205 # we're using narrowmatch here since it's already applied at
3206 3206 # other stages (such as dirstate.walk), so we're already
3207 3207 # ignoring things outside of narrowspec in most cases. The
3208 3208 # one case where we might have files outside the narrowspec
3209 3209 # at this point is merges, and we already error out in the
3210 3210 # case where the merge has files outside of the narrowspec,
3211 3211 # so this is safe.
3212 3212 mn = mctx.write(
3213 3213 trp,
3214 3214 linkrev,
3215 3215 p1.manifestnode(),
3216 3216 p2.manifestnode(),
3217 3217 added,
3218 3218 drop,
3219 3219 match=self.narrowmatch(),
3220 3220 )
3221 3221
3222 3222 if writechangesetcopy:
3223 3223 filesadded = [
3224 3224 f for f in changed if not (f in m1 or f in m2)
3225 3225 ]
3226 3226 filesremoved = removed
3227 3227 else:
3228 3228 self.ui.debug(
3229 3229 b'reusing manifest from p1 (listed files '
3230 3230 b'actually unchanged)\n'
3231 3231 )
3232 3232 mn = p1.manifestnode()
3233 3233 else:
3234 3234 self.ui.debug(b'reusing manifest from p1 (no file change)\n')
3235 3235 mn = p1.manifestnode()
3236 3236 files = []
3237 3237
3238 3238 if writecopiesto == b'changeset-only':
3239 3239 # If writing only to changeset extras, use None to indicate that
3240 3240 # no entry should be written. If writing to both, write an empty
3241 3241 # entry to prevent the reader from falling back to reading
3242 3242 # filelogs.
3243 3243 p1copies = p1copies or None
3244 3244 p2copies = p2copies or None
3245 3245 filesadded = filesadded or None
3246 3246 filesremoved = filesremoved or None
3247 3247
3248 3248 if origctx and origctx.manifestnode() == mn:
3249 3249 files = origctx.files()
3250 3250
3251 3251 # update changelog
3252 3252 self.ui.note(_(b"committing changelog\n"))
3253 3253 self.changelog.delayupdate(tr)
3254 3254 n = self.changelog.add(
3255 3255 mn,
3256 3256 files,
3257 3257 ctx.description(),
3258 3258 trp,
3259 3259 p1.node(),
3260 3260 p2.node(),
3261 3261 user,
3262 3262 ctx.date(),
3263 3263 ctx.extra().copy(),
3264 3264 p1copies,
3265 3265 p2copies,
3266 3266 filesadded,
3267 3267 filesremoved,
3268 3268 )
3269 3269 xp1, xp2 = p1.hex(), p2 and p2.hex() or b''
3270 3270 self.hook(
3271 3271 b'pretxncommit',
3272 3272 throw=True,
3273 3273 node=hex(n),
3274 3274 parent1=xp1,
3275 3275 parent2=xp2,
3276 3276 )
3277 3277 # set the new commit in its proper phase
3278 3278 targetphase = subrepoutil.newcommitphase(self.ui, ctx)
3279 3279 if targetphase:
3280 3280 # retracting the boundary does not alter the parent changeset.
3281 3281 # if a parent has a higher phase, the resulting phase will
3282 3282 # be compliant anyway
3283 3283 #
3284 3284 # if the minimal phase was 0 we don't need to retract anything
3285 3285 phases.registernew(self, tr, targetphase, [n])
3286 3286 return n
3287 3287
3288 3288 @unfilteredmethod
3289 3289 def destroying(self):
3290 3290 '''Inform the repository that nodes are about to be destroyed.
3291 3291 Intended for use by strip and rollback, so there's a common
3292 3292 place for anything that has to be done before destroying history.
3293 3293
3294 3294 This is mostly useful for saving state that is in memory and waiting
3295 3295 to be flushed when the current lock is released. Because a call to
3296 3296 destroyed is imminent, the repo will be invalidated, causing those
3297 3297 changes either to stay in memory (waiting for the next unlock) or to
3298 3298 vanish completely.
3299 3299 '''
3300 3300 # When using the same lock to commit and strip, the phasecache is left
3301 3301 # dirty after committing. Then when we strip, the repo is invalidated,
3302 3302 # causing those changes to disappear.
3303 3303 if '_phasecache' in vars(self):
3304 3304 self._phasecache.write()
3305 3305
3306 3306 @unfilteredmethod
3307 3307 def destroyed(self):
3308 3308 '''Inform the repository that nodes have been destroyed.
3309 3309 Intended for use by strip and rollback, so there's a common
3310 3310 place for anything that has to be done after destroying history.
3311 3311 '''
3312 3312 # When one tries to:
3313 3313 # 1) destroy nodes thus calling this method (e.g. strip)
3314 3314 # 2) use phasecache somewhere (e.g. commit)
3315 3315 #
3316 3316 # then 2) will fail because the phasecache contains nodes that were
3317 3317 # removed. We can either remove phasecache from the filecache,
3318 3318 # causing it to reload next time it is accessed, or simply filter
3319 3319 # the removed nodes now and write the updated cache.
3320 3320 self._phasecache.filterunknown(self)
3321 3321 self._phasecache.write()
3322 3322
3323 3323 # refresh all repository caches
3324 3324 self.updatecaches()
3325 3325
3326 3326 # Ensure the persistent tag cache is updated. Doing it now
3327 3327 # means that the tag cache only has to worry about destroyed
3328 3328 # heads immediately after a strip/rollback. That in turn
3329 3329 # guarantees that "cachetip == currenttip" (comparing both rev
3330 3330 # and node) always means no nodes have been added or destroyed.
3331 3331
3332 3332 # XXX this is suboptimal when qrefresh'ing: we strip the current
3333 3333 # head, refresh the tag cache, then immediately add a new head.
3334 3334 # But I think doing it this way is necessary for the "instant
3335 3335 # tag cache retrieval" case to work.
3336 3336 self.invalidate()
3337 3337
3338 3338 def status(
3339 3339 self,
3340 3340 node1=b'.',
3341 3341 node2=None,
3342 3342 match=None,
3343 3343 ignored=False,
3344 3344 clean=False,
3345 3345 unknown=False,
3346 3346 listsubrepos=False,
3347 3347 ):
3348 3348 '''a convenience method that calls node1.status(node2)'''
3349 3349 return self[node1].status(
3350 3350 node2, match, ignored, clean, unknown, listsubrepos
3351 3351 )
3352 3352
3353 3353 def addpostdsstatus(self, ps):
3354 3354 """Add a callback to run within the wlock, at the point at which status
3355 3355 fixups happen.
3356 3356
3357 3357 On status completion, callback(wctx, status) will be called with the
3358 3358 wlock held, unless the dirstate has changed from underneath or the wlock
3359 3359 couldn't be grabbed.
3360 3360
3361 3361 Callbacks should not capture and use a cached copy of the dirstate --
3362 3362 it might change in the meantime. Instead, they should access the
3363 3363 dirstate via wctx.repo().dirstate.
3364 3364
3365 3365 This list is emptied out after each status run -- extensions should
3366 3366 make sure they add to this list each time dirstate.status is called.
3367 3367 Extensions should also make sure they don't call this for statuses
3368 3368 that don't involve the dirstate.
3369 3369 """
3370 3370
3371 3371 # The list is located here for uniqueness reasons -- it is actually
3372 3372 # managed by the workingctx, but that isn't unique per-repo.
3373 3373 self._postdsstatus.append(ps)
3374 3374
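# Hypothetical extension sketch (the callback name is illustrative, not
# from the original source); fsmonitor-style extensions register
# callbacks this way:
#
#     def poststatus(wctx, status):
#         # always go through wctx.repo().dirstate, never a cached copy
#         wctx.repo().ui.debug(b'%d modified\n' % len(status.modified))
#
#     repo.addpostdsstatus(poststatus)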
3375 3375 def postdsstatus(self):
3376 3376 """Used by workingctx to get the list of post-dirstate-status hooks."""
3377 3377 return self._postdsstatus
3378 3378
3379 3379 def clearpostdsstatus(self):
3380 3380 """Used by workingctx to clear post-dirstate-status hooks."""
3381 3381 del self._postdsstatus[:]
3382 3382
3383 3383 def heads(self, start=None):
3384 3384 if start is None:
3385 3385 cl = self.changelog
3386 3386 headrevs = reversed(cl.headrevs())
3387 3387 return [cl.node(rev) for rev in headrevs]
3388 3388
3389 3389 heads = self.changelog.heads(start)
3390 3390 # sort the output in rev descending order
3391 3391 return sorted(heads, key=self.changelog.rev, reverse=True)
3392 3392
3393 3393 def branchheads(self, branch=None, start=None, closed=False):
3394 3394 '''return a (possibly filtered) list of heads for the given branch
3395 3395
3396 3396 Heads are returned in topological order, from newest to oldest.
3397 3397 If branch is None, use the dirstate branch.
3398 3398 If start is not None, return only heads reachable from start.
3399 3399 If closed is True, return heads that are marked as closed as well.
3400 3400 '''
3401 3401 if branch is None:
3402 3402 branch = self[None].branch()
3403 3403 branches = self.branchmap()
3404 3404 if not branches.hasbranch(branch):
3405 3405 return []
3406 3406 # the cache returns heads ordered lowest to highest
3407 3407 bheads = list(reversed(branches.branchheads(branch, closed=closed)))
3408 3408 if start is not None:
3409 3409 # filter out the heads that cannot be reached from startrev
3410 3410 fbheads = set(self.changelog.nodesbetween([start], bheads)[2])
3411 3411 bheads = [h for h in bheads if h in fbheads]
3412 3412 return bheads
3413 3413
3414 3414 def branches(self, nodes):
3415 3415 if not nodes:
3416 3416 nodes = [self.changelog.tip()]
3417 3417 b = []
3418 3418 for n in nodes:
3419 3419 t = n
3420 3420 while True:
3421 3421 p = self.changelog.parents(n)
3422 3422 if p[1] != nullid or p[0] == nullid:
3423 3423 b.append((t, n, p[0], p[1]))
3424 3424 break
3425 3425 n = p[0]
3426 3426 return b
3427 3427
3428 3428 def between(self, pairs):
3429 3429 r = []
3430 3430
3431 3431 for top, bottom in pairs:
3432 3432 n, l, i = top, [], 0
3433 3433 f = 1
3434 3434
3435 3435 while n != bottom and n != nullid:
3436 3436 p = self.changelog.parents(n)[0]
3437 3437 if i == f:
3438 3438 l.append(n)
3439 3439 f = f * 2
3440 3440 n = p
3441 3441 i += 1
3442 3442
3443 3443 r.append(l)
3444 3444
3445 3445 return r
3446 3446
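# Descriptive note, added: for each (top, bottom) pair the loop above
# collects first-parent ancestors of 'top' at exponentially growing
# distances (1, 2, 4, 8, ...) until it reaches 'bottom', so the result
# stays logarithmic in the length of the chain. This backs the legacy
# 'between' wire-protocol command.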
3447 3447 def checkpush(self, pushop):
3448 3448 """Extensions can override this function if additional checks have
3449 3449 to be performed before pushing, or call it if they override push
3450 3450 command.
3451 3451 """
3452 3452
3453 3453 @unfilteredpropertycache
3454 3454 def prepushoutgoinghooks(self):
3455 3455 """Return util.hooks consists of a pushop with repo, remote, outgoing
3456 3456 methods, which are called before pushing changesets.
3457 3457 """
3458 3458 return util.hooks()
3459 3459
3460 3460 def pushkey(self, namespace, key, old, new):
3461 3461 try:
3462 3462 tr = self.currenttransaction()
3463 3463 hookargs = {}
3464 3464 if tr is not None:
3465 3465 hookargs.update(tr.hookargs)
3466 3466 hookargs = pycompat.strkwargs(hookargs)
3467 3467 hookargs['namespace'] = namespace
3468 3468 hookargs['key'] = key
3469 3469 hookargs['old'] = old
3470 3470 hookargs['new'] = new
3471 3471 self.hook(b'prepushkey', throw=True, **hookargs)
3472 3472 except error.HookAbort as exc:
3473 3473 self.ui.write_err(_(b"pushkey-abort: %s\n") % exc)
3474 3474 if exc.hint:
3475 3475 self.ui.write_err(_(b"(%s)\n") % exc.hint)
3476 3476 return False
3477 3477 self.ui.debug(b'pushing key for "%s:%s"\n' % (namespace, key))
3478 3478 ret = pushkey.push(self, namespace, key, old, new)
3479 3479
3480 3480 def runhook(unused_success):
3481 3481 self.hook(
3482 3482 b'pushkey',
3483 3483 namespace=namespace,
3484 3484 key=key,
3485 3485 old=old,
3486 3486 new=new,
3487 3487 ret=ret,
3488 3488 )
3489 3489
3490 3490 self._afterlock(runhook)
3491 3491 return ret
3492 3492
3493 3493 def listkeys(self, namespace):
3494 3494 self.hook(b'prelistkeys', throw=True, namespace=namespace)
3495 3495 self.ui.debug(b'listing keys for "%s"\n' % namespace)
3496 3496 values = pushkey.list(self, namespace)
3497 3497 self.hook(b'listkeys', namespace=namespace, values=values)
3498 3498 return values
3499 3499
3500 3500 def debugwireargs(self, one, two, three=None, four=None, five=None):
3501 3501 '''used to test argument passing over the wire'''
3502 3502 return b"%s %s %s %s %s" % (
3503 3503 one,
3504 3504 two,
3505 3505 pycompat.bytestr(three),
3506 3506 pycompat.bytestr(four),
3507 3507 pycompat.bytestr(five),
3508 3508 )
3509 3509
3510 3510 def savecommitmessage(self, text):
3511 3511 fp = self.vfs(b'last-message.txt', b'wb')
3512 3512 try:
3513 3513 fp.write(text)
3514 3514 finally:
3515 3515 fp.close()
3516 3516 return self.pathto(fp.name[len(self.root) + 1 :])
3517 3517
3518 3518
3519 3519 # used to avoid circular references so destructors work
3520 3520 def aftertrans(files):
3521 3521 renamefiles = [tuple(t) for t in files]
3522 3522
3523 3523 def a():
3524 3524 for vfs, src, dest in renamefiles:
3525 3525 # if src and dest refer to the same file, vfs.rename is a no-op,
3526 3526 # leaving both src and dest on disk. delete dest to make sure
3527 3527 # the rename couldn't be such a no-op.
3528 3528 vfs.tryunlink(dest)
3529 3529 try:
3530 3530 vfs.rename(src, dest)
3531 3531 except OSError: # journal file does not yet exist
3532 3532 pass
3533 3533
3534 3534 return a
3535 3535
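# Descriptive note, added: the transaction machinery passes (vfs, src,
# dest) triples such as (vfs, b'journal', b'undo') here, so a closing
# transaction renames its journal files into the 'undo' data that
# rollback() consumes.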
3536 3536
3537 3537 def undoname(fn):
3538 3538 base, name = os.path.split(fn)
3539 3539 assert name.startswith(b'journal')
3540 3540 return os.path.join(base, name.replace(b'journal', b'undo', 1))
3541 3541
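# Worked example, added for illustration:
#
#     undoname(b'.hg/store/journal') == b'.hg/store/undo'
#     undoname(b'.hg/store/journal.phaseroots') == b'.hg/store/undo.phaseroots'
#
# Only the first 'journal' substring of the basename is replaced, and
# the basename must start with b'journal' or the assertion fires.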
3542 3542
3543 3543 def instance(ui, path, create, intents=None, createopts=None):
3544 3544 localpath = util.urllocalpath(path)
3545 3545 if create:
3546 3546 createrepository(ui, localpath, createopts=createopts)
3547 3547
3548 3548 return makelocalrepository(ui, localpath, intents=intents)
3549 3549
3550 3550
3551 3551 def islocal(path):
3552 3552 return True
3553 3553
3554 3554
3555 3555 def defaultcreateopts(ui, createopts=None):
3556 3556 """Populate the default creation options for a repository.
3557 3557
3558 3558 A dictionary of explicitly requested creation options can be passed
3559 3559 in. Missing keys will be populated.
3560 3560 """
3561 3561 createopts = dict(createopts or {})
3562 3562
3563 3563 if b'backend' not in createopts:
3564 3564 # experimental config: storage.new-repo-backend
3565 3565 createopts[b'backend'] = ui.config(b'storage', b'new-repo-backend')
3566 3566
3567 3567 return createopts
3568 3568
3569 3569
3570 3570 def newreporequirements(ui, createopts):
3571 3571 """Determine the set of requirements for a new local repository.
3572 3572
3573 3573 Extensions can wrap this function to specify custom requirements for
3574 3574 new repositories.
3575 3575 """
3576 3576 # If the repo is being created from a shared repository, we copy
3577 3577 # its requirements.
3578 3578 if b'sharedrepo' in createopts:
3579 3579 requirements = set(createopts[b'sharedrepo'].requirements)
3580 3580 if createopts.get(b'sharedrelative'):
3581 3581 requirements.add(b'relshared')
3582 3582 else:
3583 3583 requirements.add(b'shared')
3584 3584
3585 3585 return requirements
3586 3586
3587 3587 if b'backend' not in createopts:
3588 3588 raise error.ProgrammingError(
3589 3589 b'backend key not present in createopts; '
3590 3590 b'was defaultcreateopts() called?'
3591 3591 )
3592 3592
3593 3593 if createopts[b'backend'] != b'revlogv1':
3594 3594 raise error.Abort(
3595 3595 _(
3596 3596 b'unable to determine repository requirements for '
3597 3597 b'storage backend: %s'
3598 3598 )
3599 3599 % createopts[b'backend']
3600 3600 )
3601 3601
3602 3602 requirements = {b'revlogv1'}
3603 3603 if ui.configbool(b'format', b'usestore'):
3604 3604 requirements.add(b'store')
3605 3605 if ui.configbool(b'format', b'usefncache'):
3606 3606 requirements.add(b'fncache')
3607 3607 if ui.configbool(b'format', b'dotencode'):
3608 3608 requirements.add(b'dotencode')
3609 3609
3610 3610 compengines = ui.configlist(b'format', b'revlog-compression')
3611 3611 for compengine in compengines:
3612 3612 if compengine in util.compengines:
3613 3613 break
3614 3614 else:
3615 3615 raise error.Abort(
3616 3616 _(
3617 3617 b'compression engines %s defined by '
3618 3618 b'format.revlog-compression not available'
3619 3619 )
3620 3620 % b', '.join(b'"%s"' % e for e in compengines),
3621 3621 hint=_(
3622 3622 b'run "hg debuginstall" to list available '
3623 3623 b'compression engines'
3624 3624 ),
3625 3625 )
3626 3626
3627 3627 # zlib is the historical default and doesn't need an explicit requirement.
3628 3628 if compengine == b'zstd':
3629 3629 requirements.add(b'revlog-compression-zstd')
3630 3630 elif compengine != b'zlib':
3631 3631 requirements.add(b'exp-compression-%s' % compengine)
3632 3632
3633 3633 if scmutil.gdinitconfig(ui):
3634 3634 requirements.add(b'generaldelta')
3635 3635 if ui.configbool(b'format', b'sparse-revlog'):
3636 3636 requirements.add(SPARSEREVLOG_REQUIREMENT)
3637 3637
3638 3638 # experimental config: format.exp-use-side-data
3639 3639 if ui.configbool(b'format', b'exp-use-side-data'):
3640 3640 requirements.add(SIDEDATA_REQUIREMENT)
3641 3641 # experimental config: format.exp-use-copies-side-data-changeset
3642 3642 if ui.configbool(b'format', b'exp-use-copies-side-data-changeset'):
3643 3643 requirements.add(SIDEDATA_REQUIREMENT)
3644 3644 requirements.add(COPIESSDC_REQUIREMENT)
3645 3645 if ui.configbool(b'experimental', b'treemanifest'):
3646 3646 requirements.add(b'treemanifest')
3647 3647
3648 3648 revlogv2 = ui.config(b'experimental', b'revlogv2')
3649 3649 if revlogv2 == b'enable-unstable-format-and-corrupt-my-data':
3650 3650 requirements.remove(b'revlogv1')
3651 3651 # generaldelta is implied by revlogv2.
3652 3652 requirements.discard(b'generaldelta')
3653 3653 requirements.add(REVLOGV2_REQUIREMENT)
3654 3654 # experimental config: format.internal-phase
3655 3655 if ui.configbool(b'format', b'internal-phase'):
3656 3656 requirements.add(b'internal-phase')
3657 3657
3658 3658 if createopts.get(b'narrowfiles'):
3659 3659 requirements.add(repository.NARROW_REQUIREMENT)
3660 3660
3661 3661 if createopts.get(b'lfs'):
3662 3662 requirements.add(b'lfs')
3663 3663
3664 3664 if ui.configbool(b'format', b'bookmarks-in-store'):
3665 3665 requirements.add(bookmarks.BOOKMARKS_IN_STORE_REQUIREMENT)
3666 3666
3667 3667 if ui.configbool(b'experimental', b'exp-persistent-nodemap'):
3668 3668 requirements.add(NODEMAP_REQUIREMENT)
3669 3669
3670 3670 return requirements
3671 3671
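# Illustrative note, added: with stock configuration this returns a set
# along the lines of
#
#     {b'revlogv1', b'store', b'fncache', b'dotencode',
#      b'generaldelta', b'sparserevlog'}
#
# though the exact contents depend on the ui configuration and
# createopts, as the branches above show.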
3672 3672
3673 3673 def filterknowncreateopts(ui, createopts):
3674 3674 """Filters a dict of repo creation options against options that are known.
3675 3675
3676 3676 Receives a dict of repo creation options and returns a dict of those
3677 3677 options that we don't know how to handle.
3678 3678
3679 3679 This function is called as part of repository creation. If the
3680 3680 returned dict contains any items, repository creation will not
3681 3681 be allowed, as it means there was a request to create a repository
3682 3682 with options not recognized by loaded code.
3683 3683
3684 3684 Extensions can wrap this function to filter out creation options
3685 3685 they know how to handle.
3686 3686 """
3687 3687 known = {
3688 3688 b'backend',
3689 3689 b'lfs',
3690 3690 b'narrowfiles',
3691 3691 b'sharedrepo',
3692 3692 b'sharedrelative',
3693 3693 b'shareditems',
3694 3694 b'shallowfilestore',
3695 3695 }
3696 3696
3697 3697 return {k: v for k, v in createopts.items() if k not in known}
3698 3698
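# Hypothetical extension sketch (b'myopt' is an illustrative name): an
# extension that understands an extra creation option strips it from
# the unknown set before returning, e.g.:
#
#     def _filterknown(orig, ui, createopts):
#         unknown = orig(ui, createopts)
#         unknown.pop(b'myopt', None)
#         return unknown
#
#     extensions.wrapfunction(
#         localrepo, 'filterknowncreateopts', _filterknown
#     )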
3699 3699
3700 3700 def createrepository(ui, path, createopts=None):
3701 3701 """Create a new repository in a vfs.
3702 3702
3703 3703 ``path`` path to the new repo's working directory.
3704 3704 ``createopts`` options for the new repository.
3705 3705
3706 3706 The following keys for ``createopts`` are recognized:
3707 3707
3708 3708 backend
3709 3709 The storage backend to use.
3710 3710 lfs
3711 3711 Repository will be created with ``lfs`` requirement. The lfs extension
3712 3712 will automatically be loaded when the repository is accessed.
3713 3713 narrowfiles
3714 3714 Set up repository to support narrow file storage.
3715 3715 sharedrepo
3716 3716 Repository object from which storage should be shared.
3717 3717 sharedrelative
3718 3718 Boolean indicating if the path to the shared repo should be
3719 3719 stored as relative. By default, the pointer to the "parent" repo
3720 3720 is stored as an absolute path.
3721 3721 shareditems
3722 3722 Set of items to share to the new repository (in addition to storage).
3723 3723 shallowfilestore
3724 3724 Indicates that storage for files should be shallow (not all ancestor
3725 3725 revisions are known).
3726 3726 """
3727 3727 createopts = defaultcreateopts(ui, createopts=createopts)
3728 3728
3729 3729 unknownopts = filterknowncreateopts(ui, createopts)
3730 3730
3731 3731 if not isinstance(unknownopts, dict):
3732 3732 raise error.ProgrammingError(
3733 3733 b'filterknowncreateopts() did not return a dict'
3734 3734 )
3735 3735
3736 3736 if unknownopts:
3737 3737 raise error.Abort(
3738 3738 _(
3739 3739 b'unable to create repository because of unknown '
3740 3740 b'creation option: %s'
3741 3741 )
3742 3742 % b', '.join(sorted(unknownopts)),
3743 3743 hint=_(b'is a required extension not loaded?'),
3744 3744 )
3745 3745
3746 3746 requirements = newreporequirements(ui, createopts=createopts)
3747 3747
3748 3748 wdirvfs = vfsmod.vfs(path, expandpath=True, realpath=True)
3749 3749
3750 3750 hgvfs = vfsmod.vfs(wdirvfs.join(b'.hg'))
3751 3751 if hgvfs.exists():
3752 3752 raise error.RepoError(_(b'repository %s already exists') % path)
3753 3753
3754 3754 if b'sharedrepo' in createopts:
3755 3755 sharedpath = createopts[b'sharedrepo'].sharedpath
3756 3756
3757 3757 if createopts.get(b'sharedrelative'):
3758 3758 try:
3759 3759 sharedpath = os.path.relpath(sharedpath, hgvfs.base)
3760 3760 except (IOError, ValueError) as e:
3761 3761 # ValueError is raised on Windows if the drive letters differ
3762 3762 # on each path.
3763 3763 raise error.Abort(
3764 3764 _(b'cannot calculate relative path'),
3765 3765 hint=stringutil.forcebytestr(e),
3766 3766 )
3767 3767
3768 3768 if not wdirvfs.exists():
3769 3769 wdirvfs.makedirs()
3770 3770
3771 3771 hgvfs.makedir(notindexed=True)
3772 3772 if b'sharedrepo' not in createopts:
3773 3773 hgvfs.mkdir(b'cache')
3774 3774 hgvfs.mkdir(b'wcache')
3775 3775
3776 3776 if b'store' in requirements and b'sharedrepo' not in createopts:
3777 3777 hgvfs.mkdir(b'store')
3778 3778
3779 3779 # We create an invalid changelog outside the store so very old
3780 3780 # Mercurial versions (which didn't know about the requirements
3781 3781 # file) encounter an error on reading the changelog. This
3782 3782 # effectively locks out old clients and prevents them from
3783 3783 # mucking with a repo in an unknown format.
3784 3784 #
3785 3785 # The revlog header has version 2, which won't be recognized by
3786 3786 # such old clients.
3787 3787 hgvfs.append(
3788 3788 b'00changelog.i',
3789 3789 b'\0\0\0\2 dummy changelog to prevent using the old repo '
3790 3790 b'layout',
3791 3791 )
3792 3792
3793 3793 scmutil.writerequires(hgvfs, requirements)
3794 3794
3795 3795 # Write out file telling readers where to find the shared store.
3796 3796 if b'sharedrepo' in createopts:
3797 3797 hgvfs.write(b'sharedpath', sharedpath)
3798 3798
3799 3799 if createopts.get(b'shareditems'):
3800 3800 shared = b'\n'.join(sorted(createopts[b'shareditems'])) + b'\n'
3801 3801 hgvfs.write(b'shared', shared)
3802 3802
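# Hedged usage sketch, added (the path is illustrative):
#
#     createrepository(ui, b'/path/to/new-repo',
#                      createopts={b'lfs': True})
#     repo = instance(ui, b'/path/to/new-repo', create=False)
#
# instance(), defined above, performs both steps when called with
# create=True.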
3803 3803
3804 3804 def poisonrepository(repo):
3805 3805 """Poison a repository instance so it can no longer be used."""
3806 3806 # Perform any cleanup on the instance.
3807 3807 repo.close()
3808 3808
3809 3809 # Our strategy is to replace the type of the object with one that
3810 3810 # has all attribute lookups result in error.
3811 3811 #
3812 3812 # But we have to allow the close() method because some constructors
3813 3813 # of repos call close() on repo references.
3814 3814 class poisonedrepository(object):
3815 3815 def __getattribute__(self, item):
3816 3816 if item == 'close':
3817 3817 return object.__getattribute__(self, item)
3818 3818
3819 3819 raise error.ProgrammingError(
3820 3820 b'repo instances should not be used after unshare'
3821 3821 )
3822 3822
3823 3823 def close(self):
3824 3824 pass
3825 3825
3826 3826 # We may have a repoview, which intercepts __setattr__. So be sure
3827 3827 # we operate at the lowest level possible.
3828 3828 object.__setattr__(repo, '__class__', poisonedrepository)
@@ -1,2304 +1,2302 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 bin,
18 18 hex,
19 19 nullid,
20 20 nullrev,
21 21 )
22 22 from .pycompat import getattr
23 23 from . import (
24 24 encoding,
25 25 error,
26 26 match as matchmod,
27 27 mdiff,
28 28 pathutil,
29 29 policy,
30 30 pycompat,
31 31 revlog,
32 32 util,
33 33 )
34 34 from .interfaces import (
35 35 repository,
36 36 util as interfaceutil,
37 37 )
38 38
39 39 parsers = policy.importmod('parsers')
40 40 propertycache = util.propertycache
41 41
42 42 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
43 43 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
44 44
45 45
46 46 def _parse(data):
47 47 # This method does a little bit of excessive-looking
48 48 # precondition checking. This is so that the behavior of this
49 49 # class exactly matches its C counterpart to try and help
50 50 # prevent surprise breakage for anyone that develops against
51 51 # the pure version.
52 52 if data and data[-1:] != b'\n':
53 53 raise ValueError(b'Manifest did not end in a newline.')
54 54 prev = None
55 55 for l in data.splitlines():
56 56 if prev is not None and prev > l:
57 57 raise ValueError(b'Manifest lines not in sorted order.')
58 58 prev = l
59 59 f, n = l.split(b'\0')
60 60 nl = len(n)
61 61 if 64 < nl:
62 62 # modern hash, full width
63 63 yield f, bin(n[:64]), n[64:]
64 64 elif 40 < nl < 45:
65 65 # legacy hash, always sha1
66 66 yield f, bin(n[:40]), n[40:]
67 67 else:
68 68 yield f, bin(n), b''
69 69
70 70
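# Worked example, added: each manifest line is b'path\0hex(node)[flags]\n',
# so a 40-hex sha1 entry carrying the executable flag parses as:
#
#     data = b'foo/bar.py\x00' + b'aa' * 20 + b'x\n'
#     list(_parse(data)) == [(b'foo/bar.py', bin(b'aa' * 20), b'x')]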
71 71 def _text(it):
72 72 files = []
73 73 lines = []
74 74 for f, n, fl in it:
75 75 files.append(f)
76 76 # if this is changed to support newlines in filenames,
77 77 # be sure to check the templates/ dir again (especially *-raw.tmpl)
78 78 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
79 79
80 80 _checkforbidden(files)
81 81 return b''.join(lines)
82 82
83 83
84 84 class lazymanifestiter(object):
85 85 def __init__(self, lm):
86 86 self.pos = 0
87 87 self.lm = lm
88 88
89 89 def __iter__(self):
90 90 return self
91 91
92 92 def next(self):
93 93 try:
94 94 data, pos = self.lm._get(self.pos)
95 95 except IndexError:
96 96 raise StopIteration
97 97 if pos == -1:
98 98 self.pos += 1
99 99 return data[0]
100 100 self.pos += 1
101 101 zeropos = data.find(b'\x00', pos)
102 102 return data[pos:zeropos]
103 103
104 104 __next__ = next
105 105
106 106
107 107 class lazymanifestiterentries(object):
108 108 def __init__(self, lm):
109 109 self.lm = lm
110 110 self.pos = 0
111 111
112 112 def __iter__(self):
113 113 return self
114 114
115 115 def next(self):
116 116 try:
117 117 data, pos = self.lm._get(self.pos)
118 118 except IndexError:
119 119 raise StopIteration
120 120 if pos == -1:
121 121 self.pos += 1
122 122 return data
123 123 zeropos = data.find(b'\x00', pos)
124 124 hashval = unhexlify(data, self.lm.extrainfo[self.pos], zeropos + 1, 40)
125 125 flags = self.lm._getflags(data, self.pos, zeropos)
126 126 self.pos += 1
127 127 return (data[pos:zeropos], hashval, flags)
128 128
129 129 __next__ = next
130 130
131 131
132 132 def unhexlify(data, extra, pos, length):
133 133 s = bin(data[pos : pos + length])
134 134 if extra:
135 135 s += chr(extra & 0xFF)
136 136 return s
137 137
138 138
139 139 def _cmp(a, b):
140 140 return (a > b) - (a < b)
141 141
142 142
143 143 class _lazymanifest(object):
144 144 """A pure python manifest backed by a byte string. It is supplimented with
145 145 internal lists as it is modified, until it is compacted back to a pure byte
146 146 string.
147 147
148 148 ``data`` is the initial manifest data.
149 149
150 150 ``positions`` is a list of offsets, one per manifest entry. Positive
151 151 values are offsets into ``data``, negative values are offsets into the
152 152 ``extradata`` list. When an entry is removed, it is dropped from
153 153 ``positions``. The values are encoded such that when walking the list and
154 154 indexing into ``data`` or ``extradata`` as appropriate, the entries are
155 155 sorted by filename.
156 156
157 157 ``extradata`` is a list of (key, hash, flags) for entries that were added or
158 158 modified since the manifest was created or compacted.
159 159 """
160 160
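# Worked example, added: after inserting one entry into a compacted
# manifest, 'positions' might read [0, -1, 47] -- offsets 0 and 47 point
# into 'data', while -1 maps to extradata[0] (via the -pos - 1 rule in
# _get() below), which holds the (key, hash, flags) tuple of the entry
# sorting between the other two.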
161 161 def __init__(
162 162 self,
163 163 data,
164 164 positions=None,
165 165 extrainfo=None,
166 166 extradata=None,
167 167 hasremovals=False,
168 168 ):
169 169 if positions is None:
170 170 self.positions = self.findlines(data)
171 171 self.extrainfo = [0] * len(self.positions)
172 172 self.data = data
173 173 self.extradata = []
174 174 self.hasremovals = False
175 175 else:
176 176 self.positions = positions[:]
177 177 self.extrainfo = extrainfo[:]
178 178 self.extradata = extradata[:]
179 179 self.data = data
180 180 self.hasremovals = hasremovals
181 181
182 182 def findlines(self, data):
183 183 if not data:
184 184 return []
185 185 pos = data.find(b"\n")
186 186 if pos == -1 or data[-1:] != b'\n':
187 187 raise ValueError(b"Manifest did not end in a newline.")
188 188 positions = [0]
189 189 prev = data[: data.find(b'\x00')]
190 190 while pos < len(data) - 1 and pos != -1:
191 191 positions.append(pos + 1)
192 192 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
193 193 if nexts < prev:
194 194 raise ValueError(b"Manifest lines not in sorted order.")
195 195 prev = nexts
196 196 pos = data.find(b"\n", pos + 1)
197 197 return positions
198 198
199 199 def _get(self, index):
200 200 # get the position encoded in pos:
201 201 # positive number is an index in 'data'
202 202 # negative number is in extrapieces
203 203 pos = self.positions[index]
204 204 if pos >= 0:
205 205 return self.data, pos
206 206 return self.extradata[-pos - 1], -1
207 207
208 208 def _getkey(self, pos):
209 209 if pos >= 0:
210 210 return self.data[pos : self.data.find(b'\x00', pos + 1)]
211 211 return self.extradata[-pos - 1][0]
212 212
213 213 def bsearch(self, key):
214 214 first = 0
215 215 last = len(self.positions) - 1
216 216
217 217 while first <= last:
218 218 midpoint = (first + last) // 2
219 219 nextpos = self.positions[midpoint]
220 220 candidate = self._getkey(nextpos)
221 221 r = _cmp(key, candidate)
222 222 if r == 0:
223 223 return midpoint
224 224 else:
225 225 if r < 0:
226 226 last = midpoint - 1
227 227 else:
228 228 first = midpoint + 1
229 229 return -1
230 230
231 231 def bsearch2(self, key):
232 232 # same as the above, but always returns a (position, found) pair; on
233 233 # a miss, position is the sorted insertion point. Kept separate for
# performance reasons.
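# Example with hypothetical keys [b'a', b'c']: bsearch2(b'c') returns
# (1, True), while bsearch2(b'b') returns (1, False) because b'b'
# would be inserted at index 1.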
234 234 first = 0
235 235 last = len(self.positions) - 1
236 236
237 237 while first <= last:
238 238 midpoint = (first + last) // 2
239 239 nextpos = self.positions[midpoint]
240 240 candidate = self._getkey(nextpos)
241 241 r = _cmp(key, candidate)
242 242 if r == 0:
243 243 return (midpoint, True)
244 244 else:
245 245 if r < 0:
246 246 last = midpoint - 1
247 247 else:
248 248 first = midpoint + 1
249 249 return (first, False)
250 250
251 251 def __contains__(self, key):
252 252 return self.bsearch(key) != -1
253 253
254 254 def _getflags(self, data, needle, pos):
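# an entry reads b"name\x00" + hex hash + optional flag + b"\n";
# ``pos`` is the offset of the NUL, and the +41 below assumes the
# 40-hex-digit sha1 form of the hash.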
255 255 start = pos + 41
256 256 end = data.find(b"\n", start)
257 257 if end == -1:
258 258 end = len(data) - 1
259 259 if start == end:
260 260 return b''
261 261 return self.data[start:end]
262 262
263 263 def __getitem__(self, key):
264 264 if not isinstance(key, bytes):
265 265 raise TypeError(b"getitem: manifest keys must be a bytes.")
266 266 needle = self.bsearch(key)
267 267 if needle == -1:
268 268 raise KeyError
269 269 data, pos = self._get(needle)
270 270 if pos == -1:
271 271 return (data[1], data[2])
272 272 zeropos = data.find(b'\x00', pos)
273 273 nlpos = data.find(b'\n', zeropos)
274 274 assert 0 <= needle <= len(self.positions)
275 275 assert len(self.extrainfo) == len(self.positions)
276 276 hlen = nlpos - zeropos - 1
277 277 # Hashes sometimes have an extra byte tucked on the end, so
278 278 # detect that.
279 279 if hlen % 2:
280 280 hlen -= 1
281 281 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
282 282 flags = self._getflags(data, needle, zeropos)
283 283 return (hashval, flags)
284 284
285 285 def __delitem__(self, key):
286 286 needle, found = self.bsearch2(key)
287 287 if not found:
288 288 raise KeyError
289 289 cur = self.positions[needle]
290 290 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
291 291 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
292 292 if cur >= 0:
293 293 # This does NOT unsort the list as far as the search functions are
294 294 # concerned, as they only examine lines mapped by self.positions.
295 295 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
296 296 self.hasremovals = True
297 297
298 298 def __setitem__(self, key, value):
299 299 if not isinstance(key, bytes):
300 300 raise TypeError(b"setitem: manifest keys must be a byte string.")
301 301 if not isinstance(value, tuple) or len(value) != 2:
302 302 raise TypeError(
303 303 b"Manifest values must be a tuple of (node, flags)."
304 304 )
305 305 hashval = value[0]
306 306 # hashes are either 20 or 32 bytes (sha1 or its replacement),
307 307 # and allow one extra byte that won't be persisted to disk but
308 308 # is sometimes used in memory.
309 309 if not isinstance(hashval, bytes) or not (
310 310 20 <= len(hashval) <= 22 or 32 <= len(hashval) <= 34
311 311 ):
312 312 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
313 313 flags = value[1]
314 314 if len(hashval) == 22:
315 315 hashval = hashval[:-1]
316 316 if not isinstance(flags, bytes) or len(flags) > 1:
317 317 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
318 318 needle, found = self.bsearch2(key)
319 319 if found:
320 320 # put the item
321 321 pos = self.positions[needle]
322 322 if pos < 0:
323 323 self.extradata[-pos - 1] = (key, hashval, value[1])
324 324 else:
325 325 # the old entry is in ``data``; don't rewrite it in place, just shadow it via extradata
326 326 self.extradata.append((key, hashval, value[1]))
327 327 self.positions[needle] = -len(self.extradata)
328 328 else:
329 329 # not found, put it in with extra positions
330 330 self.extradata.append((key, hashval, value[1]))
331 331 self.positions = (
332 332 self.positions[:needle]
333 333 + [-len(self.extradata)]
334 334 + self.positions[needle:]
335 335 )
336 336 self.extrainfo = (
337 337 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
338 338 )
339 339
340 340 def copy(self):
341 341 # XXX call _compact like in C?
342 342 return _lazymanifest(
343 343 self.data,
344 344 self.positions,
345 345 self.extrainfo,
346 346 self.extradata,
347 347 self.hasremovals,
348 348 )
349 349
350 350 def _compact(self):
351 351 # hopefully not called TOO often
352 352 if len(self.extradata) == 0 and not self.hasremovals:
353 353 return
354 354 l = []
355 355 i = 0
356 356 offset = 0
357 357 self.extrainfo = [0] * len(self.positions)
358 358 while i < len(self.positions):
359 359 if self.positions[i] >= 0:
360 360 cur = self.positions[i]
361 361 last_cut = cur
362 362
363 363 # Collect all contiguous entries in the buffer at the current
364 364 # offset, breaking out only for added/modified items held in
365 365 # extradata, or a deleted line prior to the next position.
366 366 while True:
367 367 self.positions[i] = offset
368 368 i += 1
369 369 if i == len(self.positions) or self.positions[i] < 0:
370 370 break
371 371
372 372 # A removed file has no positions[] entry, but does have an
373 373 # overwritten first byte. Break out and find the end of the
374 374 # current good entry/entries if there is a removed file
375 375 # before the next position.
376 376 if (
377 377 self.hasremovals
378 378 and self.data.find(b'\n\x00', cur, self.positions[i])
379 379 != -1
380 380 ):
381 381 break
382 382
383 383 offset += self.positions[i] - cur
384 384 cur = self.positions[i]
385 385 end_cut = self.data.find(b'\n', cur)
386 386 if end_cut != -1:
387 387 end_cut += 1
388 388 offset += end_cut - cur
389 389 l.append(self.data[last_cut:end_cut])
390 390 else:
391 391 while i < len(self.positions) and self.positions[i] < 0:
392 392 cur = self.positions[i]
393 393 t = self.extradata[-cur - 1]
394 394 l.append(self._pack(t))
395 395 self.positions[i] = offset
396 396 # Hashes are either 20 bytes (old sha1s) or 32
397 397 # bytes (new non-sha1).
398 398 hlen = 20
399 399 if len(t[1]) > 25:
400 400 hlen = 32
401 401 if len(t[1]) > hlen:
402 402 self.extrainfo[i] = ord(t[1][hlen + 1])
403 403 offset += len(l[-1])
404 404 i += 1
405 405 self.data = b''.join(l)
406 406 self.hasremovals = False
407 407 self.extradata = []
408 408
409 409 def _pack(self, d):
410 410 n = d[1]
411 411 if len(n) == 21 or len(n) == 33:
412 412 n = n[:-1]
413 413 assert len(n) == 20 or len(n) == 32
414 414 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
415 415
416 416 def text(self):
417 417 self._compact()
418 418 return self.data
419 419
420 420 def diff(self, m2, clean=False):
421 421 '''Finds changes between the current manifest and m2.'''
422 422 # XXX think whether efficiency matters here
423 423 diff = {}
424 424
425 425 for fn, e1, flags in self.iterentries():
426 426 if fn not in m2:
427 427 diff[fn] = (e1, flags), (None, b'')
428 428 else:
429 429 e2 = m2[fn]
430 430 if (e1, flags) != e2:
431 431 diff[fn] = (e1, flags), e2
432 432 elif clean:
433 433 diff[fn] = None
434 434
435 435 for fn, e2, flags in m2.iterentries():
436 436 if fn not in self:
437 437 diff[fn] = (None, b''), (e2, flags)
438 438
439 439 return diff
440 440
441 441 def iterentries(self):
442 442 return lazymanifestiterentries(self)
443 443
444 444 def iterkeys(self):
445 445 return lazymanifestiter(self)
446 446
447 447 def __iter__(self):
448 448 return lazymanifestiter(self)
449 449
450 450 def __len__(self):
451 451 return len(self.positions)
452 452
453 453 def filtercopy(self, filterfn):
454 454 # XXX should be optimized
455 455 c = _lazymanifest(b'')
456 456 for f, n, fl in self.iterentries():
457 457 if filterfn(f):
458 458 c[f] = n, fl
459 459 return c
460 460
461 461
462 462 try:
463 463 _lazymanifest = parsers.lazymanifest
464 464 except AttributeError:
465 465 pass
466 466
467 467
468 468 @interfaceutil.implementer(repository.imanifestdict)
469 469 class manifestdict(object):
470 470 def __init__(self, data=b''):
471 471 self._lm = _lazymanifest(data)
472 472
473 473 def __getitem__(self, key):
474 474 return self._lm[key][0]
475 475
476 476 def find(self, key):
477 477 return self._lm[key]
478 478
479 479 def __len__(self):
480 480 return len(self._lm)
481 481
482 482 def __nonzero__(self):
483 483 # nonzero is covered by the __len__ function, but implementing it here
484 484 # makes it easier for extensions to override.
485 485 return len(self._lm) != 0
486 486
487 487 __bool__ = __nonzero__
488 488
489 489 def __setitem__(self, key, node):
490 490 self._lm[key] = node, self.flags(key)
491 491
492 492 def __contains__(self, key):
493 493 if key is None:
494 494 return False
495 495 return key in self._lm
496 496
497 497 def __delitem__(self, key):
498 498 del self._lm[key]
499 499
500 500 def __iter__(self):
501 501 return self._lm.__iter__()
502 502
503 503 def iterkeys(self):
504 504 return self._lm.iterkeys()
505 505
506 506 def keys(self):
507 507 return list(self.iterkeys())
508 508
509 509 def filesnotin(self, m2, match=None):
510 510 '''Set of files in this manifest that are not in the other'''
511 511 if match is not None:
512 512 match = matchmod.badmatch(match, lambda path, msg: None)
513 513 sm2 = set(m2.walk(match))
514 514 return {f for f in self.walk(match) if f not in sm2}
515 515 return {f for f in self if f not in m2}
516 516
517 517 @propertycache
518 518 def _dirs(self):
519 519 return pathutil.dirs(self)
520 520
521 521 def dirs(self):
522 522 return self._dirs
523 523
524 524 def hasdir(self, dir):
525 525 return dir in self._dirs
526 526
527 527 def _filesfastpath(self, match):
528 528 '''Checks whether we can correctly and quickly iterate over matcher
529 529 files instead of over manifest files.'''
530 530 files = match.files()
531 531 return len(files) < 100 and (
532 532 match.isexact()
533 533 or (match.prefix() and all(fn in self for fn in files))
534 534 )
535 535
536 536 def walk(self, match):
537 537 '''Generates matching file names.
538 538
539 539 Equivalent to manifest.matches(match).iterkeys(), but without creating
540 540 an entirely new manifest.
541 541
542 542 It also reports nonexistent files by marking them bad with match.bad().
543 543 '''
544 544 if match.always():
545 545 for f in iter(self):
546 546 yield f
547 547 return
548 548
549 549 fset = set(match.files())
550 550
551 551 # avoid the entire walk if we're only looking for specific files
552 552 if self._filesfastpath(match):
553 553 for fn in sorted(fset):
554 554 if fn in self:
555 555 yield fn
556 556 return
557 557
558 558 for fn in self:
559 559 if fn in fset:
560 560 # specified pattern is the exact name
561 561 fset.remove(fn)
562 562 if match(fn):
563 563 yield fn
564 564
565 565 # for dirstate.walk, files=[''] means "walk the whole tree".
566 566 # follow that here, too
567 567 fset.discard(b'')
568 568
569 569 for fn in sorted(fset):
570 570 if not self.hasdir(fn):
571 571 match.bad(fn, None)
572 572
573 573 def _matches(self, match):
574 574 '''generate a new manifest filtered by the match argument'''
575 575 if match.always():
576 576 return self.copy()
577 577
578 578 if self._filesfastpath(match):
579 579 m = manifestdict()
580 580 lm = self._lm
581 581 for fn in match.files():
582 582 if fn in lm:
583 583 m._lm[fn] = lm[fn]
584 584 return m
585 585
586 586 m = manifestdict()
587 587 m._lm = self._lm.filtercopy(match)
588 588 return m
589 589
590 590 def diff(self, m2, match=None, clean=False):
591 591 '''Finds changes between the current manifest and m2.
592 592
593 593 Args:
594 594 m2: the manifest to which this manifest should be compared.
595 595 clean: if true, include files unchanged between these manifests
596 596 with a None value in the returned dictionary.
597 597
598 598 The result is returned as a dict with filename as key and
599 599 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
600 600 nodeid in the current/other manifest and fl1/fl2 is the flag
601 601 in the current/other manifest. Where the file does not exist,
602 602 the nodeid will be None and the flags will be the empty
603 603 string.
604 604 '''
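# Shape of the result, with hypothetical nodes n1/n2: a file changed
# between the manifests appears as {b'foo': ((n1, b''), (n2, b'x'))},
# while a file only present in m2 appears as ((None, b''), (n2, b'')).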
605 605 if match:
606 606 m1 = self._matches(match)
607 607 m2 = m2._matches(match)
608 608 return m1.diff(m2, clean=clean)
609 609 return self._lm.diff(m2._lm, clean)
610 610
611 611 def setflag(self, key, flag):
612 612 self._lm[key] = self[key], flag
613 613
614 614 def get(self, key, default=None):
615 615 try:
616 616 return self._lm[key][0]
617 617 except KeyError:
618 618 return default
619 619
620 620 def flags(self, key):
621 621 try:
622 622 return self._lm[key][1]
623 623 except KeyError:
624 624 return b''
625 625
626 626 def copy(self):
627 627 c = manifestdict()
628 628 c._lm = self._lm.copy()
629 629 return c
630 630
631 631 def items(self):
632 632 return (x[:2] for x in self._lm.iterentries())
633 633
634 634 def iteritems(self):
635 635 return (x[:2] for x in self._lm.iterentries())
636 636
637 637 def iterentries(self):
638 638 return self._lm.iterentries()
639 639
640 640 def text(self):
641 641 # most likely uses native version
642 642 return self._lm.text()
643 643
644 644 def fastdelta(self, base, changes):
645 645 """Given a base manifest text as a bytearray and a list of changes
646 646 relative to that text, compute a delta that can be used by revlog.
647 647 """
648 648 delta = []
649 649 dstart = None
650 650 dend = None
651 651 dline = [b""]
652 652 start = 0
653 653 # zero copy representation of base as a buffer
654 654 addbuf = util.buffer(base)
655 655
656 656 changes = list(changes)
657 657 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
658 658 # start with a readonly loop that finds the offset of
659 659 # each line and creates the deltas
660 660 for f, todelete in changes:
661 661 # start/end will either delimit the existing item or mark the insert point
662 662 start, end = _msearch(addbuf, f, start)
663 663 if not todelete:
664 664 h, fl = self._lm[f]
665 665 l = b"%s\0%s%s\n" % (f, hex(h), fl)
666 666 else:
667 667 if start == end:
668 668 # item we want to delete was not found, error out
669 669 raise AssertionError(
670 670 _(b"failed to remove %s from manifest") % f
671 671 )
672 672 l = b""
673 673 if dstart is not None and dstart <= start and dend >= start:
674 674 if dend < end:
675 675 dend = end
676 676 if l:
677 677 dline.append(l)
678 678 else:
679 679 if dstart is not None:
680 680 delta.append([dstart, dend, b"".join(dline)])
681 681 dstart = start
682 682 dend = end
683 683 dline = [l]
684 684
685 685 if dstart is not None:
686 686 delta.append([dstart, dend, b"".join(dline)])
687 687 # apply the delta to the base, and get a delta for addrevision
688 688 deltatext, arraytext = _addlistdelta(base, delta)
689 689 else:
690 690 # For large changes, it's much cheaper to just build the text and
691 691 # diff it.
692 692 arraytext = bytearray(self.text())
693 693 deltatext = mdiff.textdiff(
694 694 util.buffer(base), util.buffer(arraytext)
695 695 )
696 696
697 697 return arraytext, deltatext
698 698
699 699
700 700 def _msearch(m, s, lo=0, hi=None):
701 701 '''return a tuple (start, end) that says where to find s within m.
702 702
703 703 If the string is found, m[start:end] is the line containing
704 704 that string. If start == end, the string was not found and
705 705 they indicate the proper sorted insertion point.
706 706
707 707 m should be a buffer, a memoryview or a byte string.
708 708 s is a byte string'''
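# Worked example on a made-up two-entry manifest: with
# m = b"a\x00" + 40 hex digits + b"\n" + b"c\x00" + 40 hex digits + b"\n"
# (each line 43 bytes), _msearch(m, b'a') returns (0, 43), covering the
# first line, and _msearch(m, b'b') returns (43, 43): not found, so b'b'
# would be inserted before the b'c' line.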
709 709
710 710 def advance(i, c):
711 711 while i < lenm and m[i : i + 1] != c:
712 712 i += 1
713 713 return i
714 714
715 715 if not s:
716 716 return (lo, lo)
717 717 lenm = len(m)
718 718 if not hi:
719 719 hi = lenm
720 720 while lo < hi:
721 721 mid = (lo + hi) // 2
722 722 start = mid
723 723 while start > 0 and m[start - 1 : start] != b'\n':
724 724 start -= 1
725 725 end = advance(start, b'\0')
726 726 if bytes(m[start:end]) < s:
727 727 # we know that after the null there are 40 bytes of sha1
728 728 # this translates to the bisect lo = mid + 1
729 729 lo = advance(end + 40, b'\n') + 1
730 730 else:
731 731 # this translates to the bisect hi = mid
732 732 hi = start
733 733 end = advance(lo, b'\0')
734 734 found = m[lo:end]
735 735 if s == found:
736 736 # we know that after the null there are 40 bytes of sha1
737 737 end = advance(end + 40, b'\n')
738 738 return (lo, end + 1)
739 739 else:
740 740 return (lo, lo)
741 741
742 742
743 743 def _checkforbidden(l):
744 744 """Check filenames for illegal characters."""
745 745 for f in l:
746 746 if b'\n' in f or b'\r' in f:
747 747 raise error.StorageError(
748 748 _(b"'\\n' and '\\r' disallowed in filenames: %r")
749 749 % pycompat.bytestr(f)
750 750 )
751 751
752 752
753 753 # apply the changes collected during the bisect loop to our addlist
754 754 # return a delta suitable for addrevision
755 755 def _addlistdelta(addlist, x):
756 756 # for large addlist arrays, building a new array is cheaper
757 757 # than repeatedly modifying the existing one
758 758 currentposition = 0
759 759 newaddlist = bytearray()
760 760
761 761 for start, end, content in x:
762 762 newaddlist += addlist[currentposition:start]
763 763 if content:
764 764 newaddlist += bytearray(content)
765 765
766 766 currentposition = end
767 767
768 768 newaddlist += addlist[currentposition:]
769 769
770 770 deltatext = b"".join(
771 771 struct.pack(b">lll", start, end, len(content)) + content
772 772 for start, end, content in x
773 773 )
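# Encoding sketch (hypothetical numbers): a hunk replacing bytes 10..20
# of the base with b'abc' becomes
#   struct.pack(b">lll", 10, 20, 3) + b'abc'
# i.e. a 12-byte (start, end, length) header followed by the payload.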
774 774 return deltatext, newaddlist
775 775
776 776
777 777 def _splittopdir(f):
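# examples, derived from the branches below:
#   _splittopdir(b'dir/sub/f.py') -> (b'dir/', b'sub/f.py')
#   _splittopdir(b'f.py') -> (b'', b'f.py')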
778 778 if b'/' in f:
779 779 dir, subpath = f.split(b'/', 1)
780 780 return dir + b'/', subpath
781 781 else:
782 782 return b'', f
783 783
784 784
785 785 _noop = lambda s: None
786 786
787 787
788 788 @interfaceutil.implementer(repository.imanifestdict)
789 789 class treemanifest(object):
790 790 def __init__(self, dir=b'', text=b''):
791 791 self._dir = dir
792 792 self._node = nullid
793 793 self._loadfunc = _noop
794 794 self._copyfunc = _noop
795 795 self._dirty = False
796 796 self._dirs = {}
797 797 self._lazydirs = {}
798 798 # Using _lazymanifest here is a little slower than plain old dicts
799 799 self._files = {}
800 800 self._flags = {}
801 801 if text:
802 802
803 803 def readsubtree(subdir, subm):
804 804 raise AssertionError(
805 805 b'treemanifest constructor only accepts flat manifests'
806 806 )
807 807
808 808 self.parse(text, readsubtree)
809 809 self._dirty = True # Mark flat manifest dirty after parsing
810 810
811 811 def _subpath(self, path):
812 812 return self._dir + path
813 813
814 814 def _loadalllazy(self):
815 815 selfdirs = self._dirs
816 816 for d, (path, node, readsubtree, docopy) in pycompat.iteritems(
817 817 self._lazydirs
818 818 ):
819 819 if docopy:
820 820 selfdirs[d] = readsubtree(path, node).copy()
821 821 else:
822 822 selfdirs[d] = readsubtree(path, node)
823 823 self._lazydirs = {}
824 824
825 825 def _loadlazy(self, d):
826 826 v = self._lazydirs.get(d)
827 827 if v:
828 828 path, node, readsubtree, docopy = v
829 829 if docopy:
830 830 self._dirs[d] = readsubtree(path, node).copy()
831 831 else:
832 832 self._dirs[d] = readsubtree(path, node)
833 833 del self._lazydirs[d]
834 834
835 835 def _loadchildrensetlazy(self, visit):
836 836 if not visit:
837 837 return None
838 838 if visit == b'all' or visit == b'this':
839 839 self._loadalllazy()
840 840 return None
841 841
842 842 loadlazy = self._loadlazy
843 843 for k in visit:
844 844 loadlazy(k + b'/')
845 845 return visit
846 846
847 847 def _loaddifflazy(self, t1, t2):
848 848 """load items in t1 and t2 if they're needed for diffing.
849 849
850 850 The current criteria are:
851 851 - if it's not present in _lazydirs in either t1 or t2, load it in the
852 852 other (it may already be loaded or it may not exist, doesn't matter)
853 853 - if it's present in _lazydirs in both, compare the nodeid; if it
854 854 differs, load it in both
855 855 """
856 856 toloadlazy = []
857 857 for d, v1 in pycompat.iteritems(t1._lazydirs):
858 858 v2 = t2._lazydirs.get(d)
859 859 if not v2 or v2[1] != v1[1]:
860 860 toloadlazy.append(d)
861 861 for d, v1 in pycompat.iteritems(t2._lazydirs):
862 862 if d not in t1._lazydirs:
863 863 toloadlazy.append(d)
864 864
865 865 for d in toloadlazy:
866 866 t1._loadlazy(d)
867 867 t2._loadlazy(d)
868 868
869 869 def __len__(self):
870 870 self._load()
871 871 size = len(self._files)
872 872 self._loadalllazy()
873 873 for m in self._dirs.values():
874 874 size += m.__len__()
875 875 return size
876 876
877 877 def __nonzero__(self):
878 878 # Faster than "__len__() != 0" since it avoids loading sub-manifests
879 879 return not self._isempty()
880 880
881 881 __bool__ = __nonzero__
882 882
883 883 def _isempty(self):
884 884 self._load() # for consistency; already loaded by all callers
885 885 # See if we can skip loading everything.
886 886 if self._files or (
887 887 self._dirs and any(not m._isempty() for m in self._dirs.values())
888 888 ):
889 889 return False
890 890 self._loadalllazy()
891 891 return not self._dirs or all(m._isempty() for m in self._dirs.values())
892 892
893 893 @encoding.strmethod
894 894 def __repr__(self):
895 895 return (
896 896 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
897 897 % (
898 898 self._dir,
899 899 hex(self._node),
900 900 bool(self._loadfunc is _noop),
901 901 self._dirty,
902 902 id(self),
903 903 )
904 904 )
905 905
906 906 def dir(self):
907 907 '''The directory that this tree manifest represents, including a
908 908 trailing '/'. Empty string for the repo root directory.'''
909 909 return self._dir
910 910
911 911 def node(self):
912 912 '''The node of this instance. nullid for unsaved instances. Should
913 913 be updated when the instance is read from or written to a revlog.
914 914 '''
915 915 assert not self._dirty
916 916 return self._node
917 917
918 918 def setnode(self, node):
919 919 self._node = node
920 920 self._dirty = False
921 921
922 922 def iterentries(self):
923 923 self._load()
924 924 self._loadalllazy()
925 925 for p, n in sorted(
926 926 itertools.chain(self._dirs.items(), self._files.items())
927 927 ):
928 928 if p in self._files:
929 929 yield self._subpath(p), n, self._flags.get(p, b'')
930 930 else:
931 931 for x in n.iterentries():
932 932 yield x
933 933
934 934 def items(self):
935 935 self._load()
936 936 self._loadalllazy()
937 937 for p, n in sorted(
938 938 itertools.chain(self._dirs.items(), self._files.items())
939 939 ):
940 940 if p in self._files:
941 941 yield self._subpath(p), n
942 942 else:
943 943 for f, sn in pycompat.iteritems(n):
944 944 yield f, sn
945 945
946 946 iteritems = items
947 947
948 948 def iterkeys(self):
949 949 self._load()
950 950 self._loadalllazy()
951 951 for p in sorted(itertools.chain(self._dirs, self._files)):
952 952 if p in self._files:
953 953 yield self._subpath(p)
954 954 else:
955 955 for f in self._dirs[p]:
956 956 yield f
957 957
958 958 def keys(self):
959 959 return list(self.iterkeys())
960 960
961 961 def __iter__(self):
962 962 return self.iterkeys()
963 963
964 964 def __contains__(self, f):
965 965 if f is None:
966 966 return False
967 967 self._load()
968 968 dir, subpath = _splittopdir(f)
969 969 if dir:
970 970 self._loadlazy(dir)
971 971
972 972 if dir not in self._dirs:
973 973 return False
974 974
975 975 return self._dirs[dir].__contains__(subpath)
976 976 else:
977 977 return f in self._files
978 978
979 979 def get(self, f, default=None):
980 980 self._load()
981 981 dir, subpath = _splittopdir(f)
982 982 if dir:
983 983 self._loadlazy(dir)
984 984
985 985 if dir not in self._dirs:
986 986 return default
987 987 return self._dirs[dir].get(subpath, default)
988 988 else:
989 989 return self._files.get(f, default)
990 990
991 991 def __getitem__(self, f):
992 992 self._load()
993 993 dir, subpath = _splittopdir(f)
994 994 if dir:
995 995 self._loadlazy(dir)
996 996
997 997 return self._dirs[dir].__getitem__(subpath)
998 998 else:
999 999 return self._files[f]
1000 1000
1001 1001 def flags(self, f):
1002 1002 self._load()
1003 1003 dir, subpath = _splittopdir(f)
1004 1004 if dir:
1005 1005 self._loadlazy(dir)
1006 1006
1007 1007 if dir not in self._dirs:
1008 1008 return b''
1009 1009 return self._dirs[dir].flags(subpath)
1010 1010 else:
1011 1011 if f in self._lazydirs or f in self._dirs:
1012 1012 return b''
1013 1013 return self._flags.get(f, b'')
1014 1014
1015 1015 def find(self, f):
1016 1016 self._load()
1017 1017 dir, subpath = _splittopdir(f)
1018 1018 if dir:
1019 1019 self._loadlazy(dir)
1020 1020
1021 1021 return self._dirs[dir].find(subpath)
1022 1022 else:
1023 1023 return self._files[f], self._flags.get(f, b'')
1024 1024
1025 1025 def __delitem__(self, f):
1026 1026 self._load()
1027 1027 dir, subpath = _splittopdir(f)
1028 1028 if dir:
1029 1029 self._loadlazy(dir)
1030 1030
1031 1031 self._dirs[dir].__delitem__(subpath)
1032 1032 # If the directory is now empty, remove it
1033 1033 if self._dirs[dir]._isempty():
1034 1034 del self._dirs[dir]
1035 1035 else:
1036 1036 del self._files[f]
1037 1037 if f in self._flags:
1038 1038 del self._flags[f]
1039 1039 self._dirty = True
1040 1040
1041 1041 def __setitem__(self, f, n):
1042 1042 assert n is not None
1043 1043 self._load()
1044 1044 dir, subpath = _splittopdir(f)
1045 1045 if dir:
1046 1046 self._loadlazy(dir)
1047 1047 if dir not in self._dirs:
1048 1048 self._dirs[dir] = treemanifest(self._subpath(dir))
1049 1049 self._dirs[dir].__setitem__(subpath, n)
1050 1050 else:
1051 1051 # manifest nodes are either 20 bytes or 32 bytes,
1052 1052 # depending on the hash in use. An extra byte is
1053 1053 # occasionally used by hg, but won't ever be
1054 1054 # persisted. Trim to 21 or 33 bytes as appropriate.
1055 1055 trim = 21 if len(n) < 25 else 33
1056 1056 self._files[f] = n[:trim] # to match manifestdict's behavior
1057 1057 self._dirty = True
1058 1058
1059 1059 def _load(self):
1060 1060 if self._loadfunc is not _noop:
1061 1061 lf, self._loadfunc = self._loadfunc, _noop
1062 1062 lf(self)
1063 1063 elif self._copyfunc is not _noop:
1064 1064 cf, self._copyfunc = self._copyfunc, _noop
1065 1065 cf(self)
1066 1066
1067 1067 def setflag(self, f, flags):
1068 1068 """Set the flags (symlink, executable) for path f."""
1069 1069 self._load()
1070 1070 dir, subpath = _splittopdir(f)
1071 1071 if dir:
1072 1072 self._loadlazy(dir)
1073 1073 if dir not in self._dirs:
1074 1074 self._dirs[dir] = treemanifest(self._subpath(dir))
1075 1075 self._dirs[dir].setflag(subpath, flags)
1076 1076 else:
1077 1077 self._flags[f] = flags
1078 1078 self._dirty = True
1079 1079
1080 1080 def copy(self):
1081 1081 copy = treemanifest(self._dir)
1082 1082 copy._node = self._node
1083 1083 copy._dirty = self._dirty
1084 1084 if self._copyfunc is _noop:
1085 1085
1086 1086 def _copyfunc(s):
1087 1087 self._load()
1088 1088 s._lazydirs = {
1089 1089 d: (p, n, r, True)
1090 1090 for d, (p, n, r, c) in pycompat.iteritems(self._lazydirs)
1091 1091 }
1092 1092 sdirs = s._dirs
1093 1093 for d, v in pycompat.iteritems(self._dirs):
1094 1094 sdirs[d] = v.copy()
1095 1095 s._files = dict.copy(self._files)
1096 1096 s._flags = dict.copy(self._flags)
1097 1097
1098 1098 if self._loadfunc is _noop:
1099 1099 _copyfunc(copy)
1100 1100 else:
1101 1101 copy._copyfunc = _copyfunc
1102 1102 else:
1103 1103 copy._copyfunc = self._copyfunc
1104 1104 return copy
1105 1105
1106 1106 def filesnotin(self, m2, match=None):
1107 1107 '''Set of files in this manifest that are not in the other'''
1108 1108 if match and not match.always():
1109 1109 m1 = self._matches(match)
1110 1110 m2 = m2._matches(match)
1111 1111 return m1.filesnotin(m2)
1112 1112
1113 1113 files = set()
1114 1114
1115 1115 def _filesnotin(t1, t2):
1116 1116 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1117 1117 return
1118 1118 t1._load()
1119 1119 t2._load()
1120 1120 self._loaddifflazy(t1, t2)
1121 1121 for d, m1 in pycompat.iteritems(t1._dirs):
1122 1122 if d in t2._dirs:
1123 1123 m2 = t2._dirs[d]
1124 1124 _filesnotin(m1, m2)
1125 1125 else:
1126 1126 files.update(m1.iterkeys())
1127 1127
1128 1128 for fn in t1._files:
1129 1129 if fn not in t2._files:
1130 1130 files.add(t1._subpath(fn))
1131 1131
1132 1132 _filesnotin(self, m2)
1133 1133 return files
1134 1134
1135 1135 @propertycache
1136 1136 def _alldirs(self):
1137 1137 return pathutil.dirs(self)
1138 1138
1139 1139 def dirs(self):
1140 1140 return self._alldirs
1141 1141
1142 1142 def hasdir(self, dir):
1143 1143 self._load()
1144 1144 topdir, subdir = _splittopdir(dir)
1145 1145 if topdir:
1146 1146 self._loadlazy(topdir)
1147 1147 if topdir in self._dirs:
1148 1148 return self._dirs[topdir].hasdir(subdir)
1149 1149 return False
1150 1150 dirslash = dir + b'/'
1151 1151 return dirslash in self._dirs or dirslash in self._lazydirs
1152 1152
1153 1153 def walk(self, match):
1154 1154 '''Generates matching file names.
1155 1155
1156 1156 It also reports nonexistent files by marking them bad with match.bad().
1157 1157 '''
1158 1158 if match.always():
1159 1159 for f in iter(self):
1160 1160 yield f
1161 1161 return
1162 1162
1163 1163 fset = set(match.files())
1164 1164
1165 1165 for fn in self._walk(match):
1166 1166 if fn in fset:
1167 1167 # specified pattern is the exact name
1168 1168 fset.remove(fn)
1169 1169 yield fn
1170 1170
1171 1171 # for dirstate.walk, files=[''] means "walk the whole tree".
1172 1172 # follow that here, too
1173 1173 fset.discard(b'')
1174 1174
1175 1175 for fn in sorted(fset):
1176 1176 if not self.hasdir(fn):
1177 1177 match.bad(fn, None)
1178 1178
1179 1179 def _walk(self, match):
1180 1180 '''Recursively generates matching file names for walk().'''
1181 1181 visit = match.visitchildrenset(self._dir[:-1])
1182 1182 if not visit:
1183 1183 return
1184 1184
1185 1185 # yield this dir's files and walk its submanifests
1186 1186 self._load()
1187 1187 visit = self._loadchildrensetlazy(visit)
1188 1188 for p in sorted(list(self._dirs) + list(self._files)):
1189 1189 if p in self._files:
1190 1190 fullp = self._subpath(p)
1191 1191 if match(fullp):
1192 1192 yield fullp
1193 1193 else:
1194 1194 if not visit or p[:-1] in visit:
1195 1195 for f in self._dirs[p]._walk(match):
1196 1196 yield f
1197 1197
1198 1198 def _matches(self, match):
1199 1199 '''recursively generate a new manifest filtered by the match argument.
1200 1200 '''
1201 1201 if match.always():
1202 1202 return self.copy()
1203 1203 return self._matches_inner(match)
1204 1204
1205 1205 def _matches_inner(self, match):
1206 1206 if match.always():
1207 1207 return self.copy()
1208 1208
1209 1209 visit = match.visitchildrenset(self._dir[:-1])
1210 1210 if visit == b'all':
1211 1211 return self.copy()
1212 1212 ret = treemanifest(self._dir)
1213 1213 if not visit:
1214 1214 return ret
1215 1215
1216 1216 self._load()
1217 1217 for fn in self._files:
1218 1218 # While visitchildrenset *usually* lists only subdirs, this is
1219 1219 # actually up to the matcher and may have some files in the set().
1220 1220 # If visit == 'this', we should obviously look at the files in this
1221 1221 # directory; if visit is a set, and fn is in it, we should inspect
1222 1222 # fn (but no need to inspect things not in the set).
1223 1223 if visit != b'this' and fn not in visit:
1224 1224 continue
1225 1225 fullp = self._subpath(fn)
1226 1226 # visitchildrenset isn't perfect, we still need to call the regular
1227 1227 # matcher code to further filter results.
1228 1228 if not match(fullp):
1229 1229 continue
1230 1230 ret._files[fn] = self._files[fn]
1231 1231 if fn in self._flags:
1232 1232 ret._flags[fn] = self._flags[fn]
1233 1233
1234 1234 visit = self._loadchildrensetlazy(visit)
1235 1235 for dir, subm in pycompat.iteritems(self._dirs):
1236 1236 if visit and dir[:-1] not in visit:
1237 1237 continue
1238 1238 m = subm._matches_inner(match)
1239 1239 if not m._isempty():
1240 1240 ret._dirs[dir] = m
1241 1241
1242 1242 if not ret._isempty():
1243 1243 ret._dirty = True
1244 1244 return ret
1245 1245
1246 1246 def fastdelta(self, base, changes):
1247 1247 raise FastdeltaUnavailable()
1248 1248
1249 1249 def diff(self, m2, match=None, clean=False):
1250 1250 '''Finds changes between the current manifest and m2.
1251 1251
1252 1252 Args:
1253 1253 m2: the manifest to which this manifest should be compared.
1254 1254 clean: if true, include files unchanged between these manifests
1255 1255 with a None value in the returned dictionary.
1256 1256
1257 1257 The result is returned as a dict with filename as key and
1258 1258 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1259 1259 nodeid in the current/other manifest and fl1/fl2 is the flag
1260 1260 in the current/other manifest. Where the file does not exist,
1261 1261 the nodeid will be None and the flags will be the empty
1262 1262 string.
1263 1263 '''
1264 1264 if match and not match.always():
1265 1265 m1 = self._matches(match)
1266 1266 m2 = m2._matches(match)
1267 1267 return m1.diff(m2, clean=clean)
1268 1268 result = {}
1269 1269 emptytree = treemanifest()
1270 1270
1271 1271 def _iterativediff(t1, t2, stack):
1272 1272 """compares two tree manifests and append new tree-manifests which
1273 1273 needs to be compared to stack"""
1274 1274 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1275 1275 return
1276 1276 t1._load()
1277 1277 t2._load()
1278 1278 self._loaddifflazy(t1, t2)
1279 1279
1280 1280 for d, m1 in pycompat.iteritems(t1._dirs):
1281 1281 m2 = t2._dirs.get(d, emptytree)
1282 1282 stack.append((m1, m2))
1283 1283
1284 1284 for d, m2 in pycompat.iteritems(t2._dirs):
1285 1285 if d not in t1._dirs:
1286 1286 stack.append((emptytree, m2))
1287 1287
1288 1288 for fn, n1 in pycompat.iteritems(t1._files):
1289 1289 fl1 = t1._flags.get(fn, b'')
1290 1290 n2 = t2._files.get(fn, None)
1291 1291 fl2 = t2._flags.get(fn, b'')
1292 1292 if n1 != n2 or fl1 != fl2:
1293 1293 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1294 1294 elif clean:
1295 1295 result[t1._subpath(fn)] = None
1296 1296
1297 1297 for fn, n2 in pycompat.iteritems(t2._files):
1298 1298 if fn not in t1._files:
1299 1299 fl2 = t2._flags.get(fn, b'')
1300 1300 result[t2._subpath(fn)] = ((None, b''), (n2, fl2))
1301 1301
1302 1302 stackls = []
1303 1303 _iterativediff(self, m2, stackls)
1304 1304 while stackls:
1305 1305 t1, t2 = stackls.pop()
1306 1306 # stackls is populated in the function call
1307 1307 _iterativediff(t1, t2, stackls)
1308 1308 return result
1309 1309
1310 1310 def unmodifiedsince(self, m2):
1311 1311 return not self._dirty and not m2._dirty and self._node == m2._node
1312 1312
1313 1313 def parse(self, text, readsubtree):
1314 1314 selflazy = self._lazydirs
1315 1315 subpath = self._subpath
1316 1316 for f, n, fl in _parse(text):
1317 1317 if fl == b't':
1318 1318 f = f + b'/'
1319 1319 # False below means "doesn't need to be copied" and can use the
1320 1320 # cached value from readsubtree directly.
1321 1321 selflazy[f] = (subpath(f), n, readsubtree, False)
1322 1322 elif b'/' in f:
1323 1323 # This is a flat manifest, so use __setitem__ and setflag rather
1324 1324 # than assigning directly to _files and _flags, so we can
1325 1325 # assign a path in a subdirectory, and to mark dirty (compared
1326 1326 # to nullid).
1327 1327 self[f] = n
1328 1328 if fl:
1329 1329 self.setflag(f, fl)
1330 1330 else:
1331 1331 # Assigning to _files and _flags avoids marking as dirty,
1332 1332 # and should be a little faster.
1333 1333 self._files[f] = n
1334 1334 if fl:
1335 1335 self._flags[f] = fl
1336 1336
1337 1337 def text(self):
1338 1338 """Get the full data of this manifest as a bytestring."""
1339 1339 self._load()
1340 1340 return _text(self.iterentries())
1341 1341
1342 1342 def dirtext(self):
1343 1343 """Get the full data of this directory as a bytestring. Make sure that
1344 1344 any submanifests have been written first, so their nodeids are correct.
1345 1345 """
1346 1346 self._load()
1347 1347 flags = self.flags
1348 1348 lazydirs = [
1349 1349 (d[:-1], v[1], b't') for d, v in pycompat.iteritems(self._lazydirs)
1350 1350 ]
1351 1351 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1352 1352 files = [(f, self._files[f], flags(f)) for f in self._files]
1353 1353 return _text(sorted(dirs + files + lazydirs))
1354 1354
1355 1355 def read(self, gettext, readsubtree):
1356 1356 def _load_for_read(s):
1357 1357 s.parse(gettext(), readsubtree)
1358 1358 s._dirty = False
1359 1359
1360 1360 self._loadfunc = _load_for_read
1361 1361
1362 1362 def writesubtrees(self, m1, m2, writesubtree, match):
1363 1363 self._load() # for consistency; should never have any effect here
1364 1364 m1._load()
1365 1365 m2._load()
1366 1366 emptytree = treemanifest()
1367 1367
1368 1368 def getnode(m, d):
1369 1369 ld = m._lazydirs.get(d)
1370 1370 if ld:
1371 1371 return ld[1]
1372 1372 return m._dirs.get(d, emptytree)._node
1373 1373
1374 1374 # let's skip investigating things that `match` says we do not need.
1375 1375 visit = match.visitchildrenset(self._dir[:-1])
1376 1376 visit = self._loadchildrensetlazy(visit)
1377 1377 if visit == b'this' or visit == b'all':
1378 1378 visit = None
1379 1379 for d, subm in pycompat.iteritems(self._dirs):
1380 1380 if visit and d[:-1] not in visit:
1381 1381 continue
1382 1382 subp1 = getnode(m1, d)
1383 1383 subp2 = getnode(m2, d)
1384 1384 if subp1 == nullid:
1385 1385 subp1, subp2 = subp2, subp1
1386 1386 writesubtree(subm, subp1, subp2, match)
1387 1387
1388 1388 def walksubtrees(self, matcher=None):
1389 1389 """Returns an iterator of the subtrees of this manifest, including this
1390 1390 manifest itself.
1391 1391
1392 1392 If `matcher` is provided, it only returns subtrees that match.
1393 1393 """
1394 1394 if matcher and not matcher.visitdir(self._dir[:-1]):
1395 1395 return
1396 1396 if not matcher or matcher(self._dir[:-1]):
1397 1397 yield self
1398 1398
1399 1399 self._load()
1400 1400 # OPT: use visitchildrenset to avoid loading everything.
1401 1401 self._loadalllazy()
1402 1402 for d, subm in pycompat.iteritems(self._dirs):
1403 1403 for subtree in subm.walksubtrees(matcher=matcher):
1404 1404 yield subtree
1405 1405
1406 1406
1407 1407 class manifestfulltextcache(util.lrucachedict):
1408 1408 """File-backed LRU cache for the manifest cache
1409 1409
1410 1410 File consists of entries, up to EOF:
1411 1411
1412 1412 - 20 bytes node, 4 bytes length, <length> manifest data
1413 1413
1414 1414 These are written in reverse cache order (oldest to newest).
1415 1415
1416 1416 """
1417 1417
1418 1418 _file = b'manifestfulltextcache'
1419 1419
1420 1420 def __init__(self, max):
1421 1421 super(manifestfulltextcache, self).__init__(max)
1422 1422 self._dirty = False
1423 1423 self._read = False
1424 1424 self._opener = None
1425 1425
1426 1426 def read(self):
1427 1427 if self._read or self._opener is None:
1428 1428 return
1429 1429
1430 1430 try:
1431 1431 with self._opener(self._file) as fp:
1432 1432 set = super(manifestfulltextcache, self).__setitem__
1433 1433 # ignore trailing data; this is a cache, so corruption is skipped
1434 1434 while True:
1435 1435 # TODO do we need to do work here for sha1 portability?
1436 1436 node = fp.read(20)
1437 1437 if len(node) < 20:
1438 1438 break
1439 1439 try:
1440 1440 size = struct.unpack(b'>L', fp.read(4))[0]
1441 1441 except struct.error:
1442 1442 break
1443 1443 value = bytearray(fp.read(size))
1444 1444 if len(value) != size:
1445 1445 break
1446 1446 set(node, value)
1447 1447 except IOError:
1448 1448 # the file is allowed to be missing
1449 1449 pass
1450 1450
1451 1451 self._read = True
1452 1452 self._dirty = False
1453 1453
1454 1454 def write(self):
1455 1455 if not self._dirty or self._opener is None:
1456 1456 return
1457 1457 # rotate backwards to the first used node
1458 1458 with self._opener(
1459 1459 self._file, b'w', atomictemp=True, checkambig=True
1460 1460 ) as fp:
1461 1461 node = self._head.prev
1462 1462 while True:
1463 1463 if node.key in self._cache:
1464 1464 fp.write(node.key)
1465 1465 fp.write(struct.pack(b'>L', len(node.value)))
1466 1466 fp.write(node.value)
1467 1467 if node is self._head:
1468 1468 break
1469 1469 node = node.prev
1470 1470
1471 1471 def __len__(self):
1472 1472 if not self._read:
1473 1473 self.read()
1474 1474 return super(manifestfulltextcache, self).__len__()
1475 1475
1476 1476 def __contains__(self, k):
1477 1477 if not self._read:
1478 1478 self.read()
1479 1479 return super(manifestfulltextcache, self).__contains__(k)
1480 1480
1481 1481 def __iter__(self):
1482 1482 if not self._read:
1483 1483 self.read()
1484 1484 return super(manifestfulltextcache, self).__iter__()
1485 1485
1486 1486 def __getitem__(self, k):
1487 1487 if not self._read:
1488 1488 self.read()
1489 1489 # the cache lru order can change on read
1490 1490 setdirty = self._cache.get(k) is not self._head
1491 1491 value = super(manifestfulltextcache, self).__getitem__(k)
1492 1492 if setdirty:
1493 1493 self._dirty = True
1494 1494 return value
1495 1495
1496 1496 def __setitem__(self, k, v):
1497 1497 if not self._read:
1498 1498 self.read()
1499 1499 super(manifestfulltextcache, self).__setitem__(k, v)
1500 1500 self._dirty = True
1501 1501
1502 1502 def __delitem__(self, k):
1503 1503 if not self._read:
1504 1504 self.read()
1505 1505 super(manifestfulltextcache, self).__delitem__(k)
1506 1506 self._dirty = True
1507 1507
1508 1508 def get(self, k, default=None):
1509 1509 if not self._read:
1510 1510 self.read()
1511 1511 return super(manifestfulltextcache, self).get(k, default=default)
1512 1512
1513 1513 def clear(self, clear_persisted_data=False):
1514 1514 super(manifestfulltextcache, self).clear()
1515 1515 if clear_persisted_data:
1516 1516 self._dirty = True
1517 1517 self.write()
1518 1518 self._read = False
1519 1519
1520 1520
1521 1521 # an upper bound of what we expect from compression
1522 1522 # (real live value seems to be "3")
1523 1523 MAXCOMPRESSION = 3
1524 1524
1525 1525
1526 1526 class FastdeltaUnavailable(Exception):
1527 1527 """Exception raised when fastdelta isn't usable on a manifest."""
1528 1528
1529 1529
1530 1530 @interfaceutil.implementer(repository.imanifeststorage)
1531 1531 class manifestrevlog(object):
1532 1532 '''A revlog that stores manifest texts. This is responsible for caching the
1533 1533 full-text manifest contents.
1534 1534 '''
1535 1535
1536 1536 def __init__(
1537 1537 self,
1538 1538 opener,
1539 1539 tree=b'',
1540 1540 dirlogcache=None,
1541 1541 indexfile=None,
1542 1542 treemanifest=False,
1543 1543 ):
1544 1544 """Constructs a new manifest revlog
1545 1545
1546 1546 `indexfile` - used by extensions to have two manifests at once, like
1547 1547 when transitioning between flat manifests and tree manifests.
1548 1548
1549 1549 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1550 1550 options can also be used to make this a tree manifest revlog. The opener
1551 1551 option takes precedence, so if it is set to True, we ignore whatever
1552 1552 value is passed in to the constructor.
1553 1553 """
1554 1554 # During normal operations, we expect to deal with not more than four
1555 1555 # revs at a time (such as during commit --amend). When rebasing large
1556 1556 # stacks of commits, the number can go up, hence the config knob below.
1557 1557 cachesize = 4
1558 1558 optiontreemanifest = False
1559 1559 opts = getattr(opener, 'options', None)
1560 1560 if opts is not None:
1561 1561 cachesize = opts.get(b'manifestcachesize', cachesize)
1562 1562 optiontreemanifest = opts.get(b'treemanifest', False)
1563 1563
1564 1564 self._treeondisk = optiontreemanifest or treemanifest
1565 1565
1566 1566 self._fulltextcache = manifestfulltextcache(cachesize)
1567 1567
1568 1568 if tree:
1569 1569 assert self._treeondisk, b'opts is %r' % opts
1570 1570
1571 1571 if indexfile is None:
1572 1572 indexfile = b'00manifest.i'
1573 1573 if tree:
1574 1574 indexfile = b"meta/" + tree + indexfile
1575 1575
1576 1576 self.tree = tree
1577 1577
1578 1578 # The dirlogcache is kept on the root manifest log
1579 1579 if tree:
1580 1580 self._dirlogcache = dirlogcache
1581 1581 else:
1582 1582 self._dirlogcache = {b'': self}
1583 1583
1584 1584 self._revlog = revlog.revlog(
1585 1585 opener,
1586 1586 indexfile,
1587 1587 # only root indexfile is cached
1588 1588 checkambig=not bool(tree),
1589 1589 mmaplargeindex=True,
1590 1590 upperboundcomp=MAXCOMPRESSION,
1591 persistentnodemap=opener.options.get(
1592 b'exp-persistent-nodemap', False
1593 ),
1591 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
1594 1592 )
1595 1593
1596 1594 self.index = self._revlog.index
1597 1595 self.version = self._revlog.version
1598 1596 self._generaldelta = self._revlog._generaldelta
1599 1597
1600 1598 def _setupmanifestcachehooks(self, repo):
1601 1599 """Persist the manifestfulltextcache on lock release"""
1602 1600 if not util.safehasattr(repo, b'_wlockref'):
1603 1601 return
1604 1602
1605 1603 self._fulltextcache._opener = repo.wcachevfs
1606 1604 if repo._currentlock(repo._wlockref) is None:
1607 1605 return
1608 1606
1609 1607 reporef = weakref.ref(repo)
1610 1608 manifestrevlogref = weakref.ref(self)
1611 1609
1612 1610 def persistmanifestcache(success):
1613 1611 # Repo is in an unknown state, do not persist.
1614 1612 if not success:
1615 1613 return
1616 1614
1617 1615 repo = reporef()
1618 1616 self = manifestrevlogref()
1619 1617 if repo is None or self is None:
1620 1618 return
1621 1619 if repo.manifestlog.getstorage(b'') is not self:
1622 1620 # there's a different manifest in play now, abort
1623 1621 return
1624 1622 self._fulltextcache.write()
1625 1623
1626 1624 repo._afterlock(persistmanifestcache)
1627 1625
1628 1626 @property
1629 1627 def fulltextcache(self):
1630 1628 return self._fulltextcache
1631 1629
1632 1630 def clearcaches(self, clear_persisted_data=False):
1633 1631 self._revlog.clearcaches()
1634 1632 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1635 1633 self._dirlogcache = {self.tree: self}
1636 1634
1637 1635 def dirlog(self, d):
1638 1636 if d:
1639 1637 assert self._treeondisk
1640 1638 if d not in self._dirlogcache:
1641 1639 mfrevlog = manifestrevlog(
1642 1640 self.opener, d, self._dirlogcache, treemanifest=self._treeondisk
1643 1641 )
1644 1642 self._dirlogcache[d] = mfrevlog
1645 1643 return self._dirlogcache[d]
1646 1644
1647 1645 def add(
1648 1646 self,
1649 1647 m,
1650 1648 transaction,
1651 1649 link,
1652 1650 p1,
1653 1651 p2,
1654 1652 added,
1655 1653 removed,
1656 1654 readtree=None,
1657 1655 match=None,
1658 1656 ):
1659 1657 try:
1660 1658 if p1 not in self.fulltextcache:
1661 1659 raise FastdeltaUnavailable()
1662 1660 # If our first parent is in the manifest cache, we can
1663 1661 # compute a delta here using properties we know about the
1664 1662 # manifest up-front, which may save time later for the
1665 1663 # revlog layer.
1666 1664
1667 1665 _checkforbidden(added)
1668 1666 # combine the changed lists into one sorted iterator
1669 1667 work = heapq.merge(
1670 1668 [(x, False) for x in sorted(added)],
1671 1669 [(x, True) for x in sorted(removed)],
1672 1670 )
1673 1671
1674 1672 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1675 1673 cachedelta = self._revlog.rev(p1), deltatext
1676 1674 text = util.buffer(arraytext)
1677 1675 n = self._revlog.addrevision(
1678 1676 text, transaction, link, p1, p2, cachedelta
1679 1677 )
1680 1678 except FastdeltaUnavailable:
1681 1679 # The first parent manifest isn't already loaded or the
1682 1680 # manifest implementation doesn't support fastdelta, so
1683 1681 # we'll just encode a fulltext of the manifest and pass
1684 1682 # that through to the revlog layer, and let it handle the
1685 1683 # delta process.
1686 1684 if self._treeondisk:
1687 1685 assert readtree, b"readtree must be set for treemanifest writes"
1688 1686 assert match, b"match must be specified for treemanifest writes"
1689 1687 m1 = readtree(self.tree, p1)
1690 1688 m2 = readtree(self.tree, p2)
1691 1689 n = self._addtree(
1692 1690 m, transaction, link, m1, m2, readtree, match=match
1693 1691 )
1694 1692 arraytext = None
1695 1693 else:
1696 1694 text = m.text()
1697 1695 n = self._revlog.addrevision(text, transaction, link, p1, p2)
1698 1696 arraytext = bytearray(text)
1699 1697
1700 1698 if arraytext is not None:
1701 1699 self.fulltextcache[n] = arraytext
1702 1700
1703 1701 return n
1704 1702
1705 1703 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1706 1704 # If the manifest is unchanged compared to one parent,
1707 1705 # don't write a new revision
1708 1706 if self.tree != b'' and (
1709 1707 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1710 1708 ):
1711 1709 return m.node()
1712 1710
1713 1711 def writesubtree(subm, subp1, subp2, match):
1714 1712 sublog = self.dirlog(subm.dir())
1715 1713 sublog.add(
1716 1714 subm,
1717 1715 transaction,
1718 1716 link,
1719 1717 subp1,
1720 1718 subp2,
1721 1719 None,
1722 1720 None,
1723 1721 readtree=readtree,
1724 1722 match=match,
1725 1723 )
1726 1724
1727 1725 m.writesubtrees(m1, m2, writesubtree, match)
1728 1726 text = m.dirtext()
1729 1727 n = None
1730 1728 if self.tree != b'':
1731 1729 # Double-check whether contents are unchanged relative to either parent
1732 1730 if text == m1.dirtext():
1733 1731 n = m1.node()
1734 1732 elif text == m2.dirtext():
1735 1733 n = m2.node()
1736 1734
1737 1735 if not n:
1738 1736 n = self._revlog.addrevision(
1739 1737 text, transaction, link, m1.node(), m2.node()
1740 1738 )
1741 1739
1742 1740 # Save nodeid so parent manifest can calculate its nodeid
1743 1741 m.setnode(n)
1744 1742 return n
1745 1743
1746 1744 def __len__(self):
1747 1745 return len(self._revlog)
1748 1746
1749 1747 def __iter__(self):
1750 1748 return self._revlog.__iter__()
1751 1749
1752 1750 def rev(self, node):
1753 1751 return self._revlog.rev(node)
1754 1752
1755 1753 def node(self, rev):
1756 1754 return self._revlog.node(rev)
1757 1755
1758 1756 def lookup(self, value):
1759 1757 return self._revlog.lookup(value)
1760 1758
1761 1759 def parentrevs(self, rev):
1762 1760 return self._revlog.parentrevs(rev)
1763 1761
1764 1762 def parents(self, node):
1765 1763 return self._revlog.parents(node)
1766 1764
1767 1765 def linkrev(self, rev):
1768 1766 return self._revlog.linkrev(rev)
1769 1767
1770 1768 def checksize(self):
1771 1769 return self._revlog.checksize()
1772 1770
1773 1771 def revision(self, node, _df=None, raw=False):
1774 1772 return self._revlog.revision(node, _df=_df, raw=raw)
1775 1773
1776 1774 def rawdata(self, node, _df=None):
1777 1775 return self._revlog.rawdata(node, _df=_df)
1778 1776
1779 1777 def revdiff(self, rev1, rev2):
1780 1778 return self._revlog.revdiff(rev1, rev2)
1781 1779
1782 1780 def cmp(self, node, text):
1783 1781 return self._revlog.cmp(node, text)
1784 1782
1785 1783 def deltaparent(self, rev):
1786 1784 return self._revlog.deltaparent(rev)
1787 1785
1788 1786 def emitrevisions(
1789 1787 self,
1790 1788 nodes,
1791 1789 nodesorder=None,
1792 1790 revisiondata=False,
1793 1791 assumehaveparentrevisions=False,
1794 1792 deltamode=repository.CG_DELTAMODE_STD,
1795 1793 ):
1796 1794 return self._revlog.emitrevisions(
1797 1795 nodes,
1798 1796 nodesorder=nodesorder,
1799 1797 revisiondata=revisiondata,
1800 1798 assumehaveparentrevisions=assumehaveparentrevisions,
1801 1799 deltamode=deltamode,
1802 1800 )
1803 1801
1804 1802 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
1805 1803 return self._revlog.addgroup(
1806 1804 deltas, linkmapper, transaction, addrevisioncb=addrevisioncb
1807 1805 )
1808 1806
1809 1807 def rawsize(self, rev):
1810 1808 return self._revlog.rawsize(rev)
1811 1809
1812 1810 def getstrippoint(self, minlink):
1813 1811 return self._revlog.getstrippoint(minlink)
1814 1812
1815 1813 def strip(self, minlink, transaction):
1816 1814 return self._revlog.strip(minlink, transaction)
1817 1815
1818 1816 def files(self):
1819 1817 return self._revlog.files()
1820 1818
1821 1819 def clone(self, tr, destrevlog, **kwargs):
1822 1820 if not isinstance(destrevlog, manifestrevlog):
1823 1821 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
1824 1822
1825 1823 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1826 1824
1827 1825 def storageinfo(
1828 1826 self,
1829 1827 exclusivefiles=False,
1830 1828 sharedfiles=False,
1831 1829 revisionscount=False,
1832 1830 trackedsize=False,
1833 1831 storedsize=False,
1834 1832 ):
1835 1833 return self._revlog.storageinfo(
1836 1834 exclusivefiles=exclusivefiles,
1837 1835 sharedfiles=sharedfiles,
1838 1836 revisionscount=revisionscount,
1839 1837 trackedsize=trackedsize,
1840 1838 storedsize=storedsize,
1841 1839 )
1842 1840
1843 1841 @property
1844 1842 def indexfile(self):
1845 1843 return self._revlog.indexfile
1846 1844
1847 1845 @indexfile.setter
1848 1846 def indexfile(self, value):
1849 1847 self._revlog.indexfile = value
1850 1848
1851 1849 @property
1852 1850 def opener(self):
1853 1851 return self._revlog.opener
1854 1852
1855 1853 @opener.setter
1856 1854 def opener(self, value):
1857 1855 self._revlog.opener = value
1858 1856
1859 1857
1860 1858 @interfaceutil.implementer(repository.imanifestlog)
1861 1859 class manifestlog(object):
1862 1860 """A collection class representing the collection of manifest snapshots
1863 1861 referenced by commits in the repository.
1864 1862
1865 1863 In this situation, 'manifest' refers to the abstract concept of a snapshot
1866 1864 of the list of files in the given commit. Consumers of the output of this
1867 1865 class do not care about the implementation details of the actual manifests
1868 1866 they receive (i.e. tree or flat or lazily loaded, etc)."""
1869 1867
1870 1868 def __init__(self, opener, repo, rootstore, narrowmatch):
1871 1869 usetreemanifest = False
1872 1870 cachesize = 4
1873 1871
1874 1872 opts = getattr(opener, 'options', None)
1875 1873 if opts is not None:
1876 1874 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
1877 1875 cachesize = opts.get(b'manifestcachesize', cachesize)
1878 1876
1879 1877 self._treemanifests = usetreemanifest
1880 1878
1881 1879 self._rootstore = rootstore
1882 1880 self._rootstore._setupmanifestcachehooks(repo)
1883 1881 self._narrowmatch = narrowmatch
1884 1882
1885 1883 # A cache of the manifestctx or treemanifestctx for each directory
1886 1884 self._dirmancache = {}
1887 1885 self._dirmancache[b''] = util.lrucachedict(cachesize)
1888 1886
1889 1887 self._cachesize = cachesize
1890 1888
1891 1889 def __getitem__(self, node):
1892 1890 """Retrieves the manifest instance for the given node. Throws a
1893 1891 LookupError if not found.
1894 1892 """
1895 1893 return self.get(b'', node)
1896 1894
1897 1895 def get(self, tree, node, verify=True):
1898 1896 """Retrieves the manifest instance for the given node. Throws a
1899 1897 LookupError if not found.
1900 1898
1901 1899 `verify` - if True an exception will be thrown if the node is not in
1902 1900 the revlog
1903 1901 """
1904 1902 if node in self._dirmancache.get(tree, ()):
1905 1903 return self._dirmancache[tree][node]
1906 1904
1907 1905 if not self._narrowmatch.always():
1908 1906 if not self._narrowmatch.visitdir(tree[:-1]):
1909 1907 return excludeddirmanifestctx(tree, node)
1910 1908 if tree:
1911 1909 if self._rootstore._treeondisk:
1912 1910 if verify:
1913 1911 # Side-effect is LookupError is raised if node doesn't
1914 1912 # exist.
1915 1913 self.getstorage(tree).rev(node)
1916 1914
1917 1915 m = treemanifestctx(self, tree, node)
1918 1916 else:
1919 1917 raise error.Abort(
1920 1918 _(
1921 1919 b"cannot ask for manifest directory '%s' in a flat "
1922 1920 b"manifest"
1923 1921 )
1924 1922 % tree
1925 1923 )
1926 1924 else:
1927 1925 if verify:
1928 1926 # Side-effect is LookupError is raised if node doesn't exist.
1929 1927 self._rootstore.rev(node)
1930 1928
1931 1929 if self._treemanifests:
1932 1930 m = treemanifestctx(self, b'', node)
1933 1931 else:
1934 1932 m = manifestctx(self, node)
1935 1933
1936 1934 if node != nullid:
1937 1935 mancache = self._dirmancache.get(tree)
1938 1936 if not mancache:
1939 1937 mancache = util.lrucachedict(self._cachesize)
1940 1938 self._dirmancache[tree] = mancache
1941 1939 mancache[node] = m
1942 1940 return m
1943 1941
1944 1942 def getstorage(self, tree):
1945 1943 return self._rootstore.dirlog(tree)
1946 1944
1947 1945 def clearcaches(self, clear_persisted_data=False):
1948 1946 self._dirmancache.clear()
1949 1947 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
1950 1948
1951 1949 def rev(self, node):
1952 1950 return self._rootstore.rev(node)
1953 1951
1954 1952 def update_caches(self, transaction):
1955 1953 return self._rootstore._revlog.update_caches(transaction=transaction)
1956 1954
1957 1955
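For orientation, a minimal usage sketch of the interface above. It is illustrative only: `repo` is assumed to be an already-open localrepo object and `mnode` a known manifest node (both hypothetical names, not defined by this module).

def manifest_files(repo, mnode):
    """List the files recorded in one manifest revision.

    repo.manifestlog[mnode] returns a manifestctx (or treemanifestctx);
    read() materializes it and raises LookupError for unknown nodes.
    """
    mctx = repo.manifestlog[mnode]
    return sorted(mctx.read())  # manifest objects iterate over file names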
1958 1956 @interfaceutil.implementer(repository.imanifestrevisionwritable)
1959 1957 class memmanifestctx(object):
1960 1958 def __init__(self, manifestlog):
1961 1959 self._manifestlog = manifestlog
1962 1960 self._manifestdict = manifestdict()
1963 1961
1964 1962 def _storage(self):
1965 1963 return self._manifestlog.getstorage(b'')
1966 1964
1967 1965 def copy(self):
1968 1966 memmf = memmanifestctx(self._manifestlog)
1969 1967 memmf._manifestdict = self.read().copy()
1970 1968 return memmf
1971 1969
1972 1970 def read(self):
1973 1971 return self._manifestdict
1974 1972
1975 1973 def write(self, transaction, link, p1, p2, added, removed, match=None):
1976 1974 return self._storage().add(
1977 1975 self._manifestdict,
1978 1976 transaction,
1979 1977 link,
1980 1978 p1,
1981 1979 p2,
1982 1980 added,
1983 1981 removed,
1984 1982 match=match,
1985 1983 )
1986 1984
1987 1985
1988 1986 @interfaceutil.implementer(repository.imanifestrevisionstored)
1989 1987 class manifestctx(object):
1990 1988 """A class representing a single revision of a manifest, including its
1991 1989 contents, its parent revs, and its linkrev.
1992 1990 """
1993 1991
1994 1992 def __init__(self, manifestlog, node):
1995 1993 self._manifestlog = manifestlog
1996 1994 self._data = None
1997 1995
1998 1996 self._node = node
1999 1997
2000 1998 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2001 1999 # but let's add it later when something needs it and we can load it
2002 2000 # lazily.
2003 2001 # self.p1, self.p2 = store.parents(node)
2004 2002 # rev = store.rev(node)
2005 2003 # self.linkrev = store.linkrev(rev)
2006 2004
2007 2005 def _storage(self):
2008 2006 return self._manifestlog.getstorage(b'')
2009 2007
2010 2008 def node(self):
2011 2009 return self._node
2012 2010
2013 2011 def copy(self):
2014 2012 memmf = memmanifestctx(self._manifestlog)
2015 2013 memmf._manifestdict = self.read().copy()
2016 2014 return memmf
2017 2015
2018 2016 @propertycache
2019 2017 def parents(self):
2020 2018 return self._storage().parents(self._node)
2021 2019
2022 2020 def read(self):
2023 2021 if self._data is None:
2024 2022 if self._node == nullid:
2025 2023 self._data = manifestdict()
2026 2024 else:
2027 2025 store = self._storage()
2028 2026 if self._node in store.fulltextcache:
2029 2027 text = pycompat.bytestr(store.fulltextcache[self._node])
2030 2028 else:
2031 2029 text = store.revision(self._node)
2032 2030 arraytext = bytearray(text)
2033 2031 store.fulltextcache[self._node] = arraytext
2034 2032 self._data = manifestdict(text)
2035 2033 return self._data
2036 2034
2037 2035 def readfast(self, shallow=False):
2038 2036 '''Calls either readdelta or read, based on which would be less work.
2039 2037 readdelta is called if the delta is against the p1, and therefore can be
2040 2038 read quickly.
2041 2039
2042 2040 If `shallow` is True, nothing changes since this is a flat manifest.
2043 2041 '''
2044 2042 store = self._storage()
2045 2043 r = store.rev(self._node)
2046 2044 deltaparent = store.deltaparent(r)
2047 2045 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2048 2046 return self.readdelta()
2049 2047 return self.read()
2050 2048
2051 2049 def readdelta(self, shallow=False):
2052 2050 '''Returns a manifest containing just the entries that are present
2053 2051 in this manifest, but not in its p1 manifest. This is efficient to read
2054 2052 if the revlog delta is already p1.
2055 2053
2056 2054 Changing the value of `shallow` has no effect on flat manifests.
2057 2055 '''
2058 2056 store = self._storage()
2059 2057 r = store.rev(self._node)
2060 2058 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2061 2059 return manifestdict(d)
2062 2060
2063 2061 def find(self, key):
2064 2062 return self.read().find(key)
2065 2063
2066 2064
2067 2065 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2068 2066 class memtreemanifestctx(object):
2069 2067 def __init__(self, manifestlog, dir=b''):
2070 2068 self._manifestlog = manifestlog
2071 2069 self._dir = dir
2072 2070 self._treemanifest = treemanifest()
2073 2071
2074 2072 def _storage(self):
2075 2073 return self._manifestlog.getstorage(b'')
2076 2074
2077 2075 def copy(self):
2078 2076 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2079 2077 memmf._treemanifest = self._treemanifest.copy()
2080 2078 return memmf
2081 2079
2082 2080 def read(self):
2083 2081 return self._treemanifest
2084 2082
2085 2083 def write(self, transaction, link, p1, p2, added, removed, match=None):
2086 2084 def readtree(dir, node):
2087 2085 return self._manifestlog.get(dir, node).read()
2088 2086
2089 2087 return self._storage().add(
2090 2088 self._treemanifest,
2091 2089 transaction,
2092 2090 link,
2093 2091 p1,
2094 2092 p2,
2095 2093 added,
2096 2094 removed,
2097 2095 readtree=readtree,
2098 2096 match=match,
2099 2097 )
2100 2098
2101 2099
2102 2100 @interfaceutil.implementer(repository.imanifestrevisionstored)
2103 2101 class treemanifestctx(object):
2104 2102 def __init__(self, manifestlog, dir, node):
2105 2103 self._manifestlog = manifestlog
2106 2104 self._dir = dir
2107 2105 self._data = None
2108 2106
2109 2107 self._node = node
2110 2108
2111 2109 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2112 2110 # we can instantiate treemanifestctx objects for directories we don't
2113 2111 # have on disk.
2114 2112 # self.p1, self.p2 = store.parents(node)
2115 2113 # rev = store.rev(node)
2116 2114 # self.linkrev = store.linkrev(rev)
2117 2115
2118 2116 def _storage(self):
2119 2117 narrowmatch = self._manifestlog._narrowmatch
2120 2118 if not narrowmatch.always():
2121 2119 if not narrowmatch.visitdir(self._dir[:-1]):
2122 2120 return excludedmanifestrevlog(self._dir)
2123 2121 return self._manifestlog.getstorage(self._dir)
2124 2122
2125 2123 def read(self):
2126 2124 if self._data is None:
2127 2125 store = self._storage()
2128 2126 if self._node == nullid:
2129 2127 self._data = treemanifest()
2130 2128 # TODO accessing non-public API
2131 2129 elif store._treeondisk:
2132 2130 m = treemanifest(dir=self._dir)
2133 2131
2134 2132 def gettext():
2135 2133 return store.revision(self._node)
2136 2134
2137 2135 def readsubtree(dir, subm):
2138 2136 # Set verify to False since we need to be able to create
2139 2137 # subtrees for trees that don't exist on disk.
2140 2138 return self._manifestlog.get(dir, subm, verify=False).read()
2141 2139
2142 2140 m.read(gettext, readsubtree)
2143 2141 m.setnode(self._node)
2144 2142 self._data = m
2145 2143 else:
2146 2144 if self._node in store.fulltextcache:
2147 2145 text = pycompat.bytestr(store.fulltextcache[self._node])
2148 2146 else:
2149 2147 text = store.revision(self._node)
2150 2148 arraytext = bytearray(text)
2151 2149 store.fulltextcache[self._node] = arraytext
2152 2150 self._data = treemanifest(dir=self._dir, text=text)
2153 2151
2154 2152 return self._data
2155 2153
2156 2154 def node(self):
2157 2155 return self._node
2158 2156
2159 2157 def copy(self):
2160 2158 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2161 2159 memmf._treemanifest = self.read().copy()
2162 2160 return memmf
2163 2161
2164 2162 @propertycache
2165 2163 def parents(self):
2166 2164 return self._storage().parents(self._node)
2167 2165
2168 2166 def readdelta(self, shallow=False):
2169 2167 '''Returns a manifest containing just the entries that are present
2170 2168 in this manifest, but not in its p1 manifest. This is efficient to read
2171 2169 if the revlog delta is already p1.
2172 2170
2173 2171 If `shallow` is True, this will read the delta for this directory,
2174 2172 without recursively reading subdirectory manifests. Instead, any
2175 2173 subdirectory entry will be reported as it appears in the manifest, i.e.
2176 2174 the subdirectory will be reported among files and distinguished only by
2177 2175 its 't' flag.
2178 2176 '''
2179 2177 store = self._storage()
2180 2178 if shallow:
2181 2179 r = store.rev(self._node)
2182 2180 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2183 2181 return manifestdict(d)
2184 2182 else:
2185 2183 # Need to perform a slow delta
2186 2184 r0 = store.deltaparent(store.rev(self._node))
2187 2185 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2188 2186 m1 = self.read()
2189 2187 md = treemanifest(dir=self._dir)
2190 2188 for f, ((n0, fl0), (n1, fl1)) in pycompat.iteritems(m0.diff(m1)):
2191 2189 if n1:
2192 2190 md[f] = n1
2193 2191 if fl1:
2194 2192 md.setflag(f, fl1)
2195 2193 return md
2196 2194
2197 2195 def readfast(self, shallow=False):
2198 2196 '''Calls either readdelta or read, based on which would be less work.
2199 2197 readdelta is called if the delta is against the p1, and therefore can be
2200 2198 read quickly.
2201 2199
2202 2200 If `shallow` is True, it only returns the entries from this manifest,
2203 2201 and not any submanifests.
2204 2202 '''
2205 2203 store = self._storage()
2206 2204 r = store.rev(self._node)
2207 2205 deltaparent = store.deltaparent(r)
2208 2206 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2209 2207 return self.readdelta(shallow=shallow)
2210 2208
2211 2209 if shallow:
2212 2210 return manifestdict(store.revision(self._node))
2213 2211 else:
2214 2212 return self.read()
2215 2213
2216 2214 def find(self, key):
2217 2215 return self.read().find(key)
2218 2216
2219 2217
2220 2218 class excludeddir(treemanifest):
2221 2219 """Stand-in for a directory that is excluded from the repository.
2222 2220
2223 2221 With narrowing active on a repository that uses treemanifests,
2224 2222 some of the directory revlogs will be excluded from the resulting
2225 2223 clone. This is a huge storage win for clients, but means we need
2226 2224 some sort of pseudo-manifest to surface to internals so we can
2227 2225 detect a merge conflict outside the narrowspec. That's what this
2228 2226 class is: it stands in for a directory whose node is known, but
2229 2227 whose contents are unknown.
2230 2228 """
2231 2229
2232 2230 def __init__(self, dir, node):
2233 2231 super(excludeddir, self).__init__(dir)
2234 2232 self._node = node
2235 2233 # Add an empty file, which will be included by iterators and such,
2236 2234 # appearing as the directory itself (i.e. something like "dir/")
2237 2235 self._files[b''] = node
2238 2236 self._flags[b''] = b't'
2239 2237
2240 2238 # Manifests outside the narrowspec should never be modified, so avoid
2241 2239 # copying. This makes a noticeable difference when there are very many
2242 2240 # directories outside the narrowspec. Also, it makes sense for the copy to
2243 2241 # be of the same type as the original, which would not happen with the
2244 2242 # super type's copy().
2245 2243 def copy(self):
2246 2244 return self
2247 2245
2248 2246
2249 2247 class excludeddirmanifestctx(treemanifestctx):
2250 2248 """context wrapper for excludeddir - see that docstring for rationale"""
2251 2249
2252 2250 def __init__(self, dir, node):
2253 2251 self._dir = dir
2254 2252 self._node = node
2255 2253
2256 2254 def read(self):
2257 2255 return excludeddir(self._dir, self._node)
2258 2256
2259 2257 def write(self, *args):
2260 2258 raise error.ProgrammingError(
2261 2259 b'attempt to write manifest from excluded dir %s' % self._dir
2262 2260 )
2263 2261
2264 2262
2265 2263 class excludedmanifestrevlog(manifestrevlog):
2266 2264 """Stand-in for excluded treemanifest revlogs.
2267 2265
2268 2266 When narrowing is active on a treemanifest repository, we'll have
2269 2267 references to directories we can't see due to the revlog being
2270 2268 skipped. This class exists to conform to the manifestrevlog
2271 2269 interface for those directories and proactively prevent writes to
2272 2270 outside the narrowspec.
2273 2271 """
2274 2272
2275 2273 def __init__(self, dir):
2276 2274 self._dir = dir
2277 2275
2278 2276 def __len__(self):
2279 2277 raise error.ProgrammingError(
2280 2278 b'attempt to get length of excluded dir %s' % self._dir
2281 2279 )
2282 2280
2283 2281 def rev(self, node):
2284 2282 raise error.ProgrammingError(
2285 2283 b'attempt to get rev from excluded dir %s' % self._dir
2286 2284 )
2287 2285
2288 2286 def linkrev(self, node):
2289 2287 raise error.ProgrammingError(
2290 2288 b'attempt to get linkrev from excluded dir %s' % self._dir
2291 2289 )
2292 2290
2293 2291 def node(self, rev):
2294 2292 raise error.ProgrammingError(
2295 2293 b'attempt to get node from excluded dir %s' % self._dir
2296 2294 )
2297 2295
2298 2296 def add(self, *args, **kwargs):
2299 2297 # We should never write entries in dirlogs outside the narrow clone.
2300 2298 # However, the method still gets called from writesubtree() in
2301 2299 # _addtree(), so we need to handle it. We should possibly make that
2302 2300 # avoid calling add() with a clean manifest (_dirty is always False
2303 2301 # in excludeddir instances).
2304 2302 pass
@@ -1,644 +1,644 b''
1 1 # nodemap.py - nodemap related code and utilities
2 2 #
3 3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 4 # Copyright 2019 George Racinet <georges.racinet@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import errno
12 12 import os
13 13 import re
14 14 import struct
15 15
16 16 from ..i18n import _
17 17
18 18 from .. import (
19 19 error,
20 20 node as nodemod,
21 21 util,
22 22 )
23 23
24 24
25 25 class NodeMap(dict):
26 26 def __missing__(self, x):
27 27 raise error.RevlogError(b'unknown node: %s' % x)
28 28
29 29
30 30 def persisted_data(revlog):
31 31 """read the nodemap for a revlog from disk"""
32 32 if revlog.nodemap_file is None:
33 33 return None
34 34 pdata = revlog.opener.tryread(revlog.nodemap_file)
35 35 if not pdata:
36 36 return None
37 37 offset = 0
38 38 (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
39 39 if version != ONDISK_VERSION:
40 40 return None
41 41 offset += S_VERSION.size
42 42 headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
43 43 uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
44 44 offset += S_HEADER.size
45 45 docket = NodeMapDocket(pdata[offset : offset + uid_size])
46 46 offset += uid_size
47 47 docket.tip_rev = tip_rev
48 48 docket.tip_node = pdata[offset : offset + tip_node_size]
49 49 docket.data_length = data_length
50 50 docket.data_unused = data_unused
51 51
52 52 filename = _rawdata_filepath(revlog, docket)
53 use_mmap = revlog.opener.options.get(b"exp-persistent-nodemap.mmap")
53 use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
54 54 try:
55 55 with revlog.opener(filename) as fd:
56 56 if use_mmap:
57 57 data = util.buffer(util.mmapread(fd, data_length))
58 58 else:
59 59 data = fd.read(data_length)
60 60 except OSError as e:
61 61 if e.errno != errno.ENOENT:
62 62 raise
63 63 if len(data) < data_length:
64 64 return None
65 65 return docket, data
66 66
67 67
68 68 def setup_persistent_nodemap(tr, revlog):
69 69 """Install whatever is needed transaction side to persist a nodemap on disk
70 70
71 71 (only actually persist the nodemap if this is relevant for this revlog)
72 72 """
73 73 if revlog._inline:
74 74 return # inlined revlog are too small for this to be relevant
75 75 if revlog.nodemap_file is None:
76 76 return # we do not use persistent_nodemap on this revlog
77 77
 78 78     # this must run after the changelog finalization: callbacks run in sorted id order, and "nm-" sorts after the changelog's "cl-"
79 79 callback_id = b"nm-revlog-persistent-nodemap-%s" % revlog.nodemap_file
80 80 if tr.hasfinalize(callback_id):
81 81 return # no need to register again
82 82 tr.addpending(
83 83 callback_id, lambda tr: _persist_nodemap(tr, revlog, pending=True)
84 84 )
85 85 tr.addfinalize(callback_id, lambda tr: _persist_nodemap(tr, revlog))
86 86
87 87
88 88 class _NoTransaction(object):
 89 89     """transaction-like object to update the nodemap outside a transaction
90 90 """
91 91
92 92 def __init__(self):
93 93 self._postclose = {}
94 94
95 95 def addpostclose(self, callback_id, callback_func):
96 96 self._postclose[callback_id] = callback_func
97 97
98 98 def registertmp(self, *args, **kwargs):
99 99 pass
100 100
101 101 def addbackup(self, *args, **kwargs):
102 102 pass
103 103
104 104 def add(self, *args, **kwargs):
105 105 pass
106 106
107 107 def addabort(self, *args, **kwargs):
108 108 pass
109 109
110 110 def _report(self, *args):
111 111 pass
112 112
113 113
114 114 def update_persistent_nodemap(revlog):
115 115 """update the persistent nodemap right now
116 116
117 117 To be used for updating the nodemap on disk outside of a normal transaction
118 118 setup (eg, `debugupdatecache`).
119 119 """
120 120 if revlog._inline:
 121 121         return  # inlined revlogs are too small for this to be relevant
122 122 if revlog.nodemap_file is None:
123 123 return # we do not use persistent_nodemap on this revlog
124 124
125 125 notr = _NoTransaction()
126 126 _persist_nodemap(notr, revlog)
127 127 for k in sorted(notr._postclose):
128 128 notr._postclose[k](None)
129 129
130 130
131 131 def _persist_nodemap(tr, revlog, pending=False):
132 132 """Write nodemap data on disk for a given revlog
133 133 """
134 134 if getattr(revlog, 'filteredrevs', ()):
135 135 raise error.ProgrammingError(
136 136 "cannot persist nodemap of a filtered changelog"
137 137 )
138 138 if revlog.nodemap_file is None:
 139 139         msg = "calling persist nodemap on a revlog without the feature enabled"
140 140 raise error.ProgrammingError(msg)
141 141
142 142 can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
143 143 ondisk_docket = revlog._nodemap_docket
144 144 feed_data = util.safehasattr(revlog.index, "update_nodemap_data")
145 use_mmap = revlog.opener.options.get(b"exp-persistent-nodemap.mmap")
146 mode = revlog.opener.options.get(b"exp-persistent-nodemap.mode")
145 use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
146 mode = revlog.opener.options.get(b"persistent-nodemap.mode")
147 147 if not can_incremental:
148 148 msg = _(b"persistent nodemap in strict mode without efficient method")
149 149 if mode == b'warn':
150 150 tr._report(b"%s\n" % msg)
151 151 elif mode == b'strict':
152 152 raise error.Abort(msg)
153 153
154 154 data = None
 155 155     # first, attempt an incremental update of the data
156 156 if can_incremental and ondisk_docket is not None:
157 157 target_docket = revlog._nodemap_docket.copy()
158 158 (
159 159 src_docket,
160 160 data_changed_count,
161 161 data,
162 162 ) = revlog.index.nodemap_data_incremental()
163 163 new_length = target_docket.data_length + len(data)
164 164 new_unused = target_docket.data_unused + data_changed_count
165 165 if src_docket != target_docket:
166 166 data = None
 167 167         elif new_length <= (new_unused * 10):  # at least 10% of the data is unused: rewrite from scratch
168 168 data = None
169 169 else:
170 170 datafile = _rawdata_filepath(revlog, target_docket)
171 171 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
172 172 # store vfs
173 173 tr.add(datafile, target_docket.data_length)
174 174 with revlog.opener(datafile, b'r+') as fd:
175 175 fd.seek(target_docket.data_length)
176 176 fd.write(data)
177 177 if feed_data:
178 178 if use_mmap:
 179 179                     fd.flush()
 180 180                     new_data = util.buffer(util.mmapread(fd, new_length))
 181 181                 else:
 182 182                     fd.seek(0)
 183 183                     new_data = fd.read(new_length)
184 184 target_docket.data_length = new_length
185 185 target_docket.data_unused = new_unused
186 186
187 187 if data is None:
 188 188         # otherwise fall back to a full new export
189 189 target_docket = NodeMapDocket()
190 190 datafile = _rawdata_filepath(revlog, target_docket)
191 191 if util.safehasattr(revlog.index, "nodemap_data_all"):
192 192 data = revlog.index.nodemap_data_all()
193 193 else:
194 194 data = persistent_data(revlog.index)
195 195 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
196 196 # store vfs
197 197
198 198 tryunlink = revlog.opener.tryunlink
199 199
200 200 def abortck(tr):
201 201 tryunlink(datafile)
202 202
203 203 callback_id = b"delete-%s" % datafile
204 204
 205 205         # some flavors of transaction abort do not clean up new files; they
 206 206         # simply empty them.
207 207 tr.addabort(callback_id, abortck)
208 208 with revlog.opener(datafile, b'w+') as fd:
209 209 fd.write(data)
210 210 if feed_data:
 211 211                 if use_mmap:
 212 212                     fd.flush()
 213 213                     new_data = util.buffer(util.mmapread(fd, len(data)))
 214 214                 else:
 215 215                     new_data = data
216 216 target_docket.data_length = len(data)
217 217 target_docket.tip_rev = revlog.tiprev()
218 218 target_docket.tip_node = revlog.node(target_docket.tip_rev)
219 219 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
220 220 # store vfs
221 221 file_path = revlog.nodemap_file
222 222 if pending:
223 223 file_path += b'.a'
224 224 tr.registertmp(file_path)
225 225 else:
226 226 tr.addbackup(file_path)
227 227
228 228 with revlog.opener(file_path, b'w', atomictemp=True) as fp:
229 229 fp.write(target_docket.serialize())
230 230 revlog._nodemap_docket = target_docket
231 231 if feed_data:
232 232 revlog.index.update_nodemap_data(target_docket, new_data)
233 233
 234 234     # search for old index files in all cases; some older process might have
 235 235     # left one behind.
236 236 olds = _other_rawdata_filepath(revlog, target_docket)
237 237 if olds:
238 238 realvfs = getattr(revlog, '_realopener', revlog.opener)
239 239
240 240 def cleanup(tr):
241 241 for oldfile in olds:
242 242 realvfs.tryunlink(oldfile)
243 243
244 244 callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
245 245 tr.addpostclose(callback_id, cleanup)
246 246
247 247
248 248 ### Nodemap docket file
249 249 #
250 250 # The nodemap data are stored on disk using 2 files:
251 251 #
 252 252 # * a raw data file containing the persistent nodemap
253 253 # (see `Nodemap Trie` section)
254 254 #
 255 255 # * a small "docket" file containing metadata
256 256 #
 257 257 # While the nodemap data can be multiple tens of megabytes, the "docket" is
 258 258 # small, so it is easy to update it atomically or to duplicate its content
 259 259 # during a transaction.
260 260 #
 261 261 # Multiple raw data files can exist at the same time (the currently valid one
 262 262 # and a new one being used by an in-progress transaction). To accommodate
 263 263 # this, the filename hosting the raw data has a variable part. The exact
 264 264 # filename is specified inside the "docket" file.
265 265 #
266 266 # The docket file contains information to find, qualify and validate the raw
267 267 # data. Its content is currently very light, but it will expand as the on disk
268 268 # nodemap gains the necessary features to be used in production.
269 269
270 270 ONDISK_VERSION = 1
271 271 S_VERSION = struct.Struct(">B")
272 272 S_HEADER = struct.Struct(">BQQQQ")
273 273
274 274 ID_SIZE = 8
275 275
276 276
277 277 def _make_uid():
278 278 """return a new unique identifier.
279 279
280 280 The identifier is random and composed of ascii characters."""
281 281 return nodemod.hex(os.urandom(ID_SIZE))
282 282
283 283
284 284 class NodeMapDocket(object):
285 285 """metadata associated with persistent nodemap data
286 286
287 287 The persistent data may come from disk or be on their way to disk.
288 288 """
289 289
290 290 def __init__(self, uid=None):
291 291 if uid is None:
292 292 uid = _make_uid()
293 293 # a unique identifier for the data file:
294 294 # - When new data are appended, it is preserved.
295 295 # - When a new data file is created, a new identifier is generated.
296 296 self.uid = uid
297 297 # the tipmost revision stored in the data file. This revision and all
 298 298         # revisions before it are expected to be encoded in the data file.
299 299 self.tip_rev = None
 300 300         # the node of that tipmost revision; if it does not match the current
 301 301         # index data, the docket is not valid for the current index and should
 302 302         # be discarded.
 303 303         #
 304 304         # note: this check is not perfect, as some destructive operation could
 305 305         # preserve the same tip_rev + tip_node while altering lower revisions.
 306 306         # However, multiple other caches have the same vulnerability (eg: the
 307 307         # branchmap cache).
308 308 self.tip_node = None
309 309 # the size (in bytes) of the persisted data to encode the nodemap valid
310 310 # for `tip_rev`.
 311 311         # - data files shorter than this are corrupted,
312 312 # - any extra data should be ignored.
313 313 self.data_length = None
314 314 # the amount (in bytes) of "dead" data, still in the data file but no
315 315 # longer used for the nodemap.
316 316 self.data_unused = 0
317 317
318 318 def copy(self):
319 319 new = NodeMapDocket(uid=self.uid)
320 320 new.tip_rev = self.tip_rev
321 321 new.tip_node = self.tip_node
322 322 new.data_length = self.data_length
323 323 new.data_unused = self.data_unused
324 324 return new
325 325
326 326 def __cmp__(self, other):
327 327 if self.uid < other.uid:
328 328 return -1
329 329 if self.uid > other.uid:
330 330 return 1
331 331 elif self.data_length < other.data_length:
332 332 return -1
333 333 elif self.data_length > other.data_length:
334 334 return 1
335 335 return 0
336 336
337 337 def __eq__(self, other):
338 338 return self.uid == other.uid and self.data_length == other.data_length
339 339
340 340 def serialize(self):
 341 341         """return the serialized bytes for this docket"""
342 342 data = []
343 343 data.append(S_VERSION.pack(ONDISK_VERSION))
344 344 headers = (
345 345 len(self.uid),
346 346 self.tip_rev,
347 347 self.data_length,
348 348 self.data_unused,
349 349 len(self.tip_node),
350 350 )
351 351 data.append(S_HEADER.pack(*headers))
352 352 data.append(self.uid)
353 353 data.append(self.tip_node)
354 354 return b''.join(data)
355 355
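To make the docket byte layout concrete, here is a self-contained round-trip sketch using the same structs defined above; the uid and tip node values are made up for illustration.

import struct

S_VERSION = struct.Struct(">B")
S_HEADER = struct.Struct(">BQQQQ")  # uid_size, tip_rev, length, unused, tip_node_size

uid = b"0011223344556677"   # hex of 8 random bytes, as _make_uid() produces
tip_node = b"\x99" * 20     # a fake 20-byte node
raw = (
    S_VERSION.pack(1)       # ONDISK_VERSION
    + S_HEADER.pack(len(uid), 4, 1024, 0, len(tip_node))
    + uid
    + tip_node
)

# decoding mirrors persisted_data() above
(version,) = S_VERSION.unpack_from(raw, 0)
headers = S_HEADER.unpack_from(raw, S_VERSION.size)
uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
offset = S_VERSION.size + S_HEADER.size
assert raw[offset : offset + uid_size] == uid
assert (version, tip_rev, data_length) == (1, 4, 1024)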
356 356
357 357 def _rawdata_filepath(revlog, docket):
358 358 """The (vfs relative) nodemap's rawdata file for a given uid"""
359 359 if revlog.nodemap_file.endswith(b'.n.a'):
360 360 prefix = revlog.nodemap_file[:-4]
361 361 else:
362 362 prefix = revlog.nodemap_file[:-2]
363 363 return b"%s-%s.nd" % (prefix, docket.uid)
364 364
365 365
366 366 def _other_rawdata_filepath(revlog, docket):
367 367 prefix = revlog.nodemap_file[:-2]
368 368 pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
369 369 new_file_path = _rawdata_filepath(revlog, docket)
370 370 new_file_name = revlog.opener.basename(new_file_path)
371 371 dirpath = revlog.opener.dirname(new_file_path)
372 372 others = []
373 373 for f in revlog.opener.listdir(dirpath):
374 374 if pattern.match(f) and f != new_file_name:
375 375 others.append(f)
376 376 return others
377 377
378 378
379 379 ### Nodemap Trie
380 380 #
381 381 # This is a simple reference implementation to compute and persist a nodemap
382 382 # trie. This reference implementation is write only. The python version of this
383 383 # is not expected to be actually used, since it wont provide performance
384 384 # improvement over existing non-persistent C implementation.
385 385 #
386 386 # The nodemap is persisted as Trie using 4bits-address/16-entries block. each
387 387 # revision can be adressed using its node shortest prefix.
388 388 #
389 389 # The trie is stored as a sequence of block. Each block contains 16 entries
390 390 # (signed 64bit integer, big endian). Each entry can be one of the following:
391 391 #
392 392 # * value >= 0 -> index of sub-block
393 393 # * value == -1 -> no value
 394 394 # * value < -1 -> a revision value: rev = -(value+2)
395 395 #
 396 396 # The implementation focuses on simplicity, not on performance. A Rust
 397 397 # implementation should provide an efficient version of the same binary
 398 398 # persistence. This reference python implementation is never meant to be
 399 399 # extensively used in production.
400 400
401 401
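A self-contained illustration of the entry encoding: with REV_OFFSET = 2 (defined below), a stored value v < -1 decodes as rev = -(v + 2), and the transform is its own inverse.

NO_ENTRY = -1
REV_OFFSET = 2

def transform_rev(rev):
    # same involution as _transform_rev below
    return -(rev + REV_OFFSET)

def describe_entry(value):
    if value == NO_ENTRY:
        return ("empty", None)
    if value >= 0:
        return ("sub-block", value)   # index of a child block
    return ("rev", transform_rev(value))

assert transform_rev(0) == -2                  # rev 0 is stored as -2
assert transform_rev(transform_rev(17)) == 17  # involution
assert describe_entry(-7) == ("rev", 5)
assert describe_entry(3) == ("sub-block", 3)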
402 402 def persistent_data(index):
403 403 """return the persistent binary form for a nodemap for a given index
404 404 """
405 405 trie = _build_trie(index)
406 406 return _persist_trie(trie)
407 407
408 408
409 409 def update_persistent_data(index, root, max_idx, last_rev):
410 410 """return the incremental update for persistent nodemap from a given index
411 411 """
412 412 changed_block, trie = _update_trie(index, root, last_rev)
413 413 return (
414 414 changed_block * S_BLOCK.size,
415 415 _persist_trie(trie, existing_idx=max_idx),
416 416 )
417 417
418 418
419 419 S_BLOCK = struct.Struct(">" + ("l" * 16))
420 420
421 421 NO_ENTRY = -1
 422 422 # rev 0 needs to be -2 because 0 is used by blocks and -1 is a special value.
423 423 REV_OFFSET = 2
424 424
425 425
426 426 def _transform_rev(rev):
427 427 """Return the number used to represent the rev in the tree.
428 428
429 429 (or retrieve a rev number from such representation)
430 430
431 431 Note that this is an involution, a function equal to its inverse (i.e.
432 432 which gives the identity when applied to itself).
433 433 """
434 434 return -(rev + REV_OFFSET)
435 435
436 436
437 437 def _to_int(hex_digit):
 438 438     """turn a hexadecimal digit into a proper integer"""
439 439 return int(hex_digit, 16)
440 440
441 441
442 442 class Block(dict):
443 443 """represent a block of the Trie
444 444
 445 445     contains up to 16 entries indexed from 0 to 15"""
446 446
447 447 def __init__(self):
448 448 super(Block, self).__init__()
 449 449         # If this block exists on disk, here is its ID
450 450 self.ondisk_id = None
451 451
452 452 def __iter__(self):
453 453 return iter(self.get(i) for i in range(16))
454 454
455 455
456 456 def _build_trie(index):
457 457 """build a nodemap trie
458 458
459 459 The nodemap stores revision number for each unique prefix.
460 460
461 461 Each block is a dictionary with keys in `[0, 15]`. Values are either
462 462 another block or a revision number.
463 463 """
464 464 root = Block()
465 465 for rev in range(len(index)):
466 466 hex = nodemod.hex(index[rev][7])
467 467 _insert_into_block(index, 0, root, rev, hex)
468 468 return root
469 469
470 470
471 471 def _update_trie(index, root, last_rev):
 472 472     """update an existing trie with the revisions added since last_rev"""
473 473 changed = 0
474 474 for rev in range(last_rev + 1, len(index)):
475 475 hex = nodemod.hex(index[rev][7])
476 476 changed += _insert_into_block(index, 0, root, rev, hex)
477 477 return changed, root
478 478
479 479
480 480 def _insert_into_block(index, level, block, current_rev, current_hex):
481 481 """insert a new revision in a block
482 482
483 483 index: the index we are adding revision for
484 484 level: the depth of the current block in the trie
485 485 block: the block currently being considered
486 486 current_rev: the revision number we are adding
 487 487     current_hex: the hexadecimal representation of the node of that revision
488 488 """
489 489 changed = 1
490 490 if block.ondisk_id is not None:
491 491 block.ondisk_id = None
492 492 hex_digit = _to_int(current_hex[level : level + 1])
493 493 entry = block.get(hex_digit)
494 494 if entry is None:
495 495 # no entry, simply store the revision number
496 496 block[hex_digit] = current_rev
497 497 elif isinstance(entry, dict):
498 498 # need to recurse to an underlying block
499 499 changed += _insert_into_block(
500 500 index, level + 1, entry, current_rev, current_hex
501 501 )
502 502 else:
 503 503         # collision with a previously unique prefix; insert new
 504 504         # vertices to fit both entries.
505 505 other_hex = nodemod.hex(index[entry][7])
506 506 other_rev = entry
507 507 new = Block()
508 508 block[hex_digit] = new
509 509 _insert_into_block(index, level + 1, new, other_rev, other_hex)
510 510 _insert_into_block(index, level + 1, new, current_rev, current_hex)
511 511 return changed
512 512
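A simplified, standalone sketch of the collision case handled above: when two nodes share a leading nibble, the existing revision is pushed into a fresh child block before the new one is inserted. Plain dicts and short hex strings stand in for Block objects and full node hashes.

def insert(block, rev, hexnode, nodes, level=0):
    """Toy version of _insert_into_block; `nodes` maps rev -> hex node,
    standing in for the revlog index lookup."""
    digit = int(hexnode[level], 16)
    entry = block.get(digit)
    if entry is None:
        block[digit] = rev                      # unique prefix so far
    elif isinstance(entry, dict):
        insert(entry, rev, hexnode, nodes, level + 1)
    else:
        child = {}                              # collision: split one level down
        block[digit] = child
        insert(child, entry, nodes[entry], nodes, level + 1)
        insert(child, rev, hexnode, nodes, level + 1)

nodes = {0: "ab12", 1: "ac34"}  # both nodes start with nibble 0xa
root = {}
for rev, hexnode in nodes.items():
    insert(root, rev, hexnode, nodes)
assert root == {0xA: {0xB: 0, 0xC: 1}}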
513 513
514 514 def _persist_trie(root, existing_idx=None):
515 515 """turn a nodemap trie into persistent binary data
516 516
517 517 See `_build_trie` for nodemap trie structure"""
518 518 block_map = {}
519 519 if existing_idx is not None:
520 520 base_idx = existing_idx + 1
521 521 else:
522 522 base_idx = 0
523 523 chunks = []
524 524 for tn in _walk_trie(root):
525 525 if tn.ondisk_id is not None:
526 526 block_map[id(tn)] = tn.ondisk_id
527 527 else:
528 528 block_map[id(tn)] = len(chunks) + base_idx
529 529 chunks.append(_persist_block(tn, block_map))
530 530 return b''.join(chunks)
531 531
532 532
533 533 def _walk_trie(block):
 534 534     """yield all the blocks in a trie
535 535
 536 536     Child blocks are always yielded before their parent block.
537 537 """
538 538 for (__, item) in sorted(block.items()):
539 539 if isinstance(item, dict):
540 540 for sub_block in _walk_trie(item):
541 541 yield sub_block
542 542 yield block
543 543
544 544
545 545 def _persist_block(block_node, block_map):
546 546 """produce persistent binary data for a single block
547 547
 548 548     Child blocks are assumed to be already persisted and present in
549 549 block_map.
550 550 """
551 551 data = tuple(_to_value(v, block_map) for v in block_node)
552 552 return S_BLOCK.pack(*data)
553 553
554 554
555 555 def _to_value(item, block_map):
556 556 """persist any value as an integer"""
557 557 if item is None:
558 558 return NO_ENTRY
559 559 elif isinstance(item, dict):
560 560 return block_map[id(item)]
561 561 else:
562 562 return _transform_rev(item)
563 563
564 564
565 565 def parse_data(data):
 566 566     """parse nodemap data into a nodemap Trie"""
567 567 if (len(data) % S_BLOCK.size) != 0:
568 568 msg = "nodemap data size is not a multiple of block size (%d): %d"
569 569 raise error.Abort(msg % (S_BLOCK.size, len(data)))
570 570 if not data:
571 571 return Block(), None
572 572 block_map = {}
573 573 new_blocks = []
574 574 for i in range(0, len(data), S_BLOCK.size):
575 575 block = Block()
576 576 block.ondisk_id = len(block_map)
577 577 block_map[block.ondisk_id] = block
578 578 block_data = data[i : i + S_BLOCK.size]
579 579 values = S_BLOCK.unpack(block_data)
580 580 new_blocks.append((block, values))
581 581 for b, values in new_blocks:
582 582 for idx, v in enumerate(values):
583 583 if v == NO_ENTRY:
584 584 continue
585 585 elif v >= 0:
586 586 b[idx] = block_map[v]
587 587 else:
588 588 b[idx] = _transform_rev(v)
589 589 return block, i // S_BLOCK.size
590 590
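Putting the pieces together, a hedged round-trip sketch. It assumes a Mercurial checkout is importable, and fakes a revlog index as a list of tuples whose 8th field is the 20-byte node, which is all that the functions above consult (`_find_node` is a private helper, used here purely for illustration):

from mercurial import node as nodemod
from mercurial.revlogutils import nodemap

# four fake revisions; only entry[7] (the node) is ever looked at here
index = [(0, 0, 0, 0, 0, 0, 0, bytes([i]) * 20) for i in range(4)]

data = nodemap.persistent_data(index)
assert len(data) % nodemap.S_BLOCK.size == 0

root, last_block_idx = nodemap.parse_data(data)
for rev in range(4):
    assert nodemap._find_node(root, nodemod.hex(index[rev][7])) == rev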
591 591
592 592 # debug utility
593 593
594 594
595 595 def check_data(ui, index, data):
 596 596     """verify that the provided nodemap data are valid for the given index"""
597 597 ret = 0
598 598 ui.status((b"revision in index: %d\n") % len(index))
599 599 root, __ = parse_data(data)
600 600 all_revs = set(_all_revisions(root))
601 601 ui.status((b"revision in nodemap: %d\n") % len(all_revs))
602 602 for r in range(len(index)):
603 603 if r not in all_revs:
604 604 msg = b" revision missing from nodemap: %d\n" % r
605 605 ui.write_err(msg)
606 606 ret = 1
607 607 else:
608 608 all_revs.remove(r)
609 609 nm_rev = _find_node(root, nodemod.hex(index[r][7]))
610 610 if nm_rev is None:
611 611 msg = b" revision node does not match any entries: %d\n" % r
612 612 ui.write_err(msg)
613 613 ret = 1
614 614 elif nm_rev != r:
615 615 msg = (
616 616 b" revision node does not match the expected revision: "
617 617 b"%d != %d\n" % (r, nm_rev)
618 618 )
619 619 ui.write_err(msg)
620 620 ret = 1
621 621
622 622 if all_revs:
623 623 for r in sorted(all_revs):
624 624 msg = b" extra revision in nodemap: %d\n" % r
625 625 ui.write_err(msg)
626 626 ret = 1
627 627 return ret
628 628
629 629
630 630 def _all_revisions(root):
631 631 """return all revisions stored in a Trie"""
632 632 for block in _walk_trie(root):
633 633 for v in block:
634 634 if v is None or isinstance(v, Block):
635 635 continue
636 636 yield v
637 637
638 638
639 639 def _find_node(block, node):
640 640 """find the revision associated with a given node"""
641 641 entry = block.get(_to_int(node[0:1]))
642 642 if isinstance(entry, dict):
643 643 return _find_node(entry, node[1:])
644 644 return entry