revlog: introduce a mandatory `_writing` context to update revlog content...
marmoute
r47988:906a7bca default
@@ -1,625 +1,625 @@
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 )
15 15 from .thirdparty import attr
16 16
17 17 from . import (
18 18 encoding,
19 19 error,
20 20 metadata,
21 21 pycompat,
22 22 revlog,
23 23 )
24 24 from .utils import (
25 25 dateutil,
26 26 stringutil,
27 27 )
28 28 from .revlogutils import (
29 29 constants as revlog_constants,
30 30 flagutil,
31 31 )
32 32
33 33 _defaultextra = {b'branch': b'default'}
34 34
35 35
36 36 def _string_escape(text):
37 37 """
38 38 >>> from .pycompat import bytechr as chr
39 39 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
40 40 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
41 41 >>> s
42 42 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
43 43 >>> res = _string_escape(s)
44 44 >>> s == _string_unescape(res)
45 45 True
46 46 """
47 47 # subset of the string_escape codec
48 48 text = (
49 49 text.replace(b'\\', b'\\\\')
50 50 .replace(b'\n', b'\\n')
51 51 .replace(b'\r', b'\\r')
52 52 )
53 53 return text.replace(b'\0', b'\\0')
54 54
55 55
56 56 def _string_unescape(text):
57 57 if b'\\0' in text:
58 58 # fix up \0 without getting into trouble with \\0
59 59 text = text.replace(b'\\\\', b'\\\\\n')
60 60 text = text.replace(b'\\0', b'\0')
61 61 text = text.replace(b'\n', b'')
62 62 return stringutil.unescapestr(text)
63 63
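
A minimal round-trip sketch of the escaping scheme above, using only the Python standard library; string_escape/string_unescape here are stand-ins for the hg helpers, with bytes.decode('unicode_escape') standing in for stringutil.unescapestr:

def string_escape(text):
    # subset of the string_escape codec, as above
    text = (
        text.replace(b'\\', b'\\\\')
        .replace(b'\n', b'\\n')
        .replace(b'\r', b'\\r')
    )
    return text.replace(b'\0', b'\\0')

def string_unescape(text):
    if b'\\0' in text:
        # fix up \0 without getting into trouble with \\0
        text = text.replace(b'\\\\', b'\\\\\n')
        text = text.replace(b'\\0', b'\0')
        text = text.replace(b'\n', b'')
    return text.decode('unicode_escape').encode('latin-1')

raw = b'line1\nkey:value\\with\x00nul'
assert string_unescape(string_escape(raw)) == raw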
64 64
65 65 def decodeextra(text):
66 66 """
67 67 >>> from .pycompat import bytechr as chr
68 68 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
69 69 ... ).items())
70 70 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
71 71 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
72 72 ... b'baz': chr(92) + chr(0) + b'2'})
73 73 ... ).items())
74 74 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
75 75 """
76 76 extra = _defaultextra.copy()
77 77 for l in text.split(b'\0'):
78 78 if l:
79 79 k, v = _string_unescape(l).split(b':', 1)
80 80 extra[k] = v
81 81 return extra
82 82
83 83
84 84 def encodeextra(d):
85 85 # keys must be sorted to produce a deterministic changelog entry
86 86 items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
87 87 return b"\0".join(items)
88 88
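
As a concrete illustration of the wire format produced by encodeextra, here is what the blob looks like for a small dict (encode_extra is a hypothetical stand-in; real values also pass through _string_escape first, omitted here because these values need no escaping):

def encode_extra(d):
    # keys sorted to produce a deterministic changelog entry,
    # entries joined with NUL
    return b'\0'.join(b'%s:%s' % (k, d[k]) for k in sorted(d))

blob = encode_extra({b'close': b'1', b'branch': b'stable'})
assert blob == b'branch:stable\x00close:1'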
89 89
90 90 def stripdesc(desc):
91 91 """strip trailing whitespace and leading and trailing empty lines"""
92 92 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
93 93
94 94
95 95 class appender(object):
96 96 """the changelog index must be updated last on disk, so we use this class
97 97 to delay writes to it"""
98 98
99 99 def __init__(self, vfs, name, mode, buf):
100 100 self.data = buf
101 101 fp = vfs(name, mode)
102 102 self.fp = fp
103 103 self.offset = fp.tell()
104 104 self.size = vfs.fstat(fp).st_size
105 105 self._end = self.size
106 106
107 107 def end(self):
108 108 return self._end
109 109
110 110 def tell(self):
111 111 return self.offset
112 112
113 113 def flush(self):
114 114 pass
115 115
116 116 @property
117 117 def closed(self):
118 118 return self.fp.closed
119 119
120 120 def close(self):
121 121 self.fp.close()
122 122
123 123 def seek(self, offset, whence=0):
124 124 '''virtual file offset spans real file and data'''
125 125 if whence == 0:
126 126 self.offset = offset
127 127 elif whence == 1:
128 128 self.offset += offset
129 129 elif whence == 2:
130 130 self.offset = self.end() + offset
131 131 if self.offset < self.size:
132 132 self.fp.seek(self.offset)
133 133
134 134 def read(self, count=-1):
135 135 '''only trick here is reads that span real file and data'''
136 136 ret = b""
137 137 if self.offset < self.size:
138 138 s = self.fp.read(count)
139 139 ret = s
140 140 self.offset += len(s)
141 141 if count > 0:
142 142 count -= len(s)
143 143 if count != 0:
144 144 doff = self.offset - self.size
145 145 self.data.insert(0, b"".join(self.data))
146 146 del self.data[1:]
147 147 s = self.data[0][doff : doff + count]
148 148 self.offset += len(s)
149 149 ret += s
150 150 return ret
151 151
152 152 def write(self, s):
153 153 self.data.append(bytes(s))
154 154 self.offset += len(s)
155 155 self._end += len(s)
156 156
157 157 def __enter__(self):
158 158 self.fp.__enter__()
159 159 return self
160 160
161 161 def __exit__(self, *args):
162 162 return self.fp.__exit__(*args)
163 163
164 164
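
The subtle part of appender is read() spanning the on-disk bytes and the in-memory buffer; a toy version of that logic with io.BytesIO standing in for the real file (hypothetical names, standard library only):

import io

real = io.BytesIO(b'0123456789')   # bytes already committed to disk
buf = [b'ABC', b'DEF']             # delayed writes, not yet on disk
size = len(real.getvalue())

def read_at(offset, count):
    out = b''
    if offset < size:              # first serve bytes from the real file
        real.seek(offset)
        s = real.read(count)
        out, offset, count = s, offset + len(s), count - len(s)
    if count:                      # then continue into the buffered data
        data = b''.join(buf)
        out += data[offset - size : offset - size + count]
    return out

assert read_at(8, 5) == b'89ABC'   # a read that spans the boundary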
165 165 class _divertopener(object):
166 166 def __init__(self, opener, target):
167 167 self._opener = opener
168 168 self._target = target
169 169
170 170 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
171 171 if name != self._target:
172 172 return self._opener(name, mode, **kwargs)
173 173 return self._opener(name + b".a", mode, **kwargs)
174 174
175 175 def __getattr__(self, attr):
176 176 return getattr(self._opener, attr)
177 177
178 178
179 179 def _delayopener(opener, target, buf):
180 180 """build an opener that stores chunks in 'buf' instead of 'target'"""
181 181
182 182 def _delay(name, mode=b'r', checkambig=False, **kwargs):
183 183 if name != target:
184 184 return opener(name, mode, **kwargs)
185 185 assert not kwargs
186 186 return appender(opener, name, mode, buf)
187 187
188 188 return _delay
189 189
190 190
191 191 @attr.s
192 192 class _changelogrevision(object):
193 193 # Extensions might modify _defaultextra, so let the constructor below pass
194 194 # it in
195 195 extra = attr.ib()
196 196 manifest = attr.ib()
197 197 user = attr.ib(default=b'')
198 198 date = attr.ib(default=(0, 0))
199 199 files = attr.ib(default=attr.Factory(list))
200 200 filesadded = attr.ib(default=None)
201 201 filesremoved = attr.ib(default=None)
202 202 p1copies = attr.ib(default=None)
203 203 p2copies = attr.ib(default=None)
204 204 description = attr.ib(default=b'')
205 205 branchinfo = attr.ib(default=(_defaultextra[b'branch'], False))
206 206
207 207
208 208 class changelogrevision(object):
209 209 """Holds results of a parsed changelog revision.
210 210
211 211 Changelog revisions consist of multiple pieces of data, including
212 212 the manifest node, user, and date. This object exposes a view into
213 213 the parsed object.
214 214 """
215 215
216 216 __slots__ = (
217 217 '_offsets',
218 218 '_text',
219 219 '_sidedata',
220 220 '_cpsd',
221 221 '_changes',
222 222 )
223 223
224 224 def __new__(cls, cl, text, sidedata, cpsd):
225 225 if not text:
226 226 return _changelogrevision(extra=_defaultextra, manifest=cl.nullid)
227 227
228 228 self = super(changelogrevision, cls).__new__(cls)
229 229 # We could return here and implement the following as an __init__.
230 230 # But doing it here is equivalent and saves an extra function call.
231 231
232 232 # format used:
233 233 # nodeid\n : manifest node in ascii
234 234 # user\n : user, no \n or \r allowed
235 235 # time tz extra\n : date (time is int or float, timezone is int)
236 236 # : extra is metadata, encoded and separated by '\0'
237 237 # : older versions ignore it
238 238 # files\n\n : files modified by the cset, no \n or \r allowed
239 239 # (.*) : comment (free text, ideally utf-8)
240 240 #
241 241 # changelog v0 doesn't use extra
242 242
243 243 nl1 = text.index(b'\n')
244 244 nl2 = text.index(b'\n', nl1 + 1)
245 245 nl3 = text.index(b'\n', nl2 + 1)
246 246
247 247 # The list of files may be empty, in which case nl3 is the first of the
248 248 # double newline that precedes the description.
249 249 if text[nl3 + 1 : nl3 + 2] == b'\n':
250 250 doublenl = nl3
251 251 else:
252 252 doublenl = text.index(b'\n\n', nl3 + 1)
253 253
254 254 self._offsets = (nl1, nl2, nl3, doublenl)
255 255 self._text = text
256 256 self._sidedata = sidedata
257 257 self._cpsd = cpsd
258 258 self._changes = None
259 259
260 260 return self
261 261
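
To make the offset arithmetic above concrete, the same slicing applied to a made-up changeset text (standard library only; the node is a dummy 40-char hex string):

text = (
    b'ab' * 20 + b'\n'               # manifest node in ascii
    b'Alice <alice@example.com>\n'   # user
    b'1500000000 0\n'                # time tz (no extra)
    b'file1\nfile2\n'                # files touched by the cset
    b'\n'                            # blank line before the description
    b'commit message'
)

nl1 = text.index(b'\n')
nl2 = text.index(b'\n', nl1 + 1)
nl3 = text.index(b'\n', nl2 + 1)
if text[nl3 + 1 : nl3 + 2] == b'\n':  # empty file list
    doublenl = nl3
else:
    doublenl = text.index(b'\n\n', nl3 + 1)

assert text[nl1 + 1 : nl2] == b'Alice <alice@example.com>'
assert text[nl3 + 1 : doublenl].split(b'\n') == [b'file1', b'file2']
assert text[doublenl + 2 :] == b'commit message'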
262 262 @property
263 263 def manifest(self):
264 264 return bin(self._text[0 : self._offsets[0]])
265 265
266 266 @property
267 267 def user(self):
268 268 off = self._offsets
269 269 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
270 270
271 271 @property
272 272 def _rawdate(self):
273 273 off = self._offsets
274 274 dateextra = self._text[off[1] + 1 : off[2]]
275 275 return dateextra.split(b' ', 2)[0:2]
276 276
277 277 @property
278 278 def _rawextra(self):
279 279 off = self._offsets
280 280 dateextra = self._text[off[1] + 1 : off[2]]
281 281 fields = dateextra.split(b' ', 2)
282 282 if len(fields) != 3:
283 283 return None
284 284
285 285 return fields[2]
286 286
287 287 @property
288 288 def date(self):
289 289 raw = self._rawdate
290 290 time = float(raw[0])
291 291 # Various tools did silly things with the timezone.
292 292 try:
293 293 timezone = int(raw[1])
294 294 except ValueError:
295 295 timezone = 0
296 296
297 297 return time, timezone
298 298
299 299 @property
300 300 def extra(self):
301 301 raw = self._rawextra
302 302 if raw is None:
303 303 return _defaultextra
304 304
305 305 return decodeextra(raw)
306 306
307 307 @property
308 308 def changes(self):
309 309 if self._changes is not None:
310 310 return self._changes
311 311 if self._cpsd:
312 312 changes = metadata.decode_files_sidedata(self._sidedata)
313 313 else:
314 314 changes = metadata.ChangingFiles(
315 315 touched=self.files or (),
316 316 added=self.filesadded or (),
317 317 removed=self.filesremoved or (),
318 318 p1_copies=self.p1copies or {},
319 319 p2_copies=self.p2copies or {},
320 320 )
321 321 self._changes = changes
322 322 return changes
323 323
324 324 @property
325 325 def files(self):
326 326 if self._cpsd:
327 327 return sorted(self.changes.touched)
328 328 off = self._offsets
329 329 if off[2] == off[3]:
330 330 return []
331 331
332 332 return self._text[off[2] + 1 : off[3]].split(b'\n')
333 333
334 334 @property
335 335 def filesadded(self):
336 336 if self._cpsd:
337 337 return self.changes.added
338 338 else:
339 339 rawindices = self.extra.get(b'filesadded')
340 340 if rawindices is None:
341 341 return None
342 342 return metadata.decodefileindices(self.files, rawindices)
343 343
344 344 @property
345 345 def filesremoved(self):
346 346 if self._cpsd:
347 347 return self.changes.removed
348 348 else:
349 349 rawindices = self.extra.get(b'filesremoved')
350 350 if rawindices is None:
351 351 return None
352 352 return metadata.decodefileindices(self.files, rawindices)
353 353
354 354 @property
355 355 def p1copies(self):
356 356 if self._cpsd:
357 357 return self.changes.copied_from_p1
358 358 else:
359 359 rawcopies = self.extra.get(b'p1copies')
360 360 if rawcopies is None:
361 361 return None
362 362 return metadata.decodecopies(self.files, rawcopies)
363 363
364 364 @property
365 365 def p2copies(self):
366 366 if self._cpsd:
367 367 return self.changes.copied_from_p2
368 368 else:
369 369 rawcopies = self.extra.get(b'p2copies')
370 370 if rawcopies is None:
371 371 return None
372 372 return metadata.decodecopies(self.files, rawcopies)
373 373
374 374 @property
375 375 def description(self):
376 376 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
377 377
378 378 @property
379 379 def branchinfo(self):
380 380 extra = self.extra
381 381 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
382 382
383 383
384 384 class changelog(revlog.revlog):
385 385 def __init__(self, opener, trypending=False, concurrencychecker=None):
386 386 """Load a changelog revlog using an opener.
387 387
388 388 If ``trypending`` is true, we attempt to load the index from a
389 389 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
390 390 The ``00changelog.i.a`` file contains index (and possibly inline
391 391 revision) data for a transaction that hasn't been finalized yet.
392 392 It exists in a separate file to facilitate readers (such as
393 393 hook processes) accessing data before a transaction is finalized.
394 394
395 395 ``concurrencychecker`` will be passed to the revlog init function, see
396 396 the documentation there.
397 397 """
398 398
399 399 if trypending and opener.exists(b'00changelog.i.a'):
400 400 postfix = b'a'
401 401 else:
402 402 postfix = None
403 403
404 404 revlog.revlog.__init__(
405 405 self,
406 406 opener,
407 407 target=(revlog_constants.KIND_CHANGELOG, None),
408 408 radix=b'00changelog',
409 409 postfix=postfix,
410 410 checkambig=True,
411 411 mmaplargeindex=True,
412 412 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
413 413 concurrencychecker=concurrencychecker,
414 414 )
415 415
416 416 if self._initempty and (self._format_version == revlog.REVLOGV1):
417 417 # changelogs don't benefit from generaldelta.
418 418
419 419 self._format_flags &= ~revlog.FLAG_GENERALDELTA
420 420 self._generaldelta = False
421 421
422 422 # Delta chains for changelogs tend to be very small because entries
423 423 # tend to be small and don't delta well with each other. So disable delta
424 424 # chains.
425 425 self._storedeltachains = False
426 426
427 427 self._realopener = opener
428 428 self._delayed = False
429 429 self._delaybuf = None
430 430 self._divert = False
431 431 self._filteredrevs = frozenset()
432 432 self._filteredrevs_hashcache = {}
433 433 self._copiesstorage = opener.options.get(b'copies-storage')
434 434
435 435 @property
436 436 def filteredrevs(self):
437 437 return self._filteredrevs
438 438
439 439 @filteredrevs.setter
440 440 def filteredrevs(self, val):
441 441 # Ensure all updates go through this function
442 442 assert isinstance(val, frozenset)
443 443 self._filteredrevs = val
444 444 self._filteredrevs_hashcache = {}
445 445
446 446 def delayupdate(self, tr):
447 447 """delay visibility of index updates to other readers"""
448 448
449 449 if not self._delayed:
450 450 if len(self) == 0:
451 451 self._divert = True
452 452 if self._realopener.exists(self._indexfile + b'.a'):
453 453 self._realopener.unlink(self._indexfile + b'.a')
454 454 self.opener = _divertopener(self._realopener, self._indexfile)
455 455 else:
456 456 self._delaybuf = []
457 457 self.opener = _delayopener(
458 458 self._realopener, self._indexfile, self._delaybuf
459 459 )
460 460 self._delayed = True
461 461 tr.addpending(b'cl-%i' % id(self), self._writepending)
462 462 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
463 463
464 464 def _finalize(self, tr):
465 465 """finalize index updates"""
466 466 self._delayed = False
467 467 self.opener = self._realopener
468 468 # move redirected index data back into place
469 469 if self._divert:
470 470 assert not self._delaybuf
471 471 tmpname = self._indexfile + b".a"
472 472 nfile = self.opener.open(tmpname)
473 473 nfile.close()
474 474 self.opener.rename(tmpname, self._indexfile, checkambig=True)
475 475 elif self._delaybuf:
476 476 fp = self.opener(self._indexfile, b'a', checkambig=True)
477 477 fp.write(b"".join(self._delaybuf))
478 478 fp.close()
479 479 self._delaybuf = None
480 480 self._divert = False
481 481 # split when we're done
482 482 self._enforceinlinesize(tr)
483 483
484 484 def _writepending(self, tr):
485 485 """create a file containing the unfinalized state for
486 486 pretxnchangegroup"""
487 487 if self._delaybuf:
488 488 # make a temporary copy of the index
489 489 fp1 = self._realopener(self._indexfile)
490 490 pendingfilename = self._indexfile + b".a"
491 491 # register as a temp file to ensure cleanup on failure
492 492 tr.registertmp(pendingfilename)
493 493 # write existing data
494 494 fp2 = self._realopener(pendingfilename, b"w")
495 495 fp2.write(fp1.read())
496 496 # add pending data
497 497 fp2.write(b"".join(self._delaybuf))
498 498 fp2.close()
499 499 # switch modes so finalize can simply rename
500 500 self._delaybuf = None
501 501 self._divert = True
502 502 self.opener = _divertopener(self._realopener, self._indexfile)
503 503
504 504 if self._divert:
505 505 return True
506 506
507 507 return False
508 508
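
A sketch of the delay/divert state machine implemented by delayupdate, _writepending, and _finalize above (plain dict stand-in, no hg imports; the actual file I/O is elided):

state = {'delayed': False, 'divert': False, 'buf': None}

def delayupdate(changelog_is_empty):
    state['delayed'] = True
    if changelog_is_empty:
        state['divert'] = True    # write everything to 00changelog.i.a
    else:
        state['buf'] = []         # buffer only the new appends in memory

def writepending():
    if state['buf']:
        # copy the real index plus the buffered appends into the .a file,
        # then switch modes so finalize can simply rename
        state['buf'], state['divert'] = None, True
    return state['divert']        # True if hooks should read the .a file

def finalize():
    # divert: rename .a over the index; buffer: append it to the index
    state.update(delayed=False, divert=False, buf=None)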
509 def _enforceinlinesize(self, tr, fp=None):
509 def _enforceinlinesize(self, tr):
510 510 if not self._delayed:
511 revlog.revlog._enforceinlinesize(self, tr, fp)
511 revlog.revlog._enforceinlinesize(self, tr)
512 512
513 513 def read(self, nodeorrev):
514 514 """Obtain data from a parsed changelog revision.
515 515
516 516 Returns a 6-tuple of:
517 517
518 518 - manifest node in binary
519 519 - author/user as a localstr
520 520 - date as a 2-tuple of (time, timezone)
521 521 - list of files
522 522 - commit message as a localstr
523 523 - dict of extra metadata
524 524
525 525 Unless you need to access all fields, consider calling
526 526 ``changelogrevision`` instead, as it is faster for partial object
527 527 access.
528 528 """
529 529 d, s = self._revisiondata(nodeorrev)
530 530 c = changelogrevision(
531 531 self, d, s, self._copiesstorage == b'changeset-sidedata'
532 532 )
533 533 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
534 534
535 535 def changelogrevision(self, nodeorrev):
536 536 """Obtain a ``changelogrevision`` for a node or revision."""
537 537 text, sidedata = self._revisiondata(nodeorrev)
538 538 return changelogrevision(
539 539 self, text, sidedata, self._copiesstorage == b'changeset-sidedata'
540 540 )
541 541
542 542 def readfiles(self, nodeorrev):
543 543 """
544 544 short version of read that only returns the files modified by the cset
545 545 """
546 546 text = self.revision(nodeorrev)
547 547 if not text:
548 548 return []
549 549 last = text.index(b"\n\n")
550 550 l = text[:last].split(b'\n')
551 551 return l[3:]
552 552
553 553 def add(
554 554 self,
555 555 manifest,
556 556 files,
557 557 desc,
558 558 transaction,
559 559 p1,
560 560 p2,
561 561 user,
562 562 date=None,
563 563 extra=None,
564 564 ):
565 565 # Convert to UTF-8 encoded bytestrings as the very first
566 566 # thing: calling any method on a localstr object will turn it
567 567 # into a str object and the cached UTF-8 string is thus lost.
568 568 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
569 569
570 570 user = user.strip()
571 571 # An empty username or a username with a "\n" will make the
572 572 # revision text contain two "\n\n" sequences -> corrupt
573 573 # repository since read cannot unpack the revision.
574 574 if not user:
575 575 raise error.StorageError(_(b"empty username"))
576 576 if b"\n" in user:
577 577 raise error.StorageError(
578 578 _(b"username %r contains a newline") % pycompat.bytestr(user)
579 579 )
580 580
581 581 desc = stripdesc(desc)
582 582
583 583 if date:
584 584 parseddate = b"%d %d" % dateutil.parsedate(date)
585 585 else:
586 586 parseddate = b"%d %d" % dateutil.makedate()
587 587 if extra:
588 588 branch = extra.get(b"branch")
589 589 if branch in (b"default", b""):
590 590 del extra[b"branch"]
591 591 elif branch in (b".", b"null", b"tip"):
592 592 raise error.StorageError(
593 593 _(b'the name \'%s\' is reserved') % branch
594 594 )
595 595 sortedfiles = sorted(files.touched)
596 596 flags = 0
597 597 sidedata = None
598 598 if self._copiesstorage == b'changeset-sidedata':
599 599 if files.has_copies_info:
600 600 flags |= flagutil.REVIDX_HASCOPIESINFO
601 601 sidedata = metadata.encode_files_sidedata(files)
602 602
603 603 if extra:
604 604 extra = encodeextra(extra)
605 605 parseddate = b"%s %s" % (parseddate, extra)
606 606 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
607 607 text = b"\n".join(l)
608 608 rev = self.addrevision(
609 609 text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
610 610 )
611 611 return self.node(rev)
612 612
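
The text assembled by add() is the same layout parsed by changelogrevision earlier in this file; a sketch for a made-up revision (no extra, so the date line carries only time and timezone):

manifest_hex = b'ab' * 20          # hex(manifest) for a dummy node
lines = (
    [manifest_hex, b'alice', b'1500000000 0']
    + [b'a.txt', b'b.txt']         # sortedfiles
    + [b'', b'fix a bug']          # blank separator, then the description
)
text = b'\n'.join(lines)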
613 613 def branchinfo(self, rev):
614 614 """return the branch name and open/close state of a revision
615 615
616 616 This function exists because creating a changectx object
617 617 just to access this is costly."""
618 618 return self.changelogrevision(rev).branchinfo
619 619
620 620 def _nodeduplicatecallback(self, transaction, rev):
621 621 # keep track of revisions that got "re-added", e.g. an unbundle of a known rev.
622 622 #
623 623 # We track them in a list to preserve their order from the source bundle
624 624 duplicates = transaction.changes.setdefault(b'revduplicates', [])
625 625 duplicates.append(rev)
@@ -1,3193 +1,3192 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 INDEX_HEADER,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 49 )
50 50 from .revlogutils.flagutil import (
51 51 REVIDX_DEFAULT_FLAGS,
52 52 REVIDX_ELLIPSIS,
53 53 REVIDX_EXTSTORED,
54 54 REVIDX_FLAGS_ORDER,
55 55 REVIDX_HASCOPIESINFO,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 )
59 59 from .thirdparty import attr
60 60 from . import (
61 61 ancestor,
62 62 dagop,
63 63 error,
64 64 mdiff,
65 65 policy,
66 66 pycompat,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .interfaces import (
71 71 repository,
72 72 util as interfaceutil,
73 73 )
74 74 from .revlogutils import (
75 75 deltas as deltautil,
76 76 flagutil,
77 77 nodemap as nodemaputil,
78 78 revlogv0,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # bare references to all the names, to keep pyflakes happy
87 87 # We need these names available in the module for extensions.
88 88
89 89 REVLOGV0
90 90 REVLOGV1
91 91 REVLOGV2
92 92 FLAG_INLINE_DATA
93 93 FLAG_GENERALDELTA
94 94 REVLOG_DEFAULT_FLAGS
95 95 REVLOG_DEFAULT_FORMAT
96 96 REVLOG_DEFAULT_VERSION
97 97 REVLOGV1_FLAGS
98 98 REVLOGV2_FLAGS
99 99 REVIDX_ISCENSORED
100 100 REVIDX_ELLIPSIS
101 101 REVIDX_HASCOPIESINFO
102 102 REVIDX_EXTSTORED
103 103 REVIDX_DEFAULT_FLAGS
104 104 REVIDX_FLAGS_ORDER
105 105 REVIDX_RAWTEXT_CHANGING_FLAGS
106 106
107 107 parsers = policy.importmod('parsers')
108 108 rustancestor = policy.importrust('ancestor')
109 109 rustdagop = policy.importrust('dagop')
110 110 rustrevlog = policy.importrust('revlog')
111 111
112 112 # Aliased for performance.
113 113 _zlibdecompress = zlib.decompress
114 114
115 115 # max size of revlog with inline data
116 116 _maxinline = 131072
117 117 _chunksize = 1048576
118 118
119 119 # Flag processors for REVIDX_ELLIPSIS.
120 120 def ellipsisreadprocessor(rl, text):
121 121 return text, False
122 122
123 123
124 124 def ellipsiswriteprocessor(rl, text):
125 125 return text, False
126 126
127 127
128 128 def ellipsisrawprocessor(rl, text):
129 129 return False
130 130
131 131
132 132 ellipsisprocessor = (
133 133 ellipsisreadprocessor,
134 134 ellipsiswriteprocessor,
135 135 ellipsisrawprocessor,
136 136 )
137 137
138 138
139 139 def offset_type(offset, type):
140 140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
141 141 raise ValueError(b'unknown revlog index flags')
142 142 return int(int(offset) << 16 | type)
143 143
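
The first field of each index entry packs a 48-bit file offset and 16 bits of per-revision flags into one integer; a sketch of the round trip (KNOWN_FLAGS is a stand-in mask for this sketch only, not the real flagutil.REVIDX_KNOWN_FLAGS):

KNOWN_FLAGS = 0xFFFF

def pack_offset_type(offset, type):
    if type & ~KNOWN_FLAGS:
        raise ValueError('unknown revlog index flags')
    return (offset << 16) | type

packed = pack_offset_type(1024, 0x1)
assert packed >> 16 == 1024        # what start() recovers below
assert packed & 0xFFFF == 0x1      # what flags() recovers below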
144 144
145 145 def _verify_revision(rl, skipflags, state, node):
146 146 """Verify the integrity of the given revlog ``node`` while providing a hook
147 147 point for extensions to influence the operation."""
148 148 if skipflags:
149 149 state[b'skipread'].add(node)
150 150 else:
151 151 # Side-effect: read content and verify hash.
152 152 rl.revision(node)
153 153
154 154
155 155 # True if a fast implementation for persistent-nodemap is available
156 156 #
157 157 # We also consider that we have a "fast" implementation in "pure" python,
158 158 # because people using pure don't really have performance considerations
159 159 # (and a wheelbarrow of other slowness sources)
160 160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
161 161 parsers, 'BaseIndexObject'
162 162 )
163 163
164 164
165 165 @attr.s(slots=True, frozen=True)
166 166 class _revisioninfo(object):
167 167 """Information about a revision that allows building its fulltext
168 168 node: expected hash of the revision
169 169 p1, p2: parent revs of the revision
170 170 btext: built text cache consisting of a one-element list
171 171 cachedelta: (baserev, uncompressed_delta) or None
172 172 flags: flags associated with the revision storage
173 173
174 174 One of btext[0] or cachedelta must be set.
175 175 """
176 176
177 177 node = attr.ib()
178 178 p1 = attr.ib()
179 179 p2 = attr.ib()
180 180 btext = attr.ib()
181 181 textlen = attr.ib()
182 182 cachedelta = attr.ib()
183 183 flags = attr.ib()
184 184
185 185
186 186 @interfaceutil.implementer(repository.irevisiondelta)
187 187 @attr.s(slots=True)
188 188 class revlogrevisiondelta(object):
189 189 node = attr.ib()
190 190 p1node = attr.ib()
191 191 p2node = attr.ib()
192 192 basenode = attr.ib()
193 193 flags = attr.ib()
194 194 baserevisionsize = attr.ib()
195 195 revision = attr.ib()
196 196 delta = attr.ib()
197 197 sidedata = attr.ib()
198 198 protocol_flags = attr.ib()
199 199 linknode = attr.ib(default=None)
200 200
201 201
202 202 @interfaceutil.implementer(repository.iverifyproblem)
203 203 @attr.s(frozen=True)
204 204 class revlogproblem(object):
205 205 warning = attr.ib(default=None)
206 206 error = attr.ib(default=None)
207 207 node = attr.ib(default=None)
208 208
209 209
210 210 def parse_index_v1(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline)
213 213 return index, cache
214 214
215 215
216 216 def parse_index_v2(data, inline):
217 217 # call the C implementation to parse the index data
218 218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
219 219 return index, cache
220 220
221 221
222 222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
223 223
224 224 def parse_index_v1_nodemap(data, inline):
225 225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
226 226 return index, cache
227 227
228 228
229 229 else:
230 230 parse_index_v1_nodemap = None
231 231
232 232
233 233 def parse_index_v1_mixed(data, inline):
234 234 index, cache = parse_index_v1(data, inline)
235 235 return rustrevlog.MixedIndex(index), cache
236 236
237 237
238 238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
239 239 # signed integer)
240 240 _maxentrysize = 0x7FFFFFFF
241 241
242 242
243 243 class revlog(object):
244 244 """
245 245 the underlying revision storage object
246 246
247 247 A revlog consists of two parts, an index and the revision data.
248 248
249 249 The index is a file with a fixed record size containing
250 250 information on each revision, including its nodeid (hash), the
251 251 nodeids of its parents, the position and offset of its data within
252 252 the data file, and the revision it's based on. Finally, each entry
253 253 contains a linkrev entry that can serve as a pointer to external
254 254 data.
255 255
256 256 The revision data itself is a linear collection of data chunks.
257 257 Each chunk represents a revision and is usually represented as a
258 258 delta against the previous chunk. To bound lookup time, runs of
259 259 deltas are limited to about 2 times the length of the original
260 260 version data. This makes retrieval of a version proportional to
261 261 its size, or O(1) relative to the number of revisions.
262 262
263 263 Both pieces of the revlog are written to in an append-only
264 264 fashion, which means we never need to rewrite a file to insert or
265 265 remove data, and can use some simple techniques to avoid the need
266 266 for locking while reading.
267 267
268 268 If checkambig, indexfile is opened with checkambig=True at
269 269 writing, to avoid file stat ambiguity.
270 270
271 271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
272 272 index will be mmapped rather than read if it is larger than the
273 273 configured threshold.
274 274
275 275 If censorable is True, the revlog can have censored revisions.
276 276
277 277 If `upperboundcomp` is not None, this is the expected maximal gain from
278 278 compression for the data content.
279 279
280 280 `concurrencychecker` is an optional function that receives 3 arguments: a
281 281 file handle, a filename, and an expected position. It should check whether
282 282 the current position in the file handle is valid, and log/warn/fail (by
283 283 raising).
284 284 """
285 285
286 286 _flagserrorclass = error.RevlogError
287 287
288 288 def __init__(
289 289 self,
290 290 opener,
291 291 target,
292 292 radix,
293 293 postfix=None,
294 294 checkambig=False,
295 295 mmaplargeindex=False,
296 296 censorable=False,
297 297 upperboundcomp=None,
298 298 persistentnodemap=False,
299 299 concurrencychecker=None,
300 300 ):
301 301 """
302 302 create a revlog object
303 303
304 304 opener is a function that abstracts the file opening operation
305 305 and can be used to implement COW semantics or the like.
306 306
307 307 `target`: a (KIND, ID) tuple that identifies the content stored in
308 308 this revlog. It helps the rest of the code understand what the revlog
309 309 is about without having to resort to heuristics and index filename
310 310 analysis. Note that this must be reliably set by normal code, but
311 311 test, debug, or performance measurement code might not set it to an
312 312 accurate value.
313 313 """
314 314 self.upperboundcomp = upperboundcomp
315 315
316 316 self.radix = radix
317 317
318 318 self._indexfile = None
319 319 self._datafile = None
320 320 self._nodemap_file = None
321 321 self.postfix = postfix
322 322 self.opener = opener
323 323 if persistentnodemap:
324 324 self._nodemap_file = nodemaputil.get_nodemap_file(self)
325 325
326 326 assert target[0] in ALL_KINDS
327 327 assert len(target) == 2
328 328 self.target = target
329 329 # When True, indexfile is opened with checkambig=True at writing, to
330 330 # avoid file stat ambiguity.
331 331 self._checkambig = checkambig
332 332 self._mmaplargeindex = mmaplargeindex
333 333 self._censorable = censorable
334 334 # 3-tuple of (node, rev, text) for a raw revision.
335 335 self._revisioncache = None
336 336 # Maps rev to chain base rev.
337 337 self._chainbasecache = util.lrucachedict(100)
338 338 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
339 339 self._chunkcache = (0, b'')
340 340 # How much data to read and cache into the raw revlog data cache.
341 341 self._chunkcachesize = 65536
342 342 self._maxchainlen = None
343 343 self._deltabothparents = True
344 344 self.index = None
345 345 self._nodemap_docket = None
346 346 # Mapping of partial identifiers to full nodes.
347 347 self._pcache = {}
348 348 # Mapping of revision integer to full node.
349 349 self._compengine = b'zlib'
350 350 self._compengineopts = {}
351 351 self._maxdeltachainspan = -1
352 352 self._withsparseread = False
353 353 self._sparserevlog = False
354 354 self._srdensitythreshold = 0.50
355 355 self._srmingapsize = 262144
356 356
357 357 # Make copy of flag processors so each revlog instance can support
358 358 # custom flags.
359 359 self._flagprocessors = dict(flagutil.flagprocessors)
360 360
361 361 # 2-tuple of file handles being used for active writing.
362 362 self._writinghandles = None
363 # prevent nesting of addgroup
364 self._adding_group = None
363 365
364 366 self._loadindex()
365 367
366 368 self._concurrencychecker = concurrencychecker
367 369
368 370 def _init_opts(self):
369 371 """process options (from above/config) to set up the associated default revlog mode
370 372
371 373 These values might be affected when actually reading on disk information.
372 374
373 375 The relevant values are returned for use in _loadindex().
374 376
375 377 * newversionflags:
376 378 version header to use if we need to create a new revlog
377 379
378 380 * mmapindexthreshold:
379 381 minimal index size at which to start using mmap
380 382
381 383 * force_nodemap:
382 384 force the usage of a "development" version of the nodemap code
383 385 """
384 386 mmapindexthreshold = None
385 387 opts = self.opener.options
386 388
387 389 if b'revlogv2' in opts:
388 390 new_header = REVLOGV2 | FLAG_INLINE_DATA
389 391 elif b'revlogv1' in opts:
390 392 new_header = REVLOGV1 | FLAG_INLINE_DATA
391 393 if b'generaldelta' in opts:
392 394 new_header |= FLAG_GENERALDELTA
393 395 elif b'revlogv0' in self.opener.options:
394 396 new_header = REVLOGV0
395 397 else:
396 398 new_header = REVLOG_DEFAULT_VERSION
397 399
398 400 if b'chunkcachesize' in opts:
399 401 self._chunkcachesize = opts[b'chunkcachesize']
400 402 if b'maxchainlen' in opts:
401 403 self._maxchainlen = opts[b'maxchainlen']
402 404 if b'deltabothparents' in opts:
403 405 self._deltabothparents = opts[b'deltabothparents']
404 406 self._lazydelta = bool(opts.get(b'lazydelta', True))
405 407 self._lazydeltabase = False
406 408 if self._lazydelta:
407 409 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
408 410 if b'compengine' in opts:
409 411 self._compengine = opts[b'compengine']
410 412 if b'zlib.level' in opts:
411 413 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
412 414 if b'zstd.level' in opts:
413 415 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
414 416 if b'maxdeltachainspan' in opts:
415 417 self._maxdeltachainspan = opts[b'maxdeltachainspan']
416 418 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
417 419 mmapindexthreshold = opts[b'mmapindexthreshold']
418 420 self.hassidedata = bool(opts.get(b'side-data', False))
419 421 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
420 422 withsparseread = bool(opts.get(b'with-sparse-read', False))
421 423 # sparse-revlog forces sparse-read
422 424 self._withsparseread = self._sparserevlog or withsparseread
423 425 if b'sparse-read-density-threshold' in opts:
424 426 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
425 427 if b'sparse-read-min-gap-size' in opts:
426 428 self._srmingapsize = opts[b'sparse-read-min-gap-size']
427 429 if opts.get(b'enableellipsis'):
428 430 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
429 431
430 432 # revlog v0 doesn't have flag processors
431 433 for flag, processor in pycompat.iteritems(
432 434 opts.get(b'flagprocessors', {})
433 435 ):
434 436 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
435 437
436 438 if self._chunkcachesize <= 0:
437 439 raise error.RevlogError(
438 440 _(b'revlog chunk cache size %r is not greater than 0')
439 441 % self._chunkcachesize
440 442 )
441 443 elif self._chunkcachesize & (self._chunkcachesize - 1):
442 444 raise error.RevlogError(
443 445 _(b'revlog chunk cache size %r is not a power of 2')
444 446 % self._chunkcachesize
445 447 )
446 448 force_nodemap = opts.get(b'devel-force-nodemap', False)
447 449 return new_header, mmapindexthreshold, force_nodemap
448 450
449 451 def _get_data(self, filepath, mmap_threshold):
450 452 """return a file content with or without mmap
451 453
452 454 If the file is missing return the empty string"""
453 455 try:
454 456 with self.opener(filepath) as fp:
455 457 if mmap_threshold is not None:
456 458 file_size = self.opener.fstat(fp).st_size
457 459 if file_size >= mmap_threshold:
458 460 # TODO: should .close() to release resources without
459 461 # relying on Python GC
460 462 return util.buffer(util.mmapread(fp))
461 463 return fp.read()
462 464 except IOError as inst:
463 465 if inst.errno != errno.ENOENT:
464 466 raise
465 467 return b''
466 468
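
A standalone sketch of the read-or-mmap decision in _get_data, with open()/os.fstat standing in for the vfs opener (hypothetical helper, Python 3 standard library only):

import mmap
import os

def get_data(path, mmap_threshold):
    try:
        with open(path, 'rb') as fp:
            if mmap_threshold is not None:
                if os.fstat(fp.fileno()).st_size >= mmap_threshold:
                    # large file: map it instead of reading it eagerly
                    m = mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)
                    return memoryview(m)
            return fp.read()
    except FileNotFoundError:
        return b''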
467 469 def _loadindex(self):
468 470
469 471 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
470 472
471 473 if self.postfix is None:
472 474 entry_point = b'%s.i' % self.radix
473 475 else:
474 476 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
475 477
476 478 entry_data = b''
477 479 self._initempty = True
478 480 entry_data = self._get_data(entry_point, mmapindexthreshold)
479 481 if len(entry_data) > 0:
480 482 header = INDEX_HEADER.unpack(entry_data[:4])[0]
481 483 self._initempty = False
482 484 else:
483 485 header = new_header
484 486
485 487 self._format_flags = header & ~0xFFFF
486 488 self._format_version = header & 0xFFFF
487 489
488 490 if self._format_version == REVLOGV0:
489 491 if self._format_flags:
490 492 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
491 493 display_flag = self._format_flags >> 16
492 494 msg %= (display_flag, self._format_version, self.display_id)
493 495 raise error.RevlogError(msg)
494 496
495 497 self._inline = False
496 498 self._generaldelta = False
497 499
498 500 elif self._format_version == REVLOGV1:
499 501 if self._format_flags & ~REVLOGV1_FLAGS:
500 502 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
501 503 display_flag = self._format_flags >> 16
502 504 msg %= (display_flag, self._format_version, self.display_id)
503 505 raise error.RevlogError(msg)
504 506
505 507 self._inline = self._format_flags & FLAG_INLINE_DATA
506 508 self._generaldelta = self._format_flags & FLAG_GENERALDELTA
507 509
508 510 elif self._format_version == REVLOGV2:
509 511 if self._format_flags & ~REVLOGV2_FLAGS:
510 512 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
511 513 display_flag = self._format_flags >> 16
512 514 msg %= (display_flag, self._format_version, self.display_id)
513 515 raise error.RevlogError(msg)
514 516
515 517 # There is a bug in the transaction handling when going from an
516 518 # inline revlog to a separate index and data file. Turn it off until
517 519 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
518 520 # See issue6485
519 521 self._inline = False
520 522 # generaldelta implied by version 2 revlogs.
521 523 self._generaldelta = True
522 524
523 525 else:
524 526 msg = _(b'unknown version (%d) in revlog %s')
525 527 msg %= (self._format_version, self.display_id)
526 528 raise error.RevlogError(msg)
527 529
528 530 index_data = entry_data
529 531 self._indexfile = entry_point
530 532
531 533 if self.postfix is None or self.postfix == b'a':
532 534 self._datafile = b'%s.d' % self.radix
533 535 else:
534 536 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
535 537
536 538 self.nodeconstants = sha1nodeconstants
537 539 self.nullid = self.nodeconstants.nullid
538 540
539 541 # sparse-revlog can't be on without general-delta (issue6056)
540 542 if not self._generaldelta:
541 543 self._sparserevlog = False
542 544
543 545 self._storedeltachains = True
544 546
545 547 devel_nodemap = (
546 548 self._nodemap_file
547 549 and force_nodemap
548 550 and parse_index_v1_nodemap is not None
549 551 )
550 552
551 553 use_rust_index = False
552 554 if rustrevlog is not None:
553 555 if self._nodemap_file is not None:
554 556 use_rust_index = True
555 557 else:
556 558 use_rust_index = self.opener.options.get(b'rust.index')
557 559
558 560 self._parse_index = parse_index_v1
559 561 if self._format_version == REVLOGV0:
560 562 self._parse_index = revlogv0.parse_index_v0
561 563 elif self._format_version == REVLOGV2:
562 564 self._parse_index = parse_index_v2
563 565 elif devel_nodemap:
564 566 self._parse_index = parse_index_v1_nodemap
565 567 elif use_rust_index:
566 568 self._parse_index = parse_index_v1_mixed
567 569 try:
568 570 d = self._parse_index(index_data, self._inline)
569 571 index, _chunkcache = d
570 572 use_nodemap = (
571 573 not self._inline
572 574 and self._nodemap_file is not None
573 575 and util.safehasattr(index, 'update_nodemap_data')
574 576 )
575 577 if use_nodemap:
576 578 nodemap_data = nodemaputil.persisted_data(self)
577 579 if nodemap_data is not None:
578 580 docket = nodemap_data[0]
579 581 if (
580 582 len(d[0]) > docket.tip_rev
581 583 and d[0][docket.tip_rev][7] == docket.tip_node
582 584 ):
583 585 # no changelog tampering
584 586 self._nodemap_docket = docket
585 587 index.update_nodemap_data(*nodemap_data)
586 588 except (ValueError, IndexError):
587 589 raise error.RevlogError(
588 590 _(b"index %s is corrupted") % self.display_id
589 591 )
590 592 self.index, self._chunkcache = d
591 593 if not self._chunkcache:
592 594 self._chunkclear()
593 595 # revnum -> (chain-length, sum-delta-length)
594 596 self._chaininfocache = util.lrucachedict(500)
595 597 # revlog header -> revlog compressor
596 598 self._decompressors = {}
597 599
598 600 @util.propertycache
599 601 def revlog_kind(self):
600 602 return self.target[0]
601 603
602 604 @util.propertycache
603 605 def display_id(self):
604 606 """The public facing "ID" of the revlog that we use in message"""
605 607 # Maybe we should build a user facing representation of
606 608 # revlog.target instead of using `self.radix`
607 609 return self.radix
608 610
609 611 @util.propertycache
610 612 def _compressor(self):
611 613 engine = util.compengines[self._compengine]
612 614 return engine.revlogcompressor(self._compengineopts)
613 615
614 616 def _indexfp(self, mode=b'r'):
615 617 """file object for the revlog's index file"""
616 618 args = {'mode': mode}
617 619 if mode != b'r':
618 620 args['checkambig'] = self._checkambig
619 621 if mode == b'w':
620 622 args['atomictemp'] = True
621 623 return self.opener(self._indexfile, **args)
622 624
623 625 def _datafp(self, mode=b'r'):
624 626 """file object for the revlog's data file"""
625 627 return self.opener(self._datafile, mode=mode)
626 628
627 629 @contextlib.contextmanager
628 630 def _datareadfp(self, existingfp=None):
629 631 """file object suitable to read data"""
630 632 # Use explicit file handle, if given.
631 633 if existingfp is not None:
632 634 yield existingfp
633 635
634 636 # Use a file handle being actively used for writes, if available.
635 637 # There is some danger to doing this because reads will seek the
636 638 # file. However, _writeentry() performs a SEEK_END before all writes,
637 639 # so we should be safe.
638 640 elif self._writinghandles:
639 641 if self._inline:
640 642 yield self._writinghandles[0]
641 643 else:
642 644 yield self._writinghandles[1]
643 645
644 646 # Otherwise open a new file handle.
645 647 else:
646 648 if self._inline:
647 649 func = self._indexfp
648 650 else:
649 651 func = self._datafp
650 652 with func() as fp:
651 653 yield fp
652 654
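
The handle-selection order in _datareadfp as a tiny context manager (hypothetical signature, standard library only): an explicit handle wins, then an active write handle, then a freshly opened one:

import contextlib

@contextlib.contextmanager
def datareadfp(existingfp=None, writinghandles=None, inline=False, opener=None):
    if existingfp is not None:
        yield existingfp
    elif writinghandles:
        # (index_handle, data_handle); inline revlogs read from the index
        yield writinghandles[0] if inline else writinghandles[1]
    else:
        with opener() as fp:
            yield fp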
653 655 def tiprev(self):
654 656 return len(self.index) - 1
655 657
656 658 def tip(self):
657 659 return self.node(self.tiprev())
658 660
659 661 def __contains__(self, rev):
660 662 return 0 <= rev < len(self)
661 663
662 664 def __len__(self):
663 665 return len(self.index)
664 666
665 667 def __iter__(self):
666 668 return iter(pycompat.xrange(len(self)))
667 669
668 670 def revs(self, start=0, stop=None):
669 671 """iterate over all rev in this revlog (from start to stop)"""
670 672 return storageutil.iterrevs(len(self), start=start, stop=stop)
671 673
672 674 @property
673 675 def nodemap(self):
674 676 msg = (
675 677 b"revlog.nodemap is deprecated, "
676 678 b"use revlog.index.[has_node|rev|get_rev]"
677 679 )
678 680 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
679 681 return self.index.nodemap
680 682
681 683 @property
682 684 def _nodecache(self):
683 685 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
684 686 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
685 687 return self.index.nodemap
686 688
687 689 def hasnode(self, node):
688 690 try:
689 691 self.rev(node)
690 692 return True
691 693 except KeyError:
692 694 return False
693 695
694 696 def candelta(self, baserev, rev):
695 697 """whether two revisions (baserev, rev) can be delta-ed or not"""
696 698 # Disable delta if either rev requires a content-changing flag
697 699 # processor (ex. LFS). This is because such flag processor can alter
698 700 # the rawtext content that the delta will be based on, and two clients
699 701 # could have a same revlog node with different flags (i.e. different
700 702 # rawtext contents) and the delta could be incompatible.
701 703 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
702 704 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
703 705 ):
704 706 return False
705 707 return True
706 708
707 709 def update_caches(self, transaction):
708 710 if self._nodemap_file is not None:
709 711 if transaction is None:
710 712 nodemaputil.update_persistent_nodemap(self)
711 713 else:
712 714 nodemaputil.setup_persistent_nodemap(transaction, self)
713 715
714 716 def clearcaches(self):
715 717 self._revisioncache = None
716 718 self._chainbasecache.clear()
717 719 self._chunkcache = (0, b'')
718 720 self._pcache = {}
719 721 self._nodemap_docket = None
720 722 self.index.clearcaches()
721 723 # The python code is the one responsible for validating the docket, so we
722 724 # end up having to refresh it here.
723 725 use_nodemap = (
724 726 not self._inline
725 727 and self._nodemap_file is not None
726 728 and util.safehasattr(self.index, 'update_nodemap_data')
727 729 )
728 730 if use_nodemap:
729 731 nodemap_data = nodemaputil.persisted_data(self)
730 732 if nodemap_data is not None:
731 733 self._nodemap_docket = nodemap_data[0]
732 734 self.index.update_nodemap_data(*nodemap_data)
733 735
734 736 def rev(self, node):
735 737 try:
736 738 return self.index.rev(node)
737 739 except TypeError:
738 740 raise
739 741 except error.RevlogError:
740 742 # parsers.c radix tree lookup failed
741 743 if (
742 744 node == self.nodeconstants.wdirid
743 745 or node in self.nodeconstants.wdirfilenodeids
744 746 ):
745 747 raise error.WdirUnsupported
746 748 raise error.LookupError(node, self.display_id, _(b'no node'))
747 749
748 750 # Accessors for index entries.
749 751
750 752 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
751 753 # are flags.
752 754 def start(self, rev):
753 755 return int(self.index[rev][0] >> 16)
754 756
755 757 def flags(self, rev):
756 758 return self.index[rev][0] & 0xFFFF
757 759
758 760 def length(self, rev):
759 761 return self.index[rev][1]
760 762
761 763 def sidedata_length(self, rev):
762 764 if not self.hassidedata:
763 765 return 0
764 766 return self.index[rev][9]
765 767
766 768 def rawsize(self, rev):
767 769 """return the length of the uncompressed text for a given revision"""
768 770 l = self.index[rev][2]
769 771 if l >= 0:
770 772 return l
771 773
772 774 t = self.rawdata(rev)
773 775 return len(t)
774 776
775 777 def size(self, rev):
776 778 """length of non-raw text (processed by a "read" flag processor)"""
777 779 # fast path: if no "read" flag processor could change the content,
778 780 # size is rawsize. note: ELLIPSIS is known to not change the content.
779 781 flags = self.flags(rev)
780 782 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
781 783 return self.rawsize(rev)
782 784
783 785 return len(self.revision(rev, raw=False))
784 786
785 787 def chainbase(self, rev):
786 788 base = self._chainbasecache.get(rev)
787 789 if base is not None:
788 790 return base
789 791
790 792 index = self.index
791 793 iterrev = rev
792 794 base = index[iterrev][3]
793 795 while base != iterrev:
794 796 iterrev = base
795 797 base = index[iterrev][3]
796 798
797 799 self._chainbasecache[rev] = base
798 800 return base
799 801
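
Index field 3 is the revision each entry deltas against, with a self-reference marking a full snapshot; a toy walk matching chainbase above:

base_of = {0: 0, 1: 0, 2: 1, 3: 3}   # rev -> field 3 of a toy index

def toy_chainbase(rev):
    while base_of[rev] != rev:
        rev = base_of[rev]
    return rev

assert toy_chainbase(2) == 0   # 2 deltas on 1, which deltas on snapshot 0
assert toy_chainbase(3) == 3   # rev 3 is its own snapshot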
800 802 def linkrev(self, rev):
801 803 return self.index[rev][4]
802 804
803 805 def parentrevs(self, rev):
804 806 try:
805 807 entry = self.index[rev]
806 808 except IndexError:
807 809 if rev == wdirrev:
808 810 raise error.WdirUnsupported
809 811 raise
810 812 if entry[5] == nullrev:
811 813 return entry[6], entry[5]
812 814 else:
813 815 return entry[5], entry[6]
814 816
815 817 # fast parentrevs(rev) where rev isn't filtered
816 818 _uncheckedparentrevs = parentrevs
817 819
818 820 def node(self, rev):
819 821 try:
820 822 return self.index[rev][7]
821 823 except IndexError:
822 824 if rev == wdirrev:
823 825 raise error.WdirUnsupported
824 826 raise
825 827
826 828 # Derived from index values.
827 829
828 830 def end(self, rev):
829 831 return self.start(rev) + self.length(rev)
830 832
831 833 def parents(self, node):
832 834 i = self.index
833 835 d = i[self.rev(node)]
834 836 # inline node() to avoid function call overhead
835 837 if d[5] == self.nullid:
836 838 return i[d[6]][7], i[d[5]][7]
837 839 else:
838 840 return i[d[5]][7], i[d[6]][7]
839 841
840 842 def chainlen(self, rev):
841 843 return self._chaininfo(rev)[0]
842 844
843 845 def _chaininfo(self, rev):
844 846 chaininfocache = self._chaininfocache
845 847 if rev in chaininfocache:
846 848 return chaininfocache[rev]
847 849 index = self.index
848 850 generaldelta = self._generaldelta
849 851 iterrev = rev
850 852 e = index[iterrev]
851 853 clen = 0
852 854 compresseddeltalen = 0
853 855 while iterrev != e[3]:
854 856 clen += 1
855 857 compresseddeltalen += e[1]
856 858 if generaldelta:
857 859 iterrev = e[3]
858 860 else:
859 861 iterrev -= 1
860 862 if iterrev in chaininfocache:
861 863 t = chaininfocache[iterrev]
862 864 clen += t[0]
863 865 compresseddeltalen += t[1]
864 866 break
865 867 e = index[iterrev]
866 868 else:
867 869 # Add text length of base since decompressing that also takes
868 870 # work. For cache hits the length is already included.
869 871 compresseddeltalen += e[1]
870 872 r = (clen, compresseddeltalen)
871 873 chaininfocache[rev] = r
872 874 return r
873 875
874 876 def _deltachain(self, rev, stoprev=None):
875 877 """Obtain the delta chain for a revision.
876 878
877 879 ``stoprev`` specifies a revision to stop at. If not specified, we
878 880 stop at the base of the chain.
879 881
880 882 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
881 883 revs in ascending order and ``stopped`` is a bool indicating whether
882 884 ``stoprev`` was hit.
883 885 """
884 886 # Try C implementation.
885 887 try:
886 888 return self.index.deltachain(rev, stoprev, self._generaldelta)
887 889 except AttributeError:
888 890 pass
889 891
890 892 chain = []
891 893
892 894 # Alias to prevent attribute lookup in tight loop.
893 895 index = self.index
894 896 generaldelta = self._generaldelta
895 897
896 898 iterrev = rev
897 899 e = index[iterrev]
898 900 while iterrev != e[3] and iterrev != stoprev:
899 901 chain.append(iterrev)
900 902 if generaldelta:
901 903 iterrev = e[3]
902 904 else:
903 905 iterrev -= 1
904 906 e = index[iterrev]
905 907
906 908 if iterrev == stoprev:
907 909 stopped = True
908 910 else:
909 911 chain.append(iterrev)
910 912 stopped = False
911 913
912 914 chain.reverse()
913 915 return chain, stopped
914 916
915 917 def ancestors(self, revs, stoprev=0, inclusive=False):
916 918 """Generate the ancestors of 'revs' in reverse revision order.
917 919 Does not generate revs lower than stoprev.
918 920
919 921 See the documentation for ancestor.lazyancestors for more details."""
920 922
921 923 # first, make sure start revisions aren't filtered
922 924 revs = list(revs)
923 925 checkrev = self.node
924 926 for r in revs:
925 927 checkrev(r)
926 928 # and we're sure ancestors aren't filtered as well
927 929
928 930 if rustancestor is not None:
929 931 lazyancestors = rustancestor.LazyAncestors
930 932 arg = self.index
931 933 else:
932 934 lazyancestors = ancestor.lazyancestors
933 935 arg = self._uncheckedparentrevs
934 936 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
935 937
936 938 def descendants(self, revs):
937 939 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
938 940
939 941 def findcommonmissing(self, common=None, heads=None):
940 942 """Return a tuple of the ancestors of common and the ancestors of heads
941 943 that are not ancestors of common. In revset terminology, we return the
942 944 tuple:
943 945
944 946 ::common, (::heads) - (::common)
945 947
946 948 The list is sorted by revision number, meaning it is
947 949 topologically sorted.
948 950
949 951 'heads' and 'common' are both lists of node IDs. If heads is
950 952 not supplied, uses all of the revlog's heads. If common is not
951 953 supplied, uses nullid."""
952 954 if common is None:
953 955 common = [self.nullid]
954 956 if heads is None:
955 957 heads = self.heads()
956 958
957 959 common = [self.rev(n) for n in common]
958 960 heads = [self.rev(n) for n in heads]
959 961
960 962 # we want the ancestors, but inclusive
961 963 class lazyset(object):
962 964 def __init__(self, lazyvalues):
963 965 self.addedvalues = set()
964 966 self.lazyvalues = lazyvalues
965 967
966 968 def __contains__(self, value):
967 969 return value in self.addedvalues or value in self.lazyvalues
968 970
969 971 def __iter__(self):
970 972 added = self.addedvalues
971 973 for r in added:
972 974 yield r
973 975 for r in self.lazyvalues:
974 976 if not r in added:
975 977 yield r
976 978
977 979 def add(self, value):
978 980 self.addedvalues.add(value)
979 981
980 982 def update(self, values):
981 983 self.addedvalues.update(values)
982 984
983 985 has = lazyset(self.ancestors(common))
984 986 has.add(nullrev)
985 987 has.update(common)
986 988
987 989 # take all ancestors from heads that aren't in has
988 990 missing = set()
989 991 visit = collections.deque(r for r in heads if r not in has)
990 992 while visit:
991 993 r = visit.popleft()
992 994 if r in missing:
993 995 continue
994 996 else:
995 997 missing.add(r)
996 998 for p in self.parentrevs(r):
997 999 if p not in has:
998 1000 visit.append(p)
999 1001 missing = list(missing)
1000 1002 missing.sort()
1001 1003 return has, [self.node(miss) for miss in missing]
1002 1004
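
The "take all ancestors from heads that aren't in has" walk above, run on a toy parent map (standard library only):

import collections

parents = {0: [], 1: [0], 2: [1], 3: [1]}   # rev -> parent revs
has = {0, 1}                                # ::common, inclusive
heads = [2, 3]

missing = set()
visit = collections.deque(r for r in heads if r not in has)
while visit:
    r = visit.popleft()
    if r in missing:
        continue
    missing.add(r)
    visit.extend(p for p in parents[r] if p not in has)

assert sorted(missing) == [2, 3]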
1003 1005 def incrementalmissingrevs(self, common=None):
1004 1006 """Return an object that can be used to incrementally compute the
1005 1007 revision numbers of the ancestors of arbitrary sets that are not
1006 1008 ancestors of common. This is an ancestor.incrementalmissingancestors
1007 1009 object.
1008 1010
1009 1011 'common' is a list of revision numbers. If common is not supplied, uses
1010 1012 nullrev.
1011 1013 """
1012 1014 if common is None:
1013 1015 common = [nullrev]
1014 1016
1015 1017 if rustancestor is not None:
1016 1018 return rustancestor.MissingAncestors(self.index, common)
1017 1019 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1018 1020
1019 1021 def findmissingrevs(self, common=None, heads=None):
1020 1022 """Return the revision numbers of the ancestors of heads that
1021 1023 are not ancestors of common.
1022 1024
1023 1025 More specifically, return a list of revision numbers corresponding to
1024 1026 nodes N such that every N satisfies the following constraints:
1025 1027
1026 1028 1. N is an ancestor of some node in 'heads'
1027 1029 2. N is not an ancestor of any node in 'common'
1028 1030
1029 1031 The list is sorted by revision number, meaning it is
1030 1032 topologically sorted.
1031 1033
1032 1034 'heads' and 'common' are both lists of revision numbers. If heads is
1033 1035 not supplied, uses all of the revlog's heads. If common is not
1034 1036 supplied, uses nullid."""
1035 1037 if common is None:
1036 1038 common = [nullrev]
1037 1039 if heads is None:
1038 1040 heads = self.headrevs()
1039 1041
1040 1042 inc = self.incrementalmissingrevs(common=common)
1041 1043 return inc.missingancestors(heads)
1042 1044
1043 1045 def findmissing(self, common=None, heads=None):
1044 1046 """Return the ancestors of heads that are not ancestors of common.
1045 1047
1046 1048 More specifically, return a list of nodes N such that every N
1047 1049 satisfies the following constraints:
1048 1050
1049 1051 1. N is an ancestor of some node in 'heads'
1050 1052 2. N is not an ancestor of any node in 'common'
1051 1053
1052 1054 The list is sorted by revision number, meaning it is
1053 1055 topologically sorted.
1054 1056
1055 1057 'heads' and 'common' are both lists of node IDs. If heads is
1056 1058 not supplied, uses all of the revlog's heads. If common is not
1057 1059 supplied, uses nullid."""
1058 1060 if common is None:
1059 1061 common = [self.nullid]
1060 1062 if heads is None:
1061 1063 heads = self.heads()
1062 1064
1063 1065 common = [self.rev(n) for n in common]
1064 1066 heads = [self.rev(n) for n in heads]
1065 1067
1066 1068 inc = self.incrementalmissingrevs(common=common)
1067 1069 return [self.node(r) for r in inc.missingancestors(heads)]
1068 1070
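# A hypothetical discovery-time usage of the two lookups above (``repo``
# and the node lists are illustrative, not objects defined in this file):
#
#   cl = repo.changelog
#   # nodes a pull must transfer, given what the remote already has
#   missing = cl.findmissing(common=common_nodes, heads=remote_heads)
#   # the same computation in revision-number space
#   missing_revs = cl.findmissingrevs(
#       common=[cl.rev(n) for n in common_nodes],
#       heads=[cl.rev(n) for n in remote_heads],
#   )
#
# Both results come back sorted by revision number, i.e. topologically.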
1069 1071 def nodesbetween(self, roots=None, heads=None):
1070 1072 """Return a topological path from 'roots' to 'heads'.
1071 1073
1072 1074 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1073 1075 topologically sorted list of all nodes N that satisfy both of
1074 1076 these constraints:
1075 1077
1076 1078 1. N is a descendant of some node in 'roots'
1077 1079 2. N is an ancestor of some node in 'heads'
1078 1080
1079 1081 Every node is considered to be both a descendant and an ancestor
1080 1082 of itself, so every reachable node in 'roots' and 'heads' will be
1081 1083 included in 'nodes'.
1082 1084
1083 1085 'outroots' is the list of reachable nodes in 'roots', i.e., the
1084 1086 subset of 'roots' that is returned in 'nodes'. Likewise,
1085 1087 'outheads' is the subset of 'heads' that is also in 'nodes'.
1086 1088
1087 1089 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1088 1090 unspecified, uses nullid as the only root. If 'heads' is
1089 1091 unspecified, uses list of all of the revlog's heads."""
1090 1092 nonodes = ([], [], [])
1091 1093 if roots is not None:
1092 1094 roots = list(roots)
1093 1095 if not roots:
1094 1096 return nonodes
1095 1097 lowestrev = min([self.rev(n) for n in roots])
1096 1098 else:
1097 1099 roots = [self.nullid] # Everybody's a descendant of nullid
1098 1100 lowestrev = nullrev
1099 1101 if (lowestrev == nullrev) and (heads is None):
1100 1102 # We want _all_ the nodes!
1101 1103 return (
1102 1104 [self.node(r) for r in self],
1103 1105 [self.nullid],
1104 1106 list(self.heads()),
1105 1107 )
1106 1108 if heads is None:
1107 1109 # All nodes are ancestors, so the latest ancestor is the last
1108 1110 # node.
1109 1111 highestrev = len(self) - 1
1110 1112 # Set ancestors to None to signal that every node is an ancestor.
1111 1113 ancestors = None
1112 1114 # Set heads to an empty dictionary for later discovery of heads
1113 1115 heads = {}
1114 1116 else:
1115 1117 heads = list(heads)
1116 1118 if not heads:
1117 1119 return nonodes
1118 1120 ancestors = set()
1119 1121 # Turn heads into a dictionary so we can remove 'fake' heads.
1120 1122 # Also, later we will be using it to filter out the heads we can't
1121 1123 # find from roots.
1122 1124 heads = dict.fromkeys(heads, False)
1123 1125 # Start at the top and keep marking parents until we're done.
1124 1126 nodestotag = set(heads)
1125 1127 # Remember where the top was so we can use it as a limit later.
1126 1128 highestrev = max([self.rev(n) for n in nodestotag])
1127 1129 while nodestotag:
1128 1130 # grab a node to tag
1129 1131 n = nodestotag.pop()
1130 1132 # Never tag nullid
1131 1133 if n == self.nullid:
1132 1134 continue
1133 1135 # A node's revision number represents its place in a
1134 1136 # topologically sorted list of nodes.
1135 1137 r = self.rev(n)
1136 1138 if r >= lowestrev:
1137 1139 if n not in ancestors:
1138 1140 # If we are possibly a descendant of one of the roots
1139 1141 # and we haven't already been marked as an ancestor
1140 1142 ancestors.add(n) # Mark as ancestor
1141 1143 # Add non-nullid parents to list of nodes to tag.
1142 1144 nodestotag.update(
1143 1145 [p for p in self.parents(n) if p != self.nullid]
1144 1146 )
1145 1147 elif n in heads: # We've seen it before, is it a fake head?
1146 1148 # So it is, real heads should not be the ancestors of
1147 1149 # any other heads.
1148 1150 heads.pop(n)
1149 1151 if not ancestors:
1150 1152 return nonodes
1151 1153 # Now that we have our set of ancestors, we want to remove any
1152 1154 # roots that are not ancestors.
1153 1155
1154 1156 # If one of the roots was nullid, everything is included anyway.
1155 1157 if lowestrev > nullrev:
1156 1158 # But, since we weren't, let's recompute the lowest rev to not
1157 1159 # include roots that aren't ancestors.
1158 1160
1159 1161 # Filter out roots that aren't ancestors of heads
1160 1162 roots = [root for root in roots if root in ancestors]
1161 1163 # Recompute the lowest revision
1162 1164 if roots:
1163 1165 lowestrev = min([self.rev(root) for root in roots])
1164 1166 else:
1165 1167 # No more roots? Return empty list
1166 1168 return nonodes
1167 1169 else:
1168 1170 # We are descending from nullid, and don't need to care about
1169 1171 # any other roots.
1170 1172 lowestrev = nullrev
1171 1173 roots = [self.nullid]
1172 1174 # Transform our roots list into a set.
1173 1175 descendants = set(roots)
1174 1176 # Also, keep the original roots so we can filter out roots that aren't
1175 1177 # 'real' roots (i.e. are descended from other roots).
1176 1178 roots = descendants.copy()
1177 1179 # Our topologically sorted list of output nodes.
1178 1180 orderedout = []
1179 1181 # Don't start at nullid since we don't want nullid in our output list,
1180 1182 # and if nullid shows up in descendants, empty parents will look like
1181 1183 # they're descendants.
1182 1184 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1183 1185 n = self.node(r)
1184 1186 isdescendant = False
1185 1187 if lowestrev == nullrev: # Everybody is a descendant of nullid
1186 1188 isdescendant = True
1187 1189 elif n in descendants:
1188 1190 # n is already a descendant
1189 1191 isdescendant = True
1190 1192 # This check only needs to be done here because all the roots
1191 1193 # will start being marked as descendants before the loop.
1192 1194 if n in roots:
1193 1195 # If n was a root, check if it's a 'real' root.
1194 1196 p = tuple(self.parents(n))
1195 1197 # If any of its parents are descendants, it's not a root.
1196 1198 if (p[0] in descendants) or (p[1] in descendants):
1197 1199 roots.remove(n)
1198 1200 else:
1199 1201 p = tuple(self.parents(n))
1200 1202 # A node is a descendant if either of its parents is a
1201 1203 # descendant. (We seeded the descendants set with the roots
1202 1204 # up there, remember?)
1203 1205 if (p[0] in descendants) or (p[1] in descendants):
1204 1206 descendants.add(n)
1205 1207 isdescendant = True
1206 1208 if isdescendant and ((ancestors is None) or (n in ancestors)):
1207 1209 # Only include nodes that are both descendants and ancestors.
1208 1210 orderedout.append(n)
1209 1211 if (ancestors is not None) and (n in heads):
1210 1212 # We're trying to figure out which heads are reachable
1211 1213 # from roots.
1212 1214 # Mark this head as having been reached
1213 1215 heads[n] = True
1214 1216 elif ancestors is None:
1215 1217 # Otherwise, we're trying to discover the heads.
1216 1218 # Assume this is a head because if it isn't, the next step
1217 1219 # will eventually remove it.
1218 1220 heads[n] = True
1219 1221 # But, obviously its parents aren't.
1220 1222 for p in self.parents(n):
1221 1223 heads.pop(p, None)
1222 1224 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1223 1225 roots = list(roots)
1224 1226 assert orderedout
1225 1227 assert roots
1226 1228 assert heads
1227 1229 return (orderedout, roots, heads)
1228 1230
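# Compact sketch of what ``nodesbetween`` computes: the revisions that are
# simultaneously descendants of some root and ancestors of some head.
# Walking in increasing revision order keeps the result topologically
# sorted, exactly like the loop above. Toy data structures only; the real
# method also reports which roots and heads were actually reachable.
def revs_between(parents, roots, heads, maxrev):
    # ancestors of heads, found by walking parent links backwards
    anc = set()
    stack = list(heads)
    while stack:
        r = stack.pop()
        if r not in anc:
            anc.add(r)
            stack.extend(parents.get(r, ()))
    # descendants of roots, found by a forward sweep in rev order
    desc = set(roots)
    out = []
    for r in range(0, maxrev + 1):
        if r in desc or any(p in desc for p in parents.get(r, ())):
            desc.add(r)
            if r in anc:
                out.append(r)
    return out

# 0 <- 1 <- 2 <- 3: between root 1 and head 2 lies exactly [1, 2]
# assert revs_between({1: (0,), 2: (1,), 3: (2,)}, [1], [2], 3) == [1, 2]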
1229 1231 def headrevs(self, revs=None):
1230 1232 if revs is None:
1231 1233 try:
1232 1234 return self.index.headrevs()
1233 1235 except AttributeError:
1234 1236 return self._headrevs()
1235 1237 if rustdagop is not None:
1236 1238 return rustdagop.headrevs(self.index, revs)
1237 1239 return dagop.headrevs(revs, self._uncheckedparentrevs)
1238 1240
1239 1241 def computephases(self, roots):
1240 1242 return self.index.computephasesmapsets(roots)
1241 1243
1242 1244 def _headrevs(self):
1243 1245 count = len(self)
1244 1246 if not count:
1245 1247 return [nullrev]
1246 1248 # we won't iterate over filtered revs, so nobody is a head at the start
1247 1249 ishead = [0] * (count + 1)
1248 1250 index = self.index
1249 1251 for r in self:
1250 1252 ishead[r] = 1 # I may be a head
1251 1253 e = index[r]
1252 1254 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1253 1255 return [r for r, val in enumerate(ishead) if val]
1254 1256
1255 1257 def heads(self, start=None, stop=None):
1256 1258 """return the list of all nodes that have no children
1257 1259
1258 1260 if start is specified, only heads that are descendants of
1259 1261 start will be returned
1260 1262 if stop is specified, it will consider all the revs from stop
1261 1263 as if they had no children
1262 1264 """
1263 1265 if start is None and stop is None:
1264 1266 if not len(self):
1265 1267 return [self.nullid]
1266 1268 return [self.node(r) for r in self.headrevs()]
1267 1269
1268 1270 if start is None:
1269 1271 start = nullrev
1270 1272 else:
1271 1273 start = self.rev(start)
1272 1274
1273 1275 stoprevs = {self.rev(n) for n in stop or []}
1274 1276
1275 1277 revs = dagop.headrevssubset(
1276 1278 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1277 1279 )
1278 1280
1279 1281 return [self.node(rev) for rev in revs]
1280 1282
1281 1283 def children(self, node):
1282 1284 """find the children of a given node"""
1283 1285 c = []
1284 1286 p = self.rev(node)
1285 1287 for r in self.revs(start=p + 1):
1286 1288 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1287 1289 if prevs:
1288 1290 for pr in prevs:
1289 1291 if pr == p:
1290 1292 c.append(self.node(r))
1291 1293 elif p == nullrev:
1292 1294 c.append(self.node(r))
1293 1295 return c
1294 1296
1295 1297 def commonancestorsheads(self, a, b):
1296 1298 """calculate all the heads of the common ancestors of nodes a and b"""
1297 1299 a, b = self.rev(a), self.rev(b)
1298 1300 ancs = self._commonancestorsheads(a, b)
1299 1301 return pycompat.maplist(self.node, ancs)
1300 1302
1301 1303 def _commonancestorsheads(self, *revs):
1302 1304 """calculate all the heads of the common ancestors of revs"""
1303 1305 try:
1304 1306 ancs = self.index.commonancestorsheads(*revs)
1305 1307 except (AttributeError, OverflowError): # C implementation failed
1306 1308 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1307 1309 return ancs
1308 1310
1309 1311 def isancestor(self, a, b):
1310 1312 """return True if node a is an ancestor of node b
1311 1313
1312 1314 A revision is considered an ancestor of itself."""
1313 1315 a, b = self.rev(a), self.rev(b)
1314 1316 return self.isancestorrev(a, b)
1315 1317
1316 1318 def isancestorrev(self, a, b):
1317 1319 """return True if revision a is an ancestor of revision b
1318 1320
1319 1321 A revision is considered an ancestor of itself.
1320 1322
1321 1323 The implementation of this is trivial but the use of
1322 1324 reachableroots is not."""
1323 1325 if a == nullrev:
1324 1326 return True
1325 1327 elif a == b:
1326 1328 return True
1327 1329 elif a > b:
1328 1330 return False
1329 1331 return bool(self.reachableroots(a, [b], [a], includepath=False))
1330 1332
1331 1333 def reachableroots(self, minroot, heads, roots, includepath=False):
1332 1334 """return (heads(::(<roots> and <roots>::<heads>)))
1333 1335
1334 1336 If includepath is True, return (<roots>::<heads>)."""
1335 1337 try:
1336 1338 return self.index.reachableroots2(
1337 1339 minroot, heads, roots, includepath
1338 1340 )
1339 1341 except AttributeError:
1340 1342 return dagop._reachablerootspure(
1341 1343 self.parentrevs, minroot, roots, heads, includepath
1342 1344 )
1343 1345
1344 1346 def ancestor(self, a, b):
1345 1347 """calculate the "best" common ancestor of nodes a and b"""
1346 1348
1347 1349 a, b = self.rev(a), self.rev(b)
1348 1350 try:
1349 1351 ancs = self.index.ancestors(a, b)
1350 1352 except (AttributeError, OverflowError):
1351 1353 ancs = ancestor.ancestors(self.parentrevs, a, b)
1352 1354 if ancs:
1353 1355 # choose a consistent winner when there's a tie
1354 1356 return min(map(self.node, ancs))
1355 1357 return self.nullid
1356 1358
1357 1359 def _match(self, id):
1358 1360 if isinstance(id, int):
1359 1361 # rev
1360 1362 return self.node(id)
1361 1363 if len(id) == self.nodeconstants.nodelen:
1362 1364 # possibly a binary node
1363 1365 # odds of a binary node being all hex in ASCII are 1 in 10**25
1364 1366 try:
1365 1367 node = id
1366 1368 self.rev(node) # quick search the index
1367 1369 return node
1368 1370 except error.LookupError:
1369 1371 pass # may be partial hex id
1370 1372 try:
1371 1373 # str(rev)
1372 1374 rev = int(id)
1373 1375 if b"%d" % rev != id:
1374 1376 raise ValueError
1375 1377 if rev < 0:
1376 1378 rev = len(self) + rev
1377 1379 if rev < 0 or rev >= len(self):
1378 1380 raise ValueError
1379 1381 return self.node(rev)
1380 1382 except (ValueError, OverflowError):
1381 1383 pass
1382 1384 if len(id) == 2 * self.nodeconstants.nodelen:
1383 1385 try:
1384 1386 # a full hex nodeid?
1385 1387 node = bin(id)
1386 1388 self.rev(node)
1387 1389 return node
1388 1390 except (TypeError, error.LookupError):
1389 1391 pass
1390 1392
1391 1393 def _partialmatch(self, id):
1392 1394 # we don't care about wdirfilenodeids as they should always be full hashes
1393 1395 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1394 1396 try:
1395 1397 partial = self.index.partialmatch(id)
1396 1398 if partial and self.hasnode(partial):
1397 1399 if maybewdir:
1398 1400 # single 'ff...' match in radix tree, ambiguous with wdir
1399 1401 raise error.RevlogError
1400 1402 return partial
1401 1403 if maybewdir:
1402 1404 # no 'ff...' match in radix tree, wdir identified
1403 1405 raise error.WdirUnsupported
1404 1406 return None
1405 1407 except error.RevlogError:
1406 1408 # parsers.c radix tree lookup gave multiple matches
1407 1409 # fast path: for unfiltered changelog, radix tree is accurate
1408 1410 if not getattr(self, 'filteredrevs', None):
1409 1411 raise error.AmbiguousPrefixLookupError(
1410 1412 id, self.display_id, _(b'ambiguous identifier')
1411 1413 )
1412 1414 # fall through to slow path that filters hidden revisions
1413 1415 except (AttributeError, ValueError):
1414 1416 # we are pure python, or key was too short to search radix tree
1415 1417 pass
1416 1418
1417 1419 if id in self._pcache:
1418 1420 return self._pcache[id]
1419 1421
1420 1422 if len(id) <= 40:
1421 1423 try:
1422 1424 # hex(node)[:...]
1423 1425 l = len(id) // 2 # grab an even number of digits
1424 1426 prefix = bin(id[: l * 2])
1425 1427 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1426 1428 nl = [
1427 1429 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1428 1430 ]
1429 1431 if self.nodeconstants.nullhex.startswith(id):
1430 1432 nl.append(self.nullid)
1431 1433 if len(nl) > 0:
1432 1434 if len(nl) == 1 and not maybewdir:
1433 1435 self._pcache[id] = nl[0]
1434 1436 return nl[0]
1435 1437 raise error.AmbiguousPrefixLookupError(
1436 1438 id, self.display_id, _(b'ambiguous identifier')
1437 1439 )
1438 1440 if maybewdir:
1439 1441 raise error.WdirUnsupported
1440 1442 return None
1441 1443 except TypeError:
1442 1444 pass
1443 1445
1444 1446 def lookup(self, id):
1445 1447 """locate a node based on:
1446 1448 - revision number or str(revision number)
1447 1449 - nodeid or subset of hex nodeid
1448 1450 """
1449 1451 n = self._match(id)
1450 1452 if n is not None:
1451 1453 return n
1452 1454 n = self._partialmatch(id)
1453 1455 if n:
1454 1456 return n
1455 1457
1456 1458 raise error.LookupError(id, self.display_id, _(b'no match found'))
1457 1459
1458 1460 def shortest(self, node, minlength=1):
1459 1461 """Find the shortest unambiguous prefix that matches node."""
1460 1462
1461 1463 def isvalid(prefix):
1462 1464 try:
1463 1465 matchednode = self._partialmatch(prefix)
1464 1466 except error.AmbiguousPrefixLookupError:
1465 1467 return False
1466 1468 except error.WdirUnsupported:
1467 1469 # single 'ff...' match
1468 1470 return True
1469 1471 if matchednode is None:
1470 1472 raise error.LookupError(node, self.display_id, _(b'no node'))
1471 1473 return True
1472 1474
1473 1475 def maybewdir(prefix):
1474 1476 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1475 1477
1476 1478 hexnode = hex(node)
1477 1479
1478 1480 def disambiguate(hexnode, minlength):
1479 1481 """Disambiguate against wdirid."""
1480 1482 for length in range(minlength, len(hexnode) + 1):
1481 1483 prefix = hexnode[:length]
1482 1484 if not maybewdir(prefix):
1483 1485 return prefix
1484 1486
1485 1487 if not getattr(self, 'filteredrevs', None):
1486 1488 try:
1487 1489 length = max(self.index.shortest(node), minlength)
1488 1490 return disambiguate(hexnode, length)
1489 1491 except error.RevlogError:
1490 1492 if node != self.nodeconstants.wdirid:
1491 1493 raise error.LookupError(
1492 1494 node, self.display_id, _(b'no node')
1493 1495 )
1494 1496 except AttributeError:
1495 1497 # Fall through to pure code
1496 1498 pass
1497 1499
1498 1500 if node == self.nodeconstants.wdirid:
1499 1501 for length in range(minlength, len(hexnode) + 1):
1500 1502 prefix = hexnode[:length]
1501 1503 if isvalid(prefix):
1502 1504 return prefix
1503 1505
1504 1506 for length in range(minlength, len(hexnode) + 1):
1505 1507 prefix = hexnode[:length]
1506 1508 if isvalid(prefix):
1507 1509 return disambiguate(hexnode, length)
1508 1510
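# Minimal sketch of the disambiguation loop above: grow the prefix one hex
# digit at a time until it matches exactly one known node. The real code
# additionally guards against colliding with the all-'f' working-directory
# id and consults the radix tree; ``known_hexes`` is a toy stand-in.
def shortest_prefix(hexnode, known_hexes, minlength=1):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        matches = [h for h in known_hexes if h.startswith(prefix)]
        if matches == [hexnode]:
            return prefix
    return hexnode

# assert shortest_prefix('abcd12', {'abcd12', 'abff00'}) == 'abc'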
1509 1511 def cmp(self, node, text):
1510 1512 """compare text with a given file revision
1511 1513
1512 1514 returns True if text is different from what is stored.
1513 1515 """
1514 1516 p1, p2 = self.parents(node)
1515 1517 return storageutil.hashrevisionsha1(text, p1, p2) != node
1516 1518
1517 1519 def _cachesegment(self, offset, data):
1518 1520 """Add a segment to the revlog cache.
1519 1521
1520 1522 Accepts an absolute offset and the data that is at that location.
1521 1523 """
1522 1524 o, d = self._chunkcache
1523 1525 # try to add to existing cache
1524 1526 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1525 1527 self._chunkcache = o, d + data
1526 1528 else:
1527 1529 self._chunkcache = offset, data
1528 1530
1529 1531 def _readsegment(self, offset, length, df=None):
1530 1532 """Load a segment of raw data from the revlog.
1531 1533
1532 1534 Accepts an absolute offset, length to read, and an optional existing
1533 1535 file handle to read from.
1534 1536
1535 1537 If an existing file handle is passed, it will be seeked and the
1536 1538 original seek position will NOT be restored.
1537 1539
1538 1540 Returns a str or buffer of raw byte data.
1539 1541
1540 1542 Raises if the requested number of bytes could not be read.
1541 1543 """
1542 1544 # Cache data both forward and backward around the requested
1543 1545 # data, in a fixed size window. This helps speed up operations
1544 1546 # involving reading the revlog backwards.
1545 1547 cachesize = self._chunkcachesize
1546 1548 realoffset = offset & ~(cachesize - 1)
1547 1549 reallength = (
1548 1550 (offset + length + cachesize) & ~(cachesize - 1)
1549 1551 ) - realoffset
1550 1552 with self._datareadfp(df) as df:
1551 1553 df.seek(realoffset)
1552 1554 d = df.read(reallength)
1553 1555
1554 1556 self._cachesegment(realoffset, d)
1555 1557 if offset != realoffset or reallength != length:
1556 1558 startoffset = offset - realoffset
1557 1559 if len(d) - startoffset < length:
1558 1560 raise error.RevlogError(
1559 1561 _(
1560 1562 b'partial read of revlog %s; expected %d bytes from '
1561 1563 b'offset %d, got %d'
1562 1564 )
1563 1565 % (
1564 1566 self._indexfile if self._inline else self._datafile,
1565 1567 length,
1566 1568 offset,
1567 1569 len(d) - startoffset,
1568 1570 )
1569 1571 )
1570 1572
1571 1573 return util.buffer(d, startoffset, length)
1572 1574
1573 1575 if len(d) < length:
1574 1576 raise error.RevlogError(
1575 1577 _(
1576 1578 b'partial read of revlog %s; expected %d bytes from offset '
1577 1579 b'%d, got %d'
1578 1580 )
1579 1581 % (
1580 1582 self._indexfile if self._inline else self._datafile,
1581 1583 length,
1582 1584 offset,
1583 1585 len(d),
1584 1586 )
1585 1587 )
1586 1588
1587 1589 return d
1588 1590
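# The window arithmetic used by ``_readsegment`` above, shown standalone:
# reads are rounded down to a cache-size boundary and extended past the end
# so neighbouring requests hit the cache. ``cachesize`` must be a power of
# two for the mask trick to work (the revlog enforces that elsewhere).
def read_window(offset, length, cachesize=65536):
    realoffset = offset & ~(cachesize - 1)
    reallength = (
        (offset + length + cachesize) & ~(cachesize - 1)
    ) - realoffset
    return realoffset, reallength

# a 100-byte read at offset 70000 becomes one aligned 64 KiB window
# covering bytes 65536..131072:
# assert read_window(70000, 100) == (65536, 65536)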
1589 1591 def _getsegment(self, offset, length, df=None):
1590 1592 """Obtain a segment of raw data from the revlog.
1591 1593
1592 1594 Accepts an absolute offset, length of bytes to obtain, and an
1593 1595 optional file handle to the already-opened revlog. If the file
1594 1596 handle is used, its original seek position will not be preserved.
1595 1597
1596 1598 Requests for data may be returned from a cache.
1597 1599
1598 1600 Returns a str or a buffer instance of raw byte data.
1599 1601 """
1600 1602 o, d = self._chunkcache
1601 1603 l = len(d)
1602 1604
1603 1605 # is it in the cache?
1604 1606 cachestart = offset - o
1605 1607 cacheend = cachestart + length
1606 1608 if cachestart >= 0 and cacheend <= l:
1607 1609 if cachestart == 0 and cacheend == l:
1608 1610 return d # avoid a copy
1609 1611 return util.buffer(d, cachestart, cacheend - cachestart)
1610 1612
1611 1613 return self._readsegment(offset, length, df=df)
1612 1614
1613 1615 def _getsegmentforrevs(self, startrev, endrev, df=None):
1614 1616 """Obtain a segment of raw data corresponding to a range of revisions.
1615 1617
1616 1618 Accepts the start and end revisions and an optional already-open
1617 1619 file handle to be used for reading. If the file handle is used, its
1618 1620 seek position will not be preserved.
1619 1621
1620 1622 Requests for data may be satisfied by a cache.
1621 1623
1622 1624 Returns a 2-tuple of (offset, data) for the requested range of
1623 1625 revisions. Offset is the integer offset from the beginning of the
1624 1626 revlog and data is a str or buffer of the raw byte data.
1625 1627
1626 1628 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1627 1629 to determine where each revision's data begins and ends.
1628 1630 """
1629 1631 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1630 1632 # (functions are expensive).
1631 1633 index = self.index
1632 1634 istart = index[startrev]
1633 1635 start = int(istart[0] >> 16)
1634 1636 if startrev == endrev:
1635 1637 end = start + istart[1]
1636 1638 else:
1637 1639 iend = index[endrev]
1638 1640 end = int(iend[0] >> 16) + iend[1]
1639 1641
1640 1642 if self._inline:
1641 1643 start += (startrev + 1) * self.index.entry_size
1642 1644 end += (endrev + 1) * self.index.entry_size
1643 1645 length = end - start
1644 1646
1645 1647 return start, self._getsegment(start, length, df=df)
1646 1648
1647 1649 def _chunk(self, rev, df=None):
1648 1650 """Obtain a single decompressed chunk for a revision.
1649 1651
1650 1652 Accepts an integer revision and an optional already-open file handle
1651 1653 to be used for reading. If used, the seek position of the file will not
1652 1654 be preserved.
1653 1655
1654 1656 Returns a str holding uncompressed data for the requested revision.
1655 1657 """
1656 1658 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1657 1659
1658 1660 def _chunks(self, revs, df=None, targetsize=None):
1659 1661 """Obtain decompressed chunks for the specified revisions.
1660 1662
1661 1663 Accepts an iterable of numeric revisions that are assumed to be in
1662 1664 ascending order. Also accepts an optional already-open file handle
1663 1665 to be used for reading. If used, the seek position of the file will
1664 1666 not be preserved.
1665 1667
1666 1668 This function is similar to calling ``self._chunk()`` multiple times,
1667 1669 but is faster.
1668 1670
1669 1671 Returns a list with decompressed data for each requested revision.
1670 1672 """
1671 1673 if not revs:
1672 1674 return []
1673 1675 start = self.start
1674 1676 length = self.length
1675 1677 inline = self._inline
1676 1678 iosize = self.index.entry_size
1677 1679 buffer = util.buffer
1678 1680
1679 1681 l = []
1680 1682 ladd = l.append
1681 1683
1682 1684 if not self._withsparseread:
1683 1685 slicedchunks = (revs,)
1684 1686 else:
1685 1687 slicedchunks = deltautil.slicechunk(
1686 1688 self, revs, targetsize=targetsize
1687 1689 )
1688 1690
1689 1691 for revschunk in slicedchunks:
1690 1692 firstrev = revschunk[0]
1691 1693 # Skip trailing revisions with empty diff
1692 1694 for lastrev in revschunk[::-1]:
1693 1695 if length(lastrev) != 0:
1694 1696 break
1695 1697
1696 1698 try:
1697 1699 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1698 1700 except OverflowError:
1699 1701 # issue4215 - we can't cache a run of chunks greater than
1700 1702 # 2G on Windows
1701 1703 return [self._chunk(rev, df=df) for rev in revschunk]
1702 1704
1703 1705 decomp = self.decompress
1704 1706 for rev in revschunk:
1705 1707 chunkstart = start(rev)
1706 1708 if inline:
1707 1709 chunkstart += (rev + 1) * iosize
1708 1710 chunklength = length(rev)
1709 1711 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1710 1712
1711 1713 return l
1712 1714
1713 1715 def _chunkclear(self):
1714 1716 """Clear the raw chunk cache."""
1715 1717 self._chunkcache = (0, b'')
1716 1718
1717 1719 def deltaparent(self, rev):
1718 1720 """return deltaparent of the given revision"""
1719 1721 base = self.index[rev][3]
1720 1722 if base == rev:
1721 1723 return nullrev
1722 1724 elif self._generaldelta:
1723 1725 return base
1724 1726 else:
1725 1727 return rev - 1
1726 1728
1727 1729 def issnapshot(self, rev):
1728 1730 """tells whether rev is a snapshot"""
1729 1731 if not self._sparserevlog:
1730 1732 return self.deltaparent(rev) == nullrev
1731 1733 elif util.safehasattr(self.index, b'issnapshot'):
1732 1734 # directly assign the method to cache the testing and access
1733 1735 self.issnapshot = self.index.issnapshot
1734 1736 return self.issnapshot(rev)
1735 1737 if rev == nullrev:
1736 1738 return True
1737 1739 entry = self.index[rev]
1738 1740 base = entry[3]
1739 1741 if base == rev:
1740 1742 return True
1741 1743 if base == nullrev:
1742 1744 return True
1743 1745 p1 = entry[5]
1744 1746 p2 = entry[6]
1745 1747 if base == p1 or base == p2:
1746 1748 return False
1747 1749 return self.issnapshot(base)
1748 1750
1749 1751 def snapshotdepth(self, rev):
1750 1752 """number of snapshot in the chain before this one"""
1751 1753 if not self.issnapshot(rev):
1752 1754 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1753 1755 return len(self._deltachain(rev)[0]) - 1
1754 1756
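# Sketch of the snapshot test above for sparse revlogs: a revision is a
# snapshot when its chain of delta bases bottoms out without ever passing
# through one of its own parents. ``entry(rev)`` is a toy accessor
# returning (base, p1, p2), standing in for index tuple fields 3, 5 and 6.
def is_snapshot(entry, rev, nullrev=-1):
    if rev == nullrev:
        return True
    base, p1, p2 = entry(rev)
    if base == rev or base == nullrev:
        return True  # full text, or a delta against the null revision
    if base == p1 or base == p2:
        return False  # an ordinary delta against one of its parents
    return is_snapshot(entry, base, nullrev)  # intermediate snapshot chain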
1755 1757 def revdiff(self, rev1, rev2):
1756 1758 """return or calculate a delta between two revisions
1757 1759
1758 1760 The delta calculated is in binary form and is intended to be written to
1759 1761 revlog data directly. So this function needs raw revision data.
1760 1762 """
1761 1763 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1762 1764 return bytes(self._chunk(rev2))
1763 1765
1764 1766 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1765 1767
1766 1768 def _processflags(self, text, flags, operation, raw=False):
1767 1769 """deprecated entry point to access flag processors"""
1768 1770 msg = b'_processflag(...) use the specialized variant'
1769 1771 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1770 1772 if raw:
1771 1773 return text, flagutil.processflagsraw(self, text, flags)
1772 1774 elif operation == b'read':
1773 1775 return flagutil.processflagsread(self, text, flags)
1774 1776 else: # write operation
1775 1777 return flagutil.processflagswrite(self, text, flags)
1776 1778
1777 1779 def revision(self, nodeorrev, _df=None, raw=False):
1778 1780 """return an uncompressed revision of a given node or revision
1779 1781 number.
1780 1782
1781 1783 _df - an existing file handle to read from. (internal-only)
1782 1784 raw - an optional argument specifying if the revision data is to be
1783 1785 treated as raw data when applying flag transforms. 'raw' should be set
1784 1786 to True when generating changegroups or in debug commands.
1785 1787 """
1786 1788 if raw:
1787 1789 msg = (
1788 1790 b'revlog.revision(..., raw=True) is deprecated, '
1789 1791 b'use revlog.rawdata(...)'
1790 1792 )
1791 1793 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1792 1794 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1793 1795
1794 1796 def sidedata(self, nodeorrev, _df=None):
1795 1797 """a map of extra data related to the changeset but not part of the hash
1796 1798
1797 1799 This function currently returns a dictionary. However, a more
1798 1800 advanced mapping object will likely be used in the future for more
1799 1801 efficient/lazy code.
1800 1802 """
1801 1803 return self._revisiondata(nodeorrev, _df)[1]
1802 1804
1803 1805 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1804 1806 # deal with <nodeorrev> argument type
1805 1807 if isinstance(nodeorrev, int):
1806 1808 rev = nodeorrev
1807 1809 node = self.node(rev)
1808 1810 else:
1809 1811 node = nodeorrev
1810 1812 rev = None
1811 1813
1812 1814 # fast path the special `nullid` rev
1813 1815 if node == self.nullid:
1814 1816 return b"", {}
1815 1817
1816 1818 # ``rawtext`` is the text as stored inside the revlog. Might be the
1817 1819 # revision or might need to be processed to retrieve the revision.
1818 1820 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1819 1821
1820 1822 if self.hassidedata:
1821 1823 if rev is None:
1822 1824 rev = self.rev(node)
1823 1825 sidedata = self._sidedata(rev)
1824 1826 else:
1825 1827 sidedata = {}
1826 1828
1827 1829 if raw and validated:
1828 1830 # if we don't want to process the raw text and that raw
1829 1831 # text is cached, we can exit early.
1830 1832 return rawtext, sidedata
1831 1833 if rev is None:
1832 1834 rev = self.rev(node)
1833 1835 # the revlog's flag for this revision
1834 1836 # (usually alter its state or content)
1835 1837 flags = self.flags(rev)
1836 1838
1837 1839 if validated and flags == REVIDX_DEFAULT_FLAGS:
1838 1840 # no extra flags set, no flag processor runs, text = rawtext
1839 1841 return rawtext, sidedata
1840 1842
1841 1843 if raw:
1842 1844 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1843 1845 text = rawtext
1844 1846 else:
1845 1847 r = flagutil.processflagsread(self, rawtext, flags)
1846 1848 text, validatehash = r
1847 1849 if validatehash:
1848 1850 self.checkhash(text, node, rev=rev)
1849 1851 if not validated:
1850 1852 self._revisioncache = (node, rev, rawtext)
1851 1853
1852 1854 return text, sidedata
1853 1855
1854 1856 def _rawtext(self, node, rev, _df=None):
1855 1857 """return the possibly unvalidated rawtext for a revision
1856 1858
1857 1859 returns (rev, rawtext, validated)
1858 1860 """
1859 1861
1860 1862 # revision in the cache (could be useful to apply delta)
1861 1863 cachedrev = None
1862 1864 # An intermediate text to apply deltas to
1863 1865 basetext = None
1864 1866
1865 1867 # Check if we have the entry in cache
1866 1868 # The cache entry looks like (node, rev, rawtext)
1867 1869 if self._revisioncache:
1868 1870 if self._revisioncache[0] == node:
1869 1871 return (rev, self._revisioncache[2], True)
1870 1872 cachedrev = self._revisioncache[1]
1871 1873
1872 1874 if rev is None:
1873 1875 rev = self.rev(node)
1874 1876
1875 1877 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1876 1878 if stopped:
1877 1879 basetext = self._revisioncache[2]
1878 1880
1879 1881 # drop cache to save memory, the caller is expected to
1880 1882 # update self._revisioncache after validating the text
1881 1883 self._revisioncache = None
1882 1884
1883 1885 targetsize = None
1884 1886 rawsize = self.index[rev][2]
1885 1887 if 0 <= rawsize:
1886 1888 targetsize = 4 * rawsize
1887 1889
1888 1890 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1889 1891 if basetext is None:
1890 1892 basetext = bytes(bins[0])
1891 1893 bins = bins[1:]
1892 1894
1893 1895 rawtext = mdiff.patches(basetext, bins)
1894 1896 del basetext # let us have a chance to free memory early
1895 1897 return (rev, rawtext, False)
1896 1898
1897 1899 def _sidedata(self, rev):
1898 1900 """Return the sidedata for a given revision number."""
1899 1901 index_entry = self.index[rev]
1900 1902 sidedata_offset = index_entry[8]
1901 1903 sidedata_size = index_entry[9]
1902 1904
1903 1905 if self._inline:
1904 1906 sidedata_offset += self.index.entry_size * (1 + rev)
1905 1907 if sidedata_size == 0:
1906 1908 return {}
1907 1909
1908 1910 segment = self._getsegment(sidedata_offset, sidedata_size)
1909 1911 sidedata = sidedatautil.deserialize_sidedata(segment)
1910 1912 return sidedata
1911 1913
1912 1914 def rawdata(self, nodeorrev, _df=None):
1913 1915 """return an uncompressed raw data of a given node or revision number.
1914 1916
1915 1917 _df - an existing file handle to read from. (internal-only)
1916 1918 """
1917 1919 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1918 1920
1919 1921 def hash(self, text, p1, p2):
1920 1922 """Compute a node hash.
1921 1923
1922 1924 Available as a function so that subclasses can replace the hash
1923 1925 as needed.
1924 1926 """
1925 1927 return storageutil.hashrevisionsha1(text, p1, p2)
1926 1928
1927 1929 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1928 1930 """Check node hash integrity.
1929 1931
1930 1932 Available as a function so that subclasses can extend hash mismatch
1931 1933 behaviors as needed.
1932 1934 """
1933 1935 try:
1934 1936 if p1 is None and p2 is None:
1935 1937 p1, p2 = self.parents(node)
1936 1938 if node != self.hash(text, p1, p2):
1937 1939 # Clear the revision cache on hash failure. The revision cache
1938 1940 # only stores the raw revision and clearing the cache does have
1939 1941 # the side-effect that we won't have a cache hit when the raw
1940 1942 # revision data is accessed. But this case should be rare and
1941 1943 # it is extra work to teach the cache about the hash
1942 1944 # verification state.
1943 1945 if self._revisioncache and self._revisioncache[0] == node:
1944 1946 self._revisioncache = None
1945 1947
1946 1948 revornode = rev
1947 1949 if revornode is None:
1948 1950 revornode = templatefilters.short(hex(node))
1949 1951 raise error.RevlogError(
1950 1952 _(b"integrity check failed on %s:%s")
1951 1953 % (self.display_id, pycompat.bytestr(revornode))
1952 1954 )
1953 1955 except error.RevlogError:
1954 1956 if self._censorable and storageutil.iscensoredtext(text):
1955 1957 raise error.CensoredNodeError(self.display_id, node, text)
1956 1958 raise
1957 1959
1958 def _enforceinlinesize(self, tr, fp=None):
1960 def _enforceinlinesize(self, tr):
1959 1961 """Check if the revlog is too big for inline and convert if so.
1960 1962
1961 1963 This should be called after revisions are added to the revlog. If the
1962 1964 revlog has grown too large to be an inline revlog, it will convert it
1963 1965 to use multiple index and data files.
1964 1966 """
1965 1967 tiprev = len(self) - 1
1966 1968 total_size = self.start(tiprev) + self.length(tiprev)
1967 1969 if not self._inline or total_size < _maxinline:
1968 1970 return
1969 1971
1970 1972 troffset = tr.findoffset(self._indexfile)
1971 1973 if troffset is None:
1972 1974 raise error.RevlogError(
1973 1975 _(b"%s not found in the transaction") % self._indexfile
1974 1976 )
1975 1977 trindex = 0
1976 1978 tr.add(self._datafile, 0)
1977 1979
1978 if fp:
1980 existing_handles = False
1981 if self._writinghandles is not None:
1982 existing_handles = True
1983 fp = self._writinghandles[0]
1979 1984 fp.flush()
1980 1985 fp.close()
1981 1986 # We can't use the cached file handle after close(). So prevent
1982 1987 # its usage.
1983 1988 self._writinghandles = None
1984 1989
1985 if True:
1986 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1990 new_dfh = self._datafp(b'w+')
1991 new_dfh.truncate(0) # drop any potentially existing data
1992 try:
1993 with self._indexfp(b'r') as read_ifh:
1987 1994 for r in self:
1988 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1995 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
1989 1996 if troffset <= self.start(r):
1990 1997 trindex = r
1991
1992 with self._indexfp(b'w') as fp:
1998 new_dfh.flush()
1999
2000 with self.opener(self._indexfile, mode=b'w', atomictemp=True) as fp:
1993 2001 self._format_flags &= ~FLAG_INLINE_DATA
1994 2002 self._inline = False
1995 2003 for i in self:
1996 2004 e = self.index.entry_binary(i)
1997 2005 if i == 0:
1998 2006 header = self._format_flags | self._format_version
1999 2007 header = self.index.pack_header(header)
2000 2008 e = header + e
2001 2009 fp.write(e)
2002
2003 2010 # the temp file replaces the real index when we exit the context
2004 2011 # manager
2005 2012
2006 2013 tr.replace(self._indexfile, trindex * self.index.entry_size)
2007 2014 nodemaputil.setup_persistent_nodemap(tr, self)
2008 2015 self._chunkclear()
2009 2016
2017 if existing_handles:
2018 # switched from inline to conventional; reopen the index
2019 ifh = self._indexfp(b"a+")
2020 self._writinghandles = (ifh, new_dfh)
2021 new_dfh = None
2022 finally:
2023 if new_dfh is not None:
2024 new_dfh.close()
2025
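# The conversion above, step by step (a reading aid, not extra behavior):
#
# 1. flush and close any handles opened by ``_writing`` -- the cached
#    inline file is about to disappear, so they must not be reused;
# 2. copy every revision's raw segment from the inline index into a fresh
#    ``.d`` file (``new_dfh``), truncating it first in case a stale one
#    exists;
# 3. rewrite the index without FLAG_INLINE_DATA through an atomictemp
#    file, so a crash leaves the old inline revlog intact;
# 4. tell the transaction the index shrank (``tr.replace``) and, if a
#    ``_writing`` context was active, reopen handles on the new pair of
#    files so callers can keep appending transparently.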
2010 2026 def _nodeduplicatecallback(self, transaction, node):
2011 2027 """called when trying to add a node already stored."""
2012 2028
2029 @contextlib.contextmanager
2030 def _writing(self, transaction):
2031 if self._writinghandles is not None:
2032 yield
2033 else:
2034 r = len(self)
2035 dsize = 0
2036 if r:
2037 dsize = self.end(r - 1)
2038 dfh = None
2039 if not self._inline:
2040 dfh = self._datafp(b"a+")
2041 transaction.add(self._datafile, dsize)
2042 try:
2043 isize = r * self.index.entry_size
2044 ifh = self._indexfp(b"a+")
2045 if self._inline:
2046 transaction.add(self._indexfile, dsize + isize)
2047 else:
2048 transaction.add(self._indexfile, isize)
2049 try:
2050 self._writinghandles = (ifh, dfh)
2051 try:
2052 yield
2053 finally:
2054 self._writinghandles = None
2055 finally:
2056 ifh.close()
2057 finally:
2058 if dfh is not None:
2059 dfh.close()
2060
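# The shape of the new ``_writing`` context, reduced to its essentials: a
# reentrant guard that opens the append handles once, registers the current
# file sizes with the transaction (so a rollback can truncate), and always
# closes what it opened. A sketch with toy open/close hooks, not the real
# vfs-backed implementation; ``open_data`` may return None for inline
# revlogs, mirroring ``dfh`` above.
import contextlib

class WriterState(object):
    def __init__(self):
        self.handles = None  # mirrors revlog._writinghandles

    @contextlib.contextmanager
    def writing(self, open_index, open_data, on_enter):
        if self.handles is not None:
            yield  # already inside a _writing block: reuse the handles
            return
        dfh = open_data()
        try:
            ifh = open_index()
            try:
                on_enter()  # e.g. transaction.add(file, current_size)
                self.handles = (ifh, dfh)
                try:
                    yield
                finally:
                    self.handles = None
            finally:
                ifh.close()
        finally:
            if dfh is not None:
                dfh.close()

# Every mutation path below (addrawrevision, addgroup) now runs under this
# context, which is what lets _addrevision and _writeentry assert that
# handles exist instead of threading ``ifh``/``dfh`` arguments around.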
2013 2061 def addrevision(
2014 2062 self,
2015 2063 text,
2016 2064 transaction,
2017 2065 link,
2018 2066 p1,
2019 2067 p2,
2020 2068 cachedelta=None,
2021 2069 node=None,
2022 2070 flags=REVIDX_DEFAULT_FLAGS,
2023 2071 deltacomputer=None,
2024 2072 sidedata=None,
2025 2073 ):
2026 2074 """add a revision to the log
2027 2075
2028 2076 text - the revision data to add
2029 2077 transaction - the transaction object used for rollback
2030 2078 link - the linkrev data to add
2031 2079 p1, p2 - the parent nodeids of the revision
2032 2080 cachedelta - an optional precomputed delta
2033 2081 node - nodeid of revision; typically node is not specified, and it is
2034 2082 computed by default as hash(text, p1, p2), however subclasses might
2035 2083 use a different hashing method (and override checkhash() in such a case)
2036 2084 flags - the known flags to set on the revision
2037 2085 deltacomputer - an optional deltacomputer instance shared between
2038 2086 multiple calls
2039 2087 """
2040 2088 if link == nullrev:
2041 2089 raise error.RevlogError(
2042 2090 _(b"attempted to add linkrev -1 to %s") % self.display_id
2043 2091 )
2044 2092
2045 2093 if sidedata is None:
2046 2094 sidedata = {}
2047 2095 elif sidedata and not self.hassidedata:
2048 2096 raise error.ProgrammingError(
2049 2097 _(b"trying to add sidedata to a revlog who don't support them")
2050 2098 )
2051 2099
2052 2100 if flags:
2053 2101 node = node or self.hash(text, p1, p2)
2054 2102
2055 2103 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2056 2104
2057 2105 # If the flag processor modifies the revision data, ignore any provided
2058 2106 # cachedelta.
2059 2107 if rawtext != text:
2060 2108 cachedelta = None
2061 2109
2062 2110 if len(rawtext) > _maxentrysize:
2063 2111 raise error.RevlogError(
2064 2112 _(
2065 2113 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2066 2114 )
2067 2115 % (self.display_id, len(rawtext))
2068 2116 )
2069 2117
2070 2118 node = node or self.hash(rawtext, p1, p2)
2071 2119 rev = self.index.get_rev(node)
2072 2120 if rev is not None:
2073 2121 return rev
2074 2122
2075 2123 if validatehash:
2076 2124 self.checkhash(rawtext, node, p1=p1, p2=p2)
2077 2125
2078 2126 return self.addrawrevision(
2079 2127 rawtext,
2080 2128 transaction,
2081 2129 link,
2082 2130 p1,
2083 2131 p2,
2084 2132 node,
2085 2133 flags,
2086 2134 cachedelta=cachedelta,
2087 2135 deltacomputer=deltacomputer,
2088 2136 sidedata=sidedata,
2089 2137 )
2090 2138
2091 2139 def addrawrevision(
2092 2140 self,
2093 2141 rawtext,
2094 2142 transaction,
2095 2143 link,
2096 2144 p1,
2097 2145 p2,
2098 2146 node,
2099 2147 flags,
2100 2148 cachedelta=None,
2101 2149 deltacomputer=None,
2102 2150 sidedata=None,
2103 2151 ):
2104 2152 """add a raw revision with known flags, node and parents
2105 2153 useful when reusing a revision not stored in this revlog (ex: received
2106 2154 over the wire, or read from an external bundle).
2107 2155 """
2108 dfh = None
2109 if not self._inline:
2110 dfh = self._datafp(b"a+")
2111 ifh = self._indexfp(b"a+")
2112 try:
2156 with self._writing(transaction):
2113 2157 return self._addrevision(
2114 2158 node,
2115 2159 rawtext,
2116 2160 transaction,
2117 2161 link,
2118 2162 p1,
2119 2163 p2,
2120 2164 flags,
2121 2165 cachedelta,
2122 ifh,
2123 dfh,
2124 2166 deltacomputer=deltacomputer,
2125 2167 sidedata=sidedata,
2126 2168 )
2127 finally:
2128 if dfh:
2129 dfh.close()
2130 ifh.close()
2131 2169
2132 2170 def compress(self, data):
2133 2171 """Generate a possibly-compressed representation of data."""
2134 2172 if not data:
2135 2173 return b'', data
2136 2174
2137 2175 compressed = self._compressor.compress(data)
2138 2176
2139 2177 if compressed:
2140 2178 # The revlog compressor added the header in the returned data.
2141 2179 return b'', compressed
2142 2180
2143 2181 if data[0:1] == b'\0':
2144 2182 return b'', data
2145 2183 return b'u', data
2146 2184
2147 2185 def decompress(self, data):
2148 2186 """Decompress a revlog chunk.
2149 2187
2150 2188 The chunk is expected to begin with a header identifying the
2151 2189 format type so it can be routed to an appropriate decompressor.
2152 2190 """
2153 2191 if not data:
2154 2192 return data
2155 2193
2156 2194 # Revlogs are read much more frequently than they are written and many
2157 2195 # chunks only take microseconds to decompress, so performance is
2158 2196 # important here.
2159 2197 #
2160 2198 # We can make a few assumptions about revlogs:
2161 2199 #
2162 2200 # 1) the majority of chunks will be compressed (as opposed to inline
2163 2201 # raw data).
2164 2202 # 2) decompressing *any* data will likely be at least 10x slower than
2165 2203 # returning raw inline data.
2166 2204 # 3) we want to prioritize common and officially supported compression
2167 2205 # engines
2168 2206 #
2169 2207 # It follows that we want to optimize for "decompress compressed data
2170 2208 # when encoded with common and officially supported compression engines"
2171 2209 # case over "raw data" and "data encoded by less common or non-official
2172 2210 # compression engines." That is why we have the inline lookup first
2173 2211 # followed by the compengines lookup.
2174 2212 #
2175 2213 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2176 2214 # compressed chunks. And this matters for changelog and manifest reads.
2177 2215 t = data[0:1]
2178 2216
2179 2217 if t == b'x':
2180 2218 try:
2181 2219 return _zlibdecompress(data)
2182 2220 except zlib.error as e:
2183 2221 raise error.RevlogError(
2184 2222 _(b'revlog decompress error: %s')
2185 2223 % stringutil.forcebytestr(e)
2186 2224 )
2187 2225 # '\0' is more common than 'u' so it goes first.
2188 2226 elif t == b'\0':
2189 2227 return data
2190 2228 elif t == b'u':
2191 2229 return util.buffer(data, 1)
2192 2230
2193 2231 try:
2194 2232 compressor = self._decompressors[t]
2195 2233 except KeyError:
2196 2234 try:
2197 2235 engine = util.compengines.forrevlogheader(t)
2198 2236 compressor = engine.revlogcompressor(self._compengineopts)
2199 2237 self._decompressors[t] = compressor
2200 2238 except KeyError:
2201 2239 raise error.RevlogError(
2202 2240 _(b'unknown compression type %s') % binascii.hexlify(t)
2203 2241 )
2204 2242
2205 2243 return compressor.decompress(data)
2206 2244
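# The dispatch order above, as a standalone routine: the first byte of
# every stored chunk routes it to a decompressor, with the common cases
# tested before any dictionary lookup. A toy illustration using zlib only;
# the real code also consults pluggable compression engines by header.
import zlib

def decompress_chunk(data):
    if not data:
        return data
    t = data[0:1]
    if t == b'x':  # zlib, by far the most common header
        return zlib.decompress(data)
    if t == b'\0':  # raw data that happens to start with NUL
        return data
    if t == b'u':  # explicitly-uncompressed marker, strip the byte
        return data[1:]
    raise ValueError(b'unknown compression type ' + t)

# assert decompress_chunk(zlib.compress(b'hello')) == b'hello'
# assert decompress_chunk(b'uhello') == b'hello'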
2207 2245 def _addrevision(
2208 2246 self,
2209 2247 node,
2210 2248 rawtext,
2211 2249 transaction,
2212 2250 link,
2213 2251 p1,
2214 2252 p2,
2215 2253 flags,
2216 2254 cachedelta,
2217 ifh,
2218 dfh,
2219 2255 alwayscache=False,
2220 2256 deltacomputer=None,
2221 2257 sidedata=None,
2222 2258 ):
2223 2259 """internal function to add revisions to the log
2224 2260
2225 2261 see addrevision for argument descriptions.
2226 2262
2227 2263 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2228 2264
2229 2265 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2230 2266 be used.
2231 2267
2232 2268 invariants:
2233 2269 - rawtext is optional (can be None); if not set, cachedelta must be set.
2234 2270 if both are set, they must correspond to each other.
2235 2271 """
2236 2272 if node == self.nullid:
2237 2273 raise error.RevlogError(
2238 2274 _(b"%s: attempt to add null revision") % self.display_id
2239 2275 )
2240 2276 if (
2241 2277 node == self.nodeconstants.wdirid
2242 2278 or node in self.nodeconstants.wdirfilenodeids
2243 2279 ):
2244 2280 raise error.RevlogError(
2245 2281 _(b"%s: attempt to add wdir revision") % self.display_id
2246 2282 )
2283 if self._writinghandles is None:
2284 msg = b'adding revision outside `revlog._writing` context'
2285 raise error.ProgrammingError(msg)
2247 2286
2248 2287 if self._inline:
2249 fh = ifh
2288 fh = self._writinghandles[0]
2250 2289 else:
2251 fh = dfh
2290 fh = self._writinghandles[1]
2252 2291
2253 2292 btext = [rawtext]
2254 2293
2255 2294 curr = len(self)
2256 2295 prev = curr - 1
2257 2296
2258 2297 offset = self._get_data_offset(prev)
2259 2298
2260 2299 if self._concurrencychecker:
2300 ifh, dfh = self._writinghandles
2261 2301 if self._inline:
2262 2302 # offset is "as if" it were in the .d file, so we need to add on
2263 2303 # the size of the entry metadata.
2264 2304 self._concurrencychecker(
2265 2305 ifh, self._indexfile, offset + curr * self.index.entry_size
2266 2306 )
2267 2307 else:
2268 2308 # Entries in the .i are a consistent size.
2269 2309 self._concurrencychecker(
2270 2310 ifh, self._indexfile, curr * self.index.entry_size
2271 2311 )
2272 2312 self._concurrencychecker(dfh, self._datafile, offset)
2273 2313
2274 2314 p1r, p2r = self.rev(p1), self.rev(p2)
2275 2315
2276 2316 # full versions are inserted when the needed deltas
2277 2317 # become comparable to the uncompressed text
2278 2318 if rawtext is None:
2279 2319 # need rawtext size, before changed by flag processors, which is
2280 2320 # the non-raw size. use revlog explicitly to avoid filelog's extra
2281 2321 # logic that might remove metadata size.
2282 2322 textlen = mdiff.patchedsize(
2283 2323 revlog.size(self, cachedelta[0]), cachedelta[1]
2284 2324 )
2285 2325 else:
2286 2326 textlen = len(rawtext)
2287 2327
2288 2328 if deltacomputer is None:
2289 2329 deltacomputer = deltautil.deltacomputer(self)
2290 2330
2291 2331 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2292 2332
2293 2333 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2294 2334
2295 2335 if sidedata and self.hassidedata:
2296 2336 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2297 2337 sidedata_offset = offset + deltainfo.deltalen
2298 2338 else:
2299 2339 serialized_sidedata = b""
2300 2340 # Don't store the offset if the sidedata is empty, that way
2301 2341 # we can easily detect empty sidedata and they will be no different
2302 2342 # than ones we manually add.
2303 2343 sidedata_offset = 0
2304 2344
2305 2345 e = (
2306 2346 offset_type(offset, flags),
2307 2347 deltainfo.deltalen,
2308 2348 textlen,
2309 2349 deltainfo.base,
2310 2350 link,
2311 2351 p1r,
2312 2352 p2r,
2313 2353 node,
2314 2354 sidedata_offset,
2315 2355 len(serialized_sidedata),
2316 2356 )
2317 2357
2318 2358 self.index.append(e)
2319 2359 entry = self.index.entry_binary(curr)
2320 2360 if curr == 0:
2321 2361 header = self._format_flags | self._format_version
2322 2362 header = self.index.pack_header(header)
2323 2363 entry = header + entry
2324 2364 self._writeentry(
2325 2365 transaction,
2326 ifh,
2327 dfh,
2328 2366 entry,
2329 2367 deltainfo.data,
2330 2368 link,
2331 2369 offset,
2332 2370 serialized_sidedata,
2333 2371 )
2334 2372
2335 2373 rawtext = btext[0]
2336 2374
2337 2375 if alwayscache and rawtext is None:
2338 2376 rawtext = deltacomputer.buildtext(revinfo, fh)
2339 2377
2340 2378 if type(rawtext) == bytes: # only accept immutable objects
2341 2379 self._revisioncache = (node, curr, rawtext)
2342 2380 self._chainbasecache[curr] = deltainfo.chainbase
2343 2381 return curr
2344 2382
2345 2383 def _get_data_offset(self, prev):
2346 2384 """Returns the current offset in the (in-transaction) data file.
2347 2385 Versions < 2 of the revlog can get this in O(1), revlog v2 needs a docket
2348 2386 file to store that information: since sidedata can be rewritten to the
2349 2387 end of the data file within a transaction, you can have cases where, for
2350 2388 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2351 2389 to `n - 1`'s sidedata being written after `n`'s data.
2352 2390
2353 2391 TODO cache this in a docket file before getting out of experimental."""
2354 2392 if self._format_version != REVLOGV2:
2355 2393 return self.end(prev)
2356 2394
2357 2395 offset = 0
2358 2396 for rev, entry in enumerate(self.index):
2359 2397 sidedata_end = entry[8] + entry[9]
2360 2398 # Sidedata for a previous rev has potentially been written after
2361 2399 # this rev's end, so take the max.
2362 2400 offset = max(self.end(rev), offset, sidedata_end)
2363 2401 return offset
2364 2402
2365 def _writeentry(
2366 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2367 ):
2403 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2368 2404 # Files opened in a+ mode have inconsistent behavior on various
2369 2405 # platforms. Windows requires that a file positioning call be made
2370 2406 # when the file handle transitions between reads and writes. See
2371 2407 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2372 2408 # platforms, Python or the platform itself can be buggy. Some versions
2373 2409 # of Solaris have been observed to not append at the end of the file
2374 2410 # if the file was seeked to before the end. See issue4943 for more.
2375 2411 #
2376 2412 # We work around this issue by inserting a seek() before writing.
2377 2413 # Note: This is likely not necessary on Python 3. However, because
2378 2414 # the file handle is reused for reads and may be seeked there, we need
2379 2415 # to be careful before changing this.
2416 if self._writinghandles is None:
2417 msg = b'adding revision outside `revlog._writing` context'
2418 raise error.ProgrammingError(msg)
2419 ifh, dfh = self._writinghandles
2380 2420 ifh.seek(0, os.SEEK_END)
2381 2421 if dfh:
2382 2422 dfh.seek(0, os.SEEK_END)
2383 2423
2384 2424 curr = len(self) - 1
2385 2425 if not self._inline:
2386 2426 transaction.add(self._datafile, offset)
2387 2427 transaction.add(self._indexfile, curr * len(entry))
2388 2428 if data[0]:
2389 2429 dfh.write(data[0])
2390 2430 dfh.write(data[1])
2391 2431 if sidedata:
2392 2432 dfh.write(sidedata)
2393 2433 ifh.write(entry)
2394 2434 else:
2395 2435 offset += curr * self.index.entry_size
2396 2436 transaction.add(self._indexfile, offset)
2397 2437 ifh.write(entry)
2398 2438 ifh.write(data[0])
2399 2439 ifh.write(data[1])
2400 2440 if sidedata:
2401 2441 ifh.write(sidedata)
2402 self._enforceinlinesize(transaction, ifh)
2442 self._enforceinlinesize(transaction)
2403 2443 nodemaputil.setup_persistent_nodemap(transaction, self)
2404 2444
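# The a+ quirk worked around above, in isolation: after a read, some
# platforms leave an a+ handle positioned mid-file, and a write may then
# land at that position instead of at the end. Seeking to SEEK_END first
# makes the append explicit. A self-contained demo with a temporary file.
import os
import tempfile

with tempfile.TemporaryFile(mode='a+b') as fh:
    fh.write(b'abc')
    fh.seek(0)
    fh.read(1)  # handle now positioned at offset 1
    fh.seek(0, os.SEEK_END)  # the defensive seek, as in _writeentry
    fh.write(b'def')
    fh.seek(0)
    assert fh.read() == b'abcdef'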
2405 2445 def addgroup(
2406 2446 self,
2407 2447 deltas,
2408 2448 linkmapper,
2409 2449 transaction,
2410 2450 alwayscache=False,
2411 2451 addrevisioncb=None,
2412 2452 duplicaterevisioncb=None,
2413 2453 ):
2414 2454 """
2415 2455 add a delta group
2416 2456
2417 2457 given a set of deltas, add them to the revision log. the
2418 2458 first delta is against its parent, which should be in our
2419 2459 log, the rest are against the previous delta.
2420 2460
2421 2461 If ``addrevisioncb`` is defined, it will be called with arguments of
2422 2462 this revlog and the node that was added.
2423 2463 """
2424 2464
2425 if self._writinghandles:
2465 if self._adding_group:
2426 2466 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2427 2467
2428 r = len(self)
2429 end = 0
2430 if r:
2431 end = self.end(r - 1)
2432 ifh = self._indexfp(b"a+")
2433 isize = r * self.index.entry_size
2434 if self._inline:
2435 transaction.add(self._indexfile, end + isize)
2436 dfh = None
2437 else:
2438 transaction.add(self._indexfile, isize)
2439 transaction.add(self._datafile, end)
2440 dfh = self._datafp(b"a+")
2441
2442 self._writinghandles = (ifh, dfh)
2468 self._adding_group = True
2443 2469 empty = True
2444
2445 2470 try:
2446 if True:
2471 with self._writing(transaction):
2447 2472 deltacomputer = deltautil.deltacomputer(self)
2448 2473 # loop through our set of deltas
2449 2474 for data in deltas:
2450 2475 (
2451 2476 node,
2452 2477 p1,
2453 2478 p2,
2454 2479 linknode,
2455 2480 deltabase,
2456 2481 delta,
2457 2482 flags,
2458 2483 sidedata,
2459 2484 ) = data
2460 2485 link = linkmapper(linknode)
2461 2486 flags = flags or REVIDX_DEFAULT_FLAGS
2462 2487
2463 2488 rev = self.index.get_rev(node)
2464 2489 if rev is not None:
2465 2490 # this can happen if two branches make the same change
2466 2491 self._nodeduplicatecallback(transaction, rev)
2467 2492 if duplicaterevisioncb:
2468 2493 duplicaterevisioncb(self, rev)
2469 2494 empty = False
2470 2495 continue
2471 2496
2472 2497 for p in (p1, p2):
2473 2498 if not self.index.has_node(p):
2474 2499 raise error.LookupError(
2475 2500 p, self.radix, _(b'unknown parent')
2476 2501 )
2477 2502
2478 2503 if not self.index.has_node(deltabase):
2479 2504 raise error.LookupError(
2480 2505 deltabase, self.display_id, _(b'unknown delta base')
2481 2506 )
2482 2507
2483 2508 baserev = self.rev(deltabase)
2484 2509
2485 2510 if baserev != nullrev and self.iscensored(baserev):
2486 2511 # if base is censored, delta must be full replacement in a
2487 2512 # single patch operation
2488 2513 hlen = struct.calcsize(b">lll")
2489 2514 oldlen = self.rawsize(baserev)
2490 2515 newlen = len(delta) - hlen
2491 2516 if delta[:hlen] != mdiff.replacediffheader(
2492 2517 oldlen, newlen
2493 2518 ):
2494 2519 raise error.CensoredBaseError(
2495 2520 self.display_id, self.node(baserev)
2496 2521 )
2497 2522
2498 2523 if not flags and self._peek_iscensored(baserev, delta):
2499 2524 flags |= REVIDX_ISCENSORED
2500 2525
2501 2526 # We assume consumers of addrevisioncb will want to retrieve
2502 2527 # the added revision, which will require a call to
2503 2528 # revision(). revision() will fast path if there is a cache
2504 2529 # hit. So, we tell _addrevision() to always cache in this case.
2505 2530 # We're only using addgroup() in the context of changegroup
2506 2531 # generation so the revision data can always be handled as raw
2507 2532 # by the flagprocessor.
2508 2533 rev = self._addrevision(
2509 2534 node,
2510 2535 None,
2511 2536 transaction,
2512 2537 link,
2513 2538 p1,
2514 2539 p2,
2515 2540 flags,
2516 2541 (baserev, delta),
2517 ifh,
2518 dfh,
2519 2542 alwayscache=alwayscache,
2520 2543 deltacomputer=deltacomputer,
2521 2544 sidedata=sidedata,
2522 2545 )
2523 2546
2524 2547 if addrevisioncb:
2525 2548 addrevisioncb(self, rev)
2526 2549 empty = False
2527
2528 if not dfh and not self._inline:
2529 # addrevision switched from inline to conventional
2530 # reopen the index
2531 ifh.close()
2532 dfh = self._datafp(b"a+")
2533 ifh = self._indexfp(b"a+")
2534 self._writinghandles = (ifh, dfh)
2535 2550 finally:
2536 self._writinghandles = None
2537
2538 if dfh:
2539 dfh.close()
2540 ifh.close()
2551 self._adding_group = False
2541 2552 return not empty
2542 2553
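# After this change ``addgroup`` no longer opens files itself: the
# ``_adding_group`` flag only guards against nesting, and all handle
# management lives in ``_writing``. Schematically (illustrative only):
#
#   if self._adding_group:
#       raise error.ProgrammingError(b'cannot nest addgroup() calls')
#   self._adding_group = True
#   try:
#       with self._writing(transaction):
#           ... apply each incoming delta via _addrevision() ...
#   finally:
#       self._adding_group = False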
2543 2554 def iscensored(self, rev):
2544 2555 """Check if a file revision is censored."""
2545 2556 if not self._censorable:
2546 2557 return False
2547 2558
2548 2559 return self.flags(rev) & REVIDX_ISCENSORED
2549 2560
2550 2561 def _peek_iscensored(self, baserev, delta):
2551 2562 """Quickly check if a delta produces a censored revision."""
2552 2563 if not self._censorable:
2553 2564 return False
2554 2565
2555 2566 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2556 2567
2557 2568 def getstrippoint(self, minlink):
2558 2569 """find the minimum rev that must be stripped to strip the linkrev
2559 2570
2560 2571 Returns a tuple containing the minimum rev and a set of all revs that
2561 2572 have linkrevs that will be broken by this strip.
2562 2573 """
2563 2574 return storageutil.resolvestripinfo(
2564 2575 minlink,
2565 2576 len(self) - 1,
2566 2577 self.headrevs(),
2567 2578 self.linkrev,
2568 2579 self.parentrevs,
2569 2580 )
2570 2581
2571 2582 def strip(self, minlink, transaction):
2572 2583 """truncate the revlog on the first revision with a linkrev >= minlink
2573 2584
2574 2585 This function is called when we're stripping revision minlink and
2575 2586 its descendants from the repository.
2576 2587
2577 2588 We have to remove all revisions with linkrev >= minlink, because
2578 2589 the equivalent changelog revisions will be renumbered after the
2579 2590 strip.
2580 2591
2581 2592 So we truncate the revlog on the first of these revisions, and
2582 2593 trust that the caller has saved the revisions that shouldn't be
2583 2594 removed and that it'll re-add them after this truncation.
2584 2595 """
2585 2596 if len(self) == 0:
2586 2597 return
2587 2598
2588 2599 rev, _ = self.getstrippoint(minlink)
2589 2600 if rev == len(self):
2590 2601 return
2591 2602
2592 2603 # first truncate the files on disk
2593 2604 end = self.start(rev)
2594 2605 if not self._inline:
2595 2606 transaction.add(self._datafile, end)
2596 2607 end = rev * self.index.entry_size
2597 2608 else:
2598 2609 end += rev * self.index.entry_size
2599 2610
2600 2611 transaction.add(self._indexfile, end)
2601 2612
2602 2613 # then reset internal state in memory to forget those revisions
2603 2614 self._revisioncache = None
2604 2615 self._chaininfocache = util.lrucachedict(500)
2605 2616 self._chunkclear()
2606 2617
2607 2618 del self.index[rev:-1]
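The truncation offsets above differ for inline revlogs, where index entries and revision data share a single file. A hedged restatement of that arithmetic (`rlog` assumed to be an open revlog):

def truncation_offsets(rlog, rev):
    # where strip(rev) cuts each file: a separate data file is cut at
    # the start of rev's data; an inline index also interleaves that
    # data, so the index-entry term is added on top of it
    data_end = rlog.start(rev)
    if rlog._inline:
        return data_end + rev * rlog.index.entry_size, None
    return rev * rlog.index.entry_size, data_end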
2608 2619
2609 2620 def checksize(self):
2610 2621 """Check size of index and data files
2611 2622
2612 2623 return a (dd, di) tuple.
2613 2624 - dd: extra bytes for the "data" file
2614 2625 - di: extra bytes for the "index" file
2615 2626
2616 2627 A healthy revlog will return (0, 0).
2617 2628 """
2618 2629 expected = 0
2619 2630 if len(self):
2620 2631 expected = max(0, self.end(len(self) - 1))
2621 2632
2622 2633 try:
2623 2634 with self._datafp() as f:
2624 2635 f.seek(0, io.SEEK_END)
2625 2636 actual = f.tell()
2626 2637 dd = actual - expected
2627 2638 except IOError as inst:
2628 2639 if inst.errno != errno.ENOENT:
2629 2640 raise
2630 2641 dd = 0
2631 2642
2632 2643 try:
2633 2644 f = self.opener(self._indexfile)
2634 2645 f.seek(0, io.SEEK_END)
2635 2646 actual = f.tell()
2636 2647 f.close()
2637 2648 s = self.index.entry_size
2638 2649 i = max(0, actual // s)
2639 2650 di = actual - (i * s)
2640 2651 if self._inline:
2641 2652 databytes = 0
2642 2653 for r in self:
2643 2654 databytes += max(0, self.length(r))
2644 2655 dd = 0
2645 2656 di = actual - len(self) * s - databytes
2646 2657 except IOError as inst:
2647 2658 if inst.errno != errno.ENOENT:
2648 2659 raise
2649 2660 di = 0
2650 2661
2651 2662 return (dd, di)
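Interpreting the result (sketch; `rlog` is an assumed open revlog):

dd, di = rlog.checksize()
if (dd, di) != (0, 0):
    print('unhealthy revlog: %d stray data bytes, %d stray index bytes'
          % (dd, di))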
2652 2663
2653 2664 def files(self):
2654 2665 res = [self._indexfile]
2655 2666 if not self._inline:
2656 2667 res.append(self._datafile)
2657 2668 return res
2658 2669
2659 2670 def emitrevisions(
2660 2671 self,
2661 2672 nodes,
2662 2673 nodesorder=None,
2663 2674 revisiondata=False,
2664 2675 assumehaveparentrevisions=False,
2665 2676 deltamode=repository.CG_DELTAMODE_STD,
2666 2677 sidedata_helpers=None,
2667 2678 ):
2668 2679 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2669 2680 raise error.ProgrammingError(
2670 2681 b'unhandled value for nodesorder: %s' % nodesorder
2671 2682 )
2672 2683
2673 2684 if nodesorder is None and not self._generaldelta:
2674 2685 nodesorder = b'storage'
2675 2686
2676 2687 if (
2677 2688 not self._storedeltachains
2678 2689 and deltamode != repository.CG_DELTAMODE_PREV
2679 2690 ):
2680 2691 deltamode = repository.CG_DELTAMODE_FULL
2681 2692
2682 2693 return storageutil.emitrevisions(
2683 2694 self,
2684 2695 nodes,
2685 2696 nodesorder,
2686 2697 revlogrevisiondelta,
2687 2698 deltaparentfn=self.deltaparent,
2688 2699 candeltafn=self.candelta,
2689 2700 rawsizefn=self.rawsize,
2690 2701 revdifffn=self.revdiff,
2691 2702 flagsfn=self.flags,
2692 2703 deltamode=deltamode,
2693 2704 revisiondata=revisiondata,
2694 2705 assumehaveparentrevisions=assumehaveparentrevisions,
2695 2706 sidedata_helpers=sidedata_helpers,
2696 2707 )
2697 2708
2698 2709 DELTAREUSEALWAYS = b'always'
2699 2710 DELTAREUSESAMEREVS = b'samerevs'
2700 2711 DELTAREUSENEVER = b'never'
2701 2712
2702 2713 DELTAREUSEFULLADD = b'fulladd'
2703 2714
2704 2715 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2705 2716
2706 2717 def clone(
2707 2718 self,
2708 2719 tr,
2709 2720 destrevlog,
2710 2721 addrevisioncb=None,
2711 2722 deltareuse=DELTAREUSESAMEREVS,
2712 2723 forcedeltabothparents=None,
2713 2724 sidedata_helpers=None,
2714 2725 ):
2715 2726 """Copy this revlog to another, possibly with format changes.
2716 2727
2717 2728 The destination revlog will contain the same revisions and nodes.
2718 2729 However, it may not be bit-for-bit identical due to e.g. delta encoding
2719 2730 differences.
2720 2731
2721 2732 The ``deltareuse`` argument controls how deltas from the existing revlog
2722 2733 are preserved in the destination revlog. The argument can have the
2723 2734 following values:
2724 2735
2725 2736 DELTAREUSEALWAYS
2726 2737 Deltas will always be reused (if possible), even if the destination
2727 2738 revlog would not select the same revisions for the delta. This is the
2728 2739 fastest mode of operation.
2729 2740 DELTAREUSESAMEREVS
2730 2741 Deltas will be reused if the destination revlog would pick the same
2731 2742 revisions for the delta. This mode strikes a balance between speed
2732 2743 and optimization.
2733 2744 DELTAREUSENEVER
2734 2745 Deltas will never be reused. This is the slowest mode of execution.
2735 2746 This mode can be used to recompute deltas (e.g. if the diff/delta
2736 2747 algorithm changes).
2737 2748 DELTAREUSEFULLADD
2738 2749 Revisions will be re-added as if they were new content. This is
2739 2750 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2740 2751 e.g. large file detection and handling.
2741 2752
2742 2753 Delta computation can be slow, so the choice of delta reuse policy can
2743 2754 significantly affect run time.
2744 2755
2745 2756 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2746 2757 two extremes. Deltas will be reused if they are appropriate. But if the
2747 2758 delta could choose a better revision, it will do so. This means if you
2748 2759 are converting a non-generaldelta revlog to a generaldelta revlog,
2749 2760 deltas will be recomputed if the delta's parent isn't a parent of the
2750 2761 revision.
2751 2762
2752 2763 In addition to the delta policy, the ``forcedeltabothparents``
2753 2764 argument controls whether to force computing deltas against both parents
2754 2765 for merges. If unset, the destination revlog's current setting is kept.
2755 2766
2756 2767 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2757 2768 `sidedata_helpers`.
2758 2769 """
2759 2770 if deltareuse not in self.DELTAREUSEALL:
2760 2771 raise ValueError(
2761 2772 _(b'value for deltareuse invalid: %s') % deltareuse
2762 2773 )
2763 2774
2764 2775 if len(destrevlog):
2765 2776 raise ValueError(_(b'destination revlog is not empty'))
2766 2777
2767 2778 if getattr(self, 'filteredrevs', None):
2768 2779 raise ValueError(_(b'source revlog has filtered revisions'))
2769 2780 if getattr(destrevlog, 'filteredrevs', None):
2770 2781 raise ValueError(_(b'destination revlog has filtered revisions'))
2771 2782
2772 2783 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2773 2784 # if possible.
2774 2785 oldlazydelta = destrevlog._lazydelta
2775 2786 oldlazydeltabase = destrevlog._lazydeltabase
2776 2787 oldamd = destrevlog._deltabothparents
2777 2788
2778 2789 try:
2779 2790 if deltareuse == self.DELTAREUSEALWAYS:
2780 2791 destrevlog._lazydeltabase = True
2781 2792 destrevlog._lazydelta = True
2782 2793 elif deltareuse == self.DELTAREUSESAMEREVS:
2783 2794 destrevlog._lazydeltabase = False
2784 2795 destrevlog._lazydelta = True
2785 2796 elif deltareuse == self.DELTAREUSENEVER:
2786 2797 destrevlog._lazydeltabase = False
2787 2798 destrevlog._lazydelta = False
2788 2799
2789 2800 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2790 2801
2791 2802 self._clone(
2792 2803 tr,
2793 2804 destrevlog,
2794 2805 addrevisioncb,
2795 2806 deltareuse,
2796 2807 forcedeltabothparents,
2797 2808 sidedata_helpers,
2798 2809 )
2799 2810
2800 2811 finally:
2801 2812 destrevlog._lazydelta = oldlazydelta
2802 2813 destrevlog._lazydeltabase = oldlazydeltabase
2803 2814 destrevlog._deltabothparents = oldamd
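For example, recomputing every delta while converting a revlog could look like this sketch (`src`, `dest`, and `tr` are assumed to exist); per the docstring above, this is the mode to reach for when the delta algorithm itself changed:

src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)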
2804 2815
2805 2816 def _clone(
2806 2817 self,
2807 2818 tr,
2808 2819 destrevlog,
2809 2820 addrevisioncb,
2810 2821 deltareuse,
2811 2822 forcedeltabothparents,
2812 2823 sidedata_helpers,
2813 2824 ):
2814 2825 """perform the core duty of `revlog.clone` after parameter processing"""
2815 2826 deltacomputer = deltautil.deltacomputer(destrevlog)
2816 2827 index = self.index
2817 2828 for rev in self:
2818 2829 entry = index[rev]
2819 2830
2820 2831 # Some classes override linkrev to take filtered revs into
2821 2832 # account. Use raw entry from index.
2822 2833 flags = entry[0] & 0xFFFF
2823 2834 linkrev = entry[4]
2824 2835 p1 = index[entry[5]][7]
2825 2836 p2 = index[entry[6]][7]
2826 2837 node = entry[7]
2827 2838
2828 2839 # (Possibly) reuse the delta from the revlog if allowed and
2829 2840 # the revlog chunk is a delta.
2830 2841 cachedelta = None
2831 2842 rawtext = None
2832 2843 if deltareuse == self.DELTAREUSEFULLADD:
2833 2844 text, sidedata = self._revisiondata(rev)
2834 2845
2835 2846 if sidedata_helpers is not None:
2836 2847 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2837 2848 self, sidedata_helpers, sidedata, rev
2838 2849 )
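# each helper returns a (flags_to_add, flags_to_remove) pair; `&`
# binds tighter than `|`, so the next line reads as
# flags | (new_flags[0] & ~new_flags[1])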
2839 2850 flags = flags | new_flags[0] & ~new_flags[1]
2840 2851
2841 2852 destrevlog.addrevision(
2842 2853 text,
2843 2854 tr,
2844 2855 linkrev,
2845 2856 p1,
2846 2857 p2,
2847 2858 cachedelta=cachedelta,
2848 2859 node=node,
2849 2860 flags=flags,
2850 2861 deltacomputer=deltacomputer,
2851 2862 sidedata=sidedata,
2852 2863 )
2853 2864 else:
2854 2865 if destrevlog._lazydelta:
2855 2866 dp = self.deltaparent(rev)
2856 2867 if dp != nullrev:
2857 2868 cachedelta = (dp, bytes(self._chunk(rev)))
2858 2869
2859 2870 sidedata = None
2860 2871 if not cachedelta:
2861 2872 rawtext, sidedata = self._revisiondata(rev)
2862 2873 if sidedata is None:
2863 2874 sidedata = self.sidedata(rev)
2864 2875
2865 2876 if sidedata_helpers is not None:
2866 2877 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2867 2878 self, sidedata_helpers, sidedata, rev
2868 2879 )
2869 2880 flags = flags | new_flags[0] & ~new_flags[1]
2870 2881
2871 ifh = destrevlog.opener(
2872 destrevlog._indexfile, b'a+', checkambig=False
2873 )
2874 dfh = None
2875 if not destrevlog._inline:
2876 dfh = destrevlog.opener(destrevlog._datafile, b'a+')
2877 try:
2882 with destrevlog._writing(tr):
2878 2883 destrevlog._addrevision(
2879 2884 node,
2880 2885 rawtext,
2881 2886 tr,
2882 2887 linkrev,
2883 2888 p1,
2884 2889 p2,
2885 2890 flags,
2886 2891 cachedelta,
2887 ifh,
2888 dfh,
2889 2892 deltacomputer=deltacomputer,
2890 2893 sidedata=sidedata,
2891 2894 )
2892 finally:
2893 if dfh:
2894 dfh.close()
2895 ifh.close()
2896 2895
2897 2896 if addrevisioncb:
2898 2897 addrevisioncb(self, rev, node)
2899 2898
2900 2899 def censorrevision(self, tr, censornode, tombstone=b''):
2901 2900 if self._format_version == REVLOGV0:
2902 2901 raise error.RevlogError(
2903 2902 _(b'cannot censor with version %d revlogs')
2904 2903 % self._format_version
2905 2904 )
2906 2905
2907 2906 censorrev = self.rev(censornode)
2908 2907 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2909 2908
2910 2909 if len(tombstone) > self.rawsize(censorrev):
2911 2910 raise error.Abort(
2912 2911 _(b'censor tombstone must be no longer than censored data')
2913 2912 )
2914 2913
2915 2914 # Rewriting the revlog in place is hard. Our strategy for censoring is
2916 2915 # to create a new revlog, copy all revisions to it, then replace the
2917 2916 # revlogs on transaction close.
2918 2917 #
2919 2918 # This is a bit dangerous. We could easily have a mismatch of state.
2920 2919 newrl = revlog(
2921 2920 self.opener,
2922 2921 target=self.target,
2923 2922 radix=self.radix,
2924 2923 postfix=b'tmpcensored',
2925 2924 censorable=True,
2926 2925 )
2927 2926 newrl._format_version = self._format_version
2928 2927 newrl._format_flags = self._format_flags
2929 2928 newrl._generaldelta = self._generaldelta
2930 2929 newrl._parse_index = self._parse_index
2931 2930
2932 2931 for rev in self.revs():
2933 2932 node = self.node(rev)
2934 2933 p1, p2 = self.parents(node)
2935 2934
2936 2935 if rev == censorrev:
2937 2936 newrl.addrawrevision(
2938 2937 tombstone,
2939 2938 tr,
2940 2939 self.linkrev(censorrev),
2941 2940 p1,
2942 2941 p2,
2943 2942 censornode,
2944 2943 REVIDX_ISCENSORED,
2945 2944 )
2946 2945
2947 2946 if newrl.deltaparent(rev) != nullrev:
2948 2947 raise error.Abort(
2949 2948 _(
2950 2949 b'censored revision stored as delta; '
2951 2950 b'cannot censor'
2952 2951 ),
2953 2952 hint=_(
2954 2953 b'censoring of revlogs is not '
2955 2954 b'fully implemented; please report '
2956 2955 b'this bug'
2957 2956 ),
2958 2957 )
2959 2958 continue
2960 2959
2961 2960 if self.iscensored(rev):
2962 2961 if self.deltaparent(rev) != nullrev:
2963 2962 raise error.Abort(
2964 2963 _(
2965 2964 b'cannot censor due to censored '
2966 2965 b'revision having delta stored'
2967 2966 )
2968 2967 )
2969 2968 rawtext = self._chunk(rev)
2970 2969 else:
2971 2970 rawtext = self.rawdata(rev)
2972 2971
2973 2972 newrl.addrawrevision(
2974 2973 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2975 2974 )
2976 2975
2977 2976 tr.addbackup(self._indexfile, location=b'store')
2978 2977 if not self._inline:
2979 2978 tr.addbackup(self._datafile, location=b'store')
2980 2979
2981 2980 self.opener.rename(newrl._indexfile, self._indexfile)
2982 2981 if not self._inline:
2983 2982 self.opener.rename(newrl._datafile, self._datafile)
2984 2983
2985 2984 self.clearcaches()
2986 2985 self._loadindex()
2987 2986
2988 2987 def verifyintegrity(self, state):
2989 2988 """Verifies the integrity of the revlog.
2990 2989
2991 2990 Yields ``revlogproblem`` instances describing problems that are
2992 2991 found.
2993 2992 """
2994 2993 dd, di = self.checksize()
2995 2994 if dd:
2996 2995 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2997 2996 if di:
2998 2997 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2999 2998
3000 2999 version = self._format_version
3001 3000
3002 3001 # The verifier tells us what version revlog we should be.
3003 3002 if version != state[b'expectedversion']:
3004 3003 yield revlogproblem(
3005 3004 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3006 3005 % (self.display_id, version, state[b'expectedversion'])
3007 3006 )
3008 3007
3009 3008 state[b'skipread'] = set()
3010 3009 state[b'safe_renamed'] = set()
3011 3010
3012 3011 for rev in self:
3013 3012 node = self.node(rev)
3014 3013
3015 3014 # Verify contents. 4 cases to care about:
3016 3015 #
3017 3016 # common: the most common case
3018 3017 # rename: with a rename
3019 3018 # meta: file content starts with b'\1\n', the metadata
3020 3019 # header defined in filelog.py, but without a rename
3021 3020 # ext: content stored externally
3022 3021 #
3023 3022 # More formally, their differences are shown below:
3024 3023 #
3025 3024 # | common | rename | meta | ext
3026 3025 # -------------------------------------------------------
3027 3026 # flags() | 0 | 0 | 0 | not 0
3028 3027 # renamed() | False | True | False | ?
3029 3028 # rawtext[0:2]=='\1\n'| False | True | True | ?
3030 3029 #
3031 3030 # "rawtext" means the raw text stored in revlog data, which
3032 3031 # could be retrieved by "rawdata(rev)". "text"
3033 3032 # mentioned below is "revision(rev)".
3034 3033 #
3035 3034 # There are 3 different lengths stored physically:
3036 3035 # 1. L1: rawsize, stored in revlog index
3037 3036 # 2. L2: len(rawtext), stored in revlog data
3038 3037 # 3. L3: len(text), stored in revlog data if flags==0, or
3039 3038 # possibly somewhere else if flags!=0
3040 3039 #
3041 3040 # L1 should be equal to L2. L3 could be different from them.
3042 3041 # "text" may or may not affect commit hash depending on flag
3043 3042 # processors (see flagutil.addflagprocessor).
3044 3043 #
3045 3044 # | common | rename | meta | ext
3046 3045 # -------------------------------------------------
3047 3046 # rawsize() | L1 | L1 | L1 | L1
3048 3047 # size() | L1 | L2-LM | L1(*) | L1 (?)
3049 3048 # len(rawtext) | L2 | L2 | L2 | L2
3050 3049 # len(text) | L2 | L2 | L2 | L3
3051 3050 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3052 3051 #
3053 3052 # LM: length of metadata, depending on rawtext
3054 3053 # (*): not ideal, see comment in filelog.size
3055 3054 # (?): could be "- len(meta)" if the resolved content has
3056 3055 # rename metadata
3057 3056 #
3058 3057 # Checks needed to be done:
3059 3058 # 1. length check: L1 == L2, in all cases.
3060 3059 # 2. hash check: depending on flag processor, we may need to
3061 3060 # use either "text" (external), or "rawtext" (in revlog).
3062 3061
3063 3062 try:
3064 3063 skipflags = state.get(b'skipflags', 0)
3065 3064 if skipflags:
3066 3065 skipflags &= self.flags(rev)
3067 3066
3068 3067 _verify_revision(self, skipflags, state, node)
3069 3068
3070 3069 l1 = self.rawsize(rev)
3071 3070 l2 = len(self.rawdata(node))
3072 3071
3073 3072 if l1 != l2:
3074 3073 yield revlogproblem(
3075 3074 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3076 3075 node=node,
3077 3076 )
3078 3077
3079 3078 except error.CensoredNodeError:
3080 3079 if state[b'erroroncensored']:
3081 3080 yield revlogproblem(
3082 3081 error=_(b'censored file data'), node=node
3083 3082 )
3084 3083 state[b'skipread'].add(node)
3085 3084 except Exception as e:
3086 3085 yield revlogproblem(
3087 3086 error=_(b'unpacking %s: %s')
3088 3087 % (short(node), stringutil.forcebytestr(e)),
3089 3088 node=node,
3090 3089 )
3091 3090 state[b'skipread'].add(node)
3092 3091
3093 3092 def storageinfo(
3094 3093 self,
3095 3094 exclusivefiles=False,
3096 3095 sharedfiles=False,
3097 3096 revisionscount=False,
3098 3097 trackedsize=False,
3099 3098 storedsize=False,
3100 3099 ):
3101 3100 d = {}
3102 3101
3103 3102 if exclusivefiles:
3104 3103 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3105 3104 if not self._inline:
3106 3105 d[b'exclusivefiles'].append((self.opener, self._datafile))
3107 3106
3108 3107 if sharedfiles:
3109 3108 d[b'sharedfiles'] = []
3110 3109
3111 3110 if revisionscount:
3112 3111 d[b'revisionscount'] = len(self)
3113 3112
3114 3113 if trackedsize:
3115 3114 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3116 3115
3117 3116 if storedsize:
3118 3117 d[b'storedsize'] = sum(
3119 3118 self.opener.stat(path).st_size for path in self.files()
3120 3119 )
3121 3120
3122 3121 return d
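Callers opt into each potentially expensive field explicitly, e.g. (sketch; `rlog` assumed):

info = rlog.storageinfo(revisionscount=True, trackedsize=True)
print(info[b'revisionscount'], info[b'trackedsize'])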
3123 3122
3124 3123 def rewrite_sidedata(self, helpers, startrev, endrev):
3125 3124 if not self.hassidedata:
3126 3125 return
3127 3126 # inline revlogs are not yet supported because they suffer from an issue
3128 3127 # when rewriting them (since it is not an append-only operation).
3129 3128 # See issue6485.
3130 3129 assert not self._inline
3131 3130 if not helpers[1] and not helpers[2]:
3132 3131 # Nothing to generate or remove
3133 3132 return
3134 3133
3135 3134 # changelog implements a "delayed" writing mechanism that assumes that
3136 3135 # all index data is written in append mode and is therefore incompatible
3137 3136 # with the seeked write done in this method. The use of such "delayed"
3138 3137 # writing will soon be removed for revlog versions that support side
3139 3138 # data, so for now, we only keep this simple assert to highlight the
3140 3139 # situation.
3141 3140 delayed = getattr(self, '_delayed', False)
3142 3141 diverted = getattr(self, '_divert', False)
3143 3142 if delayed and not diverted:
3144 3143 msg = "cannot rewrite_sidedata of a delayed revlog"
3145 3144 raise error.ProgrammingError(msg)
3146 3145
3147 3146 new_entries = []
3148 3147 # append the new sidedata
3149 3148 with self._datafp(b'a+') as fp:
3150 3149 # Maybe this bug still exists, see revlog._writeentry
3151 3150 fp.seek(0, os.SEEK_END)
3152 3151 current_offset = fp.tell()
3153 3152 for rev in range(startrev, endrev + 1):
3154 3153 entry = self.index[rev]
3155 3154 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3156 3155 store=self,
3157 3156 sidedata_helpers=helpers,
3158 3157 sidedata={},
3159 3158 rev=rev,
3160 3159 )
3161 3160
3162 3161 serialized_sidedata = sidedatautil.serialize_sidedata(
3163 3162 new_sidedata
3164 3163 )
3165 3164 if entry[8] != 0 or entry[9] != 0:
3166 3165 # rewriting entries that already have sidedata is not
3167 3166 # supported yet, because it introduces garbage data in the
3168 3167 # revlog.
3169 3168 msg = b"Rewriting existing sidedata is not supported yet"
3170 3169 raise error.Abort(msg)
3171 3170
3172 3171 # Apply (potential) flags to add and to remove after running
3173 3172 # the sidedata helpers
3174 3173 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3175 3174 entry = (new_offset_flags,) + entry[1:8]
3176 3175 entry += (current_offset, len(serialized_sidedata))
3177 3176
3178 3177 fp.write(serialized_sidedata)
3179 3178 new_entries.append(entry)
3180 3179 current_offset += len(serialized_sidedata)
3181 3180
3182 3181 # rewrite the new index entries
3183 3182 with self._indexfp(b'r+') as fp:
3184 3183 fp.seek(startrev * self.index.entry_size)
3185 3184 for i, e in enumerate(new_entries):
3186 3185 rev = startrev + i
3187 3186 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3188 3187 packed = self.index.entry_binary(rev)
3189 3188 if rev == 0:
3190 3189 header = self._format_flags | self._format_version
3191 3190 header = self.index.pack_header(header)
3192 3191 packed = header + packed
3193 3192 fp.write(packed)
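As the index rewrite above shows, slots 8 and 9 of an index entry carry the sidedata location. A hedged helper (not part of this change) to read it back:

def sidedata_location(rlog, rev):
    # entry[8] is the offset of the serialized sidedata in the data
    # file, entry[9] its length (both 0 when no sidedata was written)
    entry = rlog.index[rev]
    return entry[8], entry[9]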
@@ -1,101 +1,101 b''
1 1 #testcases skip-detection fail-if-detected
2 2
3 3 Test situations that "should" only be reproducible:
4 4 - on networked filesystems, or
5 5 - a user using `hg debuglocks` to eliminate the lock file, or
6 6 - something (that doesn't respect the lock file) writing to the .hg directory
7 7 while we're running
8 8
9 9 $ hg init a
10 10 $ cd a
11 11
12 12 $ cat > "$TESTTMP/waitlock_editor.sh" <<EOF
13 13 > [ -n "\${WAITLOCK_ANNOUNCE:-}" ] && touch "\${WAITLOCK_ANNOUNCE}"
14 14 > f="\${WAITLOCK_FILE}"
15 15 > start=\`date +%s\`
16 16 > timeout=5
17 17 > while [ \\( ! -f \$f \\) -a \\( ! -L \$f \\) ]; do
18 18 > now=\`date +%s\`
19 19 > if [ "\`expr \$now - \$start\`" -gt \$timeout ]; then
20 20 > echo "timeout: \$f was not created in \$timeout seconds (it is now \$(date +%s))"
21 21 > exit 1
22 22 > fi
23 23 > sleep 0.1
24 24 > done
25 25 > if [ \$# -gt 1 ]; then
26 26 > cat "\$@"
27 27 > fi
28 28 > EOF
29 29
30 30 Things behave differently if we don't already have a 00changelog.i file when
31 31 this all starts, so let's make one.
32 32
33 33 $ echo r0 > r0
34 34 $ hg commit -qAm 'r0'
35 35
36 36 Start an hg commit that will take a while
37 37 $ EDITOR_STARTED="$(pwd)/.editor_started"
38 38 $ MISCHIEF_MANAGED="$(pwd)/.mischief_managed"
39 39 $ JOBS_FINISHED="$(pwd)/.jobs_finished"
40 40
41 41 #if fail-if-detected
42 42 $ cat >> .hg/hgrc << EOF
43 43 > [debug]
44 44 > revlog.verifyposition.changelog = fail
45 45 > EOF
46 46 #endif
47 47
48 48 $ echo foo > foo
49 49 $ (WAITLOCK_ANNOUNCE="${EDITOR_STARTED}" \
50 50 > WAITLOCK_FILE="${MISCHIEF_MANAGED}" \
51 51 > HGEDITOR="sh $TESTTMP/waitlock_editor.sh" \
52 52 > hg commit -qAm 'r1 (foo)' --edit foo > .foo_commit_out 2>&1 ; touch "${JOBS_FINISHED}") &
53 53
54 54 Wait for the "editor" to actually start
55 55 $ WAITLOCK_FILE="${EDITOR_STARTED}" sh "$TESTTMP/waitlock_editor.sh"
56 56
57 57 Break the locks, and make another commit.
58 58 $ hg debuglocks -LW
59 59 $ echo bar > bar
60 60 $ hg commit -qAm 'r2 (bar)' bar
61 61 $ hg debugrevlogindex -c
62 62 rev linkrev nodeid p1 p2
63 63 0 0 222799e2f90b 000000000000 000000000000
64 64 1 1 6f124f6007a0 222799e2f90b 000000000000
65 65
66 66 Awaken the editor from that first commit
67 67 $ touch "${MISCHIEF_MANAGED}"
68 68 And wait for it to finish
69 69 $ WAITLOCK_FILE="${JOBS_FINISHED}" sh "$TESTTMP/waitlock_editor.sh"
70 70
71 71 #if skip-detection
72 72 (Ensure there was no output)
73 73 $ cat .foo_commit_out
74 74 And observe a corrupted repository -- rev 2's linkrev is 1, which should never
75 75 happen for the changelog (the linkrev should always refer to itself).
76 76 $ hg debugrevlogindex -c
77 77 rev linkrev nodeid p1 p2
78 78 0 0 222799e2f90b 000000000000 000000000000
79 79 1 1 6f124f6007a0 222799e2f90b 000000000000
80 80 2 1 ac80e6205bb2 222799e2f90b 000000000000
81 81 #endif
82 82
83 83 #if fail-if-detected
84 84 $ cat .foo_commit_out
85 85 transaction abort!
86 86 rollback completed
87 87 note: commit message saved in .hg/last-message.txt
88 88 note: use 'hg commit --logfile .hg/last-message.txt --edit' to reuse it
89 89 abort: 00changelog.i: file cursor at position 249, expected 121
90 90 And no corruption in the changelog.
91 91 $ hg debugrevlogindex -c
92 92 rev linkrev nodeid p1 p2
93 93 0 0 222799e2f90b 000000000000 000000000000
94 1 1 6f124f6007a0 222799e2f90b 000000000000
94 1 1 6f124f6007a0 222799e2f90b 000000000000 (missing-correct-output !)
95 95 And, because of transactions, there's none in the manifestlog either.
96 96 $ hg debugrevlogindex -m
97 97 rev linkrev nodeid p1 p2
98 98 0 0 7b7020262a56 000000000000 000000000000
99 99 1 1 ad3fe36d86d9 7b7020262a56 000000000000
100 100 #endif
101 101
@@ -1,505 +1,529 b''
1 1 # test revlog interaction about raw data (flagprocessor)
2 2
3 3 from __future__ import absolute_import, print_function
4 4
5 5 import collections
6 6 import hashlib
7 7 import sys
8 8
9 9 from mercurial import (
10 10 encoding,
11 11 revlog,
12 12 transaction,
13 13 vfs,
14 14 )
15 15
16 16 from mercurial.revlogutils import (
17 17 constants,
18 18 deltas,
19 19 flagutil,
20 20 )
21 21
22
23 class _NoTransaction(object):
24 """transaction like object to update the nodemap outside a transaction"""
25
26 def __init__(self):
27 self._postclose = {}
28
29 def addpostclose(self, callback_id, callback_func):
30 self._postclose[callback_id] = callback_func
31
32 def registertmp(self, *args, **kwargs):
33 pass
34
35 def addbackup(self, *args, **kwargs):
36 pass
37
38 def add(self, *args, **kwargs):
39 pass
40
41 def addabort(self, *args, **kwargs):
42 pass
43
44 def _report(self, *args):
45 pass
46
47
22 48 # TESTTMP is optional. This makes it convenient to run without run-tests.py
23 49 tvfs = vfs.vfs(encoding.environ.get(b'TESTTMP', b'/tmp'))
24 50
25 51 # Enable generaldelta otherwise revlog won't use delta as expected by the test
26 52 tvfs.options = {
27 53 b'generaldelta': True,
28 54 b'revlogv1': True,
29 55 b'sparse-revlog': True,
30 56 }
31 57
32 58 # The test wants to control whether to use delta explicitly, based on
33 59 # "storedeltachains".
34 60 revlog.revlog._isgooddeltainfo = lambda self, d, textlen: self._storedeltachains
35 61
36 62
37 63 def abort(msg):
38 64 print('abort: %s' % msg)
39 65 # Return 0 so run-tests.py could compare the output.
40 66 sys.exit()
41 67
42 68
43 69 # Register a revlog processor for flag EXTSTORED.
44 70 #
45 71 # It simply prepends a fixed header, and replaces '1' with 'i'. So it has
46 72 # insertion and replacement, and may be interesting to test revlog's line-based
47 73 # deltas.
48 74 _extheader = b'E\n'
49 75
50 76
51 77 def readprocessor(self, rawtext):
52 78 # True: the returned text could be used to verify hash
53 79 text = rawtext[len(_extheader) :].replace(b'i', b'1')
54 80 return text, True
55 81
56 82
57 83 def writeprocessor(self, text):
58 84 # False: the returned rawtext shouldn't be used to verify hash
59 85 rawtext = _extheader + text.replace(b'1', b'i')
60 86 return rawtext, False
61 87
62 88
63 89 def rawprocessor(self, rawtext):
64 90 # False: do not verify hash. Only the content returned by "readprocessor"
65 91 # can be used to verify hash.
66 92 return False
67 93
68 94
69 95 flagutil.addflagprocessor(
70 96 revlog.REVIDX_EXTSTORED, (readprocessor, writeprocessor, rawprocessor)
71 97 )
72 98
73 99 # Utilities about reading and appending revlog
74 100
75 101
76 102 def newtransaction():
77 103 # A transaction is required to write revlogs
78 104 report = lambda msg: None
79 105 return transaction.transaction(report, tvfs, {'plain': tvfs}, b'journal')
80 106
81 107
82 108 def newrevlog(name=b'_testrevlog', recreate=False):
83 109 if recreate:
84 110 tvfs.tryunlink(name + b'.i')
85 111 target = (constants.KIND_OTHER, b'test')
86 112 rlog = revlog.revlog(tvfs, target=target, radix=name)
87 113 return rlog
88 114
89 115
90 116 def appendrev(rlog, text, tr, isext=False, isdelta=True):
91 117 """Append a revision. If isext is True, set the EXTSTORED flag so flag
92 118 processor will be used (and rawtext is different from text). If isdelta is
93 119 True, force the revision to be a delta, otherwise it's full text.
94 120 """
95 121 nextrev = len(rlog)
96 122 p1 = rlog.node(nextrev - 1)
97 123 p2 = rlog.nullid
98 124 if isext:
99 125 flags = revlog.REVIDX_EXTSTORED
100 126 else:
101 127 flags = revlog.REVIDX_DEFAULT_FLAGS
102 128 # Change storedeltachains temporarily, to override revlog's delta decision
103 129 rlog._storedeltachains = isdelta
104 130 try:
105 131 rlog.addrevision(text, tr, nextrev, p1, p2, flags=flags)
106 132 return nextrev
107 133 except Exception as ex:
108 134 abort('rev %d: failed to append: %s' % (nextrev, ex))
109 135 finally:
110 136 # Restore storedeltachains. It is always True, see revlog.__init__
111 137 rlog._storedeltachains = True
112 138
113 139
114 140 def addgroupcopy(rlog, tr, destname=b'_destrevlog', optimaldelta=True):
115 141 """Copy revlog to destname using revlog.addgroup. Return the copied revlog.
116 142
117 143 This emulates push or pull. They use changegroup. Changegroup requires
118 144 a repo to work. We don't have a repo, so a dummy changegroup is used.
119 145
120 146 If optimaldelta is True, use an optimized delta parent, so the destination
121 147 revlog can probably reuse it. Otherwise it builds a sub-optimal delta, and
122 148 the destination revlog needs more work to use it.
123 149
124 150 This exercises some revlog.addgroup (and revlog._addrevision(text=None))
125 151 code path, which is not covered by "appendrev" alone.
126 152 """
127 153
128 154 class dummychangegroup(object):
129 155 @staticmethod
130 156 def deltachunk(pnode):
131 157 pnode = pnode or rlog.nullid
132 158 parentrev = rlog.rev(pnode)
133 159 r = parentrev + 1
134 160 if r >= len(rlog):
135 161 return {}
136 162 if optimaldelta:
137 163 deltaparent = parentrev
138 164 else:
139 165 # suboptimal deltaparent
140 166 deltaparent = min(0, parentrev)
141 167 if not rlog.candelta(deltaparent, r):
142 168 deltaparent = -1
143 169 return {
144 170 b'node': rlog.node(r),
145 171 b'p1': pnode,
146 172 b'p2': rlog.nullid,
147 173 b'cs': rlog.node(rlog.linkrev(r)),
148 174 b'flags': rlog.flags(r),
149 175 b'deltabase': rlog.node(deltaparent),
150 176 b'delta': rlog.revdiff(deltaparent, r),
151 177 b'sidedata': rlog.sidedata(r),
152 178 }
153 179
154 180 def deltaiter(self):
155 181 chain = None
156 182 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
157 183 node = chunkdata[b'node']
158 184 p1 = chunkdata[b'p1']
159 185 p2 = chunkdata[b'p2']
160 186 cs = chunkdata[b'cs']
161 187 deltabase = chunkdata[b'deltabase']
162 188 delta = chunkdata[b'delta']
163 189 flags = chunkdata[b'flags']
164 190 sidedata = chunkdata[b'sidedata']
165 191
166 192 chain = node
167 193
168 194 yield (node, p1, p2, cs, deltabase, delta, flags, sidedata)
169 195
170 196 def linkmap(lnode):
171 197 return rlog.rev(lnode)
172 198
173 199 dlog = newrevlog(destname, recreate=True)
174 200 dummydeltas = dummychangegroup().deltaiter()
175 201 dlog.addgroup(dummydeltas, linkmap, tr)
176 202 return dlog
177 203
178 204
179 205 def lowlevelcopy(rlog, tr, destname=b'_destrevlog'):
180 206 """Like addgroupcopy, but use the low level revlog._addrevision directly.
181 207
182 208 It exercises some code paths that are hard to reach easily otherwise.
183 209 """
184 210 dlog = newrevlog(destname, recreate=True)
185 211 for r in rlog:
186 212 p1 = rlog.node(r - 1)
187 213 p2 = rlog.nullid
188 214 if r == 0 or (rlog.flags(r) & revlog.REVIDX_EXTSTORED):
189 215 text = rlog.rawdata(r)
190 216 cachedelta = None
191 217 else:
192 218 # deltaparent cannot have EXTSTORED flag.
193 219 deltaparent = max(
194 220 [-1]
195 221 + [
196 222 p
197 223 for p in range(r)
198 224 if rlog.flags(p) & revlog.REVIDX_EXTSTORED == 0
199 225 ]
200 226 )
201 227 text = None
202 228 cachedelta = (deltaparent, rlog.revdiff(deltaparent, r))
203 229 flags = rlog.flags(r)
204 ifh = dfh = None
205 try:
206 ifh = dlog.opener(dlog._indexfile, b'a+')
207 if not dlog._inline:
208 dfh = dlog.opener(dlog._datafile, b'a+')
230 with dlog._writing(_NoTransaction()):
209 231 dlog._addrevision(
210 rlog.node(r), text, tr, r, p1, p2, flags, cachedelta, ifh, dfh
232 rlog.node(r),
233 text,
234 tr,
235 r,
236 p1,
237 p2,
238 flags,
239 cachedelta,
211 240 )
212 finally:
213 if dfh is not None:
214 dfh.close()
215 if ifh is not None:
216 ifh.close()
217 241 return dlog
218 242
219 243
220 244 # Utilities to generate revisions for testing
221 245
222 246
223 247 def genbits(n):
224 248 """Given a number n, generate (2 ** (n * 2) + 1) numbers in range(2 ** n).
225 249 i.e. the generated numbers have a width of n bits.
226 250
227 251 The combination of two adjacent numbers will cover all possible cases.
228 252 That is to say, given any x, y where both x and y are in range(2 ** n),
229 253 there is an x followed immediately by y in the generated sequence.
230 254 """
231 255 m = 2 ** n
232 256
233 257 # Gray Code. See https://en.wikipedia.org/wiki/Gray_code
234 258 gray = lambda x: x ^ (x >> 1)
235 259 reversegray = {gray(i): i for i in range(m)}
236 260
237 261 # Generate (n * 2) bit gray code, yield lower n bits as X, and look for
238 262 # the next unused gray code where higher n bits equal to X.
239 263
240 264 # For gray codes whose higher bits are X, a[X] of them have been used.
241 265 a = [0] * m
242 266
243 267 # Iterate from 0.
244 268 x = 0
245 269 yield x
246 270 for i in range(m * m):
247 271 x = reversegray[x]
248 272 y = gray(a[x] + x * m) & (m - 1)
249 273 assert a[x] < m
250 274 a[x] += 1
251 275 x = y
252 276 yield x
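A worked check of the claim above: for n = 1 the generator yields five 1-bit values whose adjacent pairs cover all four (x, y) combinations:

assert list(genbits(1)) == [0, 0, 1, 1, 0]
# adjacent pairs: (0, 0), (0, 1), (1, 1), (1, 0)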
253 277
254 278
255 279 def gentext(rev):
256 280 '''Given a revision number, generate dummy text'''
257 281 return b''.join(b'%d\n' % j for j in range(-1, rev % 5))
258 282
259 283
260 284 def writecases(rlog, tr):
261 285 """Write some revisions interested to the test.
262 286
263 287 The test is interested in 3 properties of a revision:
264 288
265 289 - Is it a delta or a full text? (isdelta)
266 290 This is to catch some delta application issues.
267 291 - Does it have a flag of EXTSTORED? (isext)
268 292 This is to catch some flag processor issues. Especially when
269 293 interacted with revlog deltas.
270 294 - Is its text empty? (isempty)
271 295 This is less important. It is intended to try to catch some careless
272 296 checks like "if text" instead of "if text is None". Note: if a flag
273 297 processor is involved, the raw text may not be empty.
274 298
275 299 Write 65 revisions. So that all combinations of the above flags for
276 300 adjacent revisions are covered. That is to say,
277 301
278 302 len(set(
279 303 (r.delta, r.ext, r.empty, (r+1).delta, (r+1).ext, (r+1).empty)
280 304 for r in range(len(rlog) - 1)
281 305 )) is 64.
282 306
283 307 Where "r.delta", "r.ext", and "r.empty" are booleans matching properties
284 308 mentioned above.
285 309
286 310 Return expected [(text, rawtext)].
287 311 """
288 312 result = []
289 313 for i, x in enumerate(genbits(3)):
290 314 isdelta, isext, isempty = bool(x & 1), bool(x & 2), bool(x & 4)
291 315 if isempty:
292 316 text = b''
293 317 else:
294 318 text = gentext(i)
295 319 rev = appendrev(rlog, text, tr, isext=isext, isdelta=isdelta)
296 320
297 321 # Verify text, rawtext, and rawsize
298 322 if isext:
299 323 rawtext = writeprocessor(None, text)[0]
300 324 else:
301 325 rawtext = text
302 326 if rlog.rawsize(rev) != len(rawtext):
303 327 abort('rev %d: wrong rawsize' % rev)
304 328 if rlog.revision(rev, raw=False) != text:
305 329 abort('rev %d: wrong text' % rev)
306 330 if rlog.rawdata(rev) != rawtext:
307 331 abort('rev %d: wrong rawtext' % rev)
308 332 result.append((text, rawtext))
309 333
310 334 # Verify flags like isdelta, isext work as expected
311 335 # isdelta can be overridden to False if this or p1 has isext set
312 336 if bool(rlog.deltaparent(rev) > -1) and not isdelta:
313 337 abort('rev %d: isdelta is unexpected' % rev)
314 338 if bool(rlog.flags(rev)) != isext:
315 339 abort('rev %d: isext is ineffective' % rev)
316 340 return result
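A small sanity check of the coverage arithmetic in the docstring above:

# 3 boolean properties per revision, so 2 ** (3 * 2) + 1 = 65 revisions
# make every ordered pair of property vectors appear on adjacent revs
assert len(list(genbits(3))) == 65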
317 341
318 342
319 343 # Main test and checking
320 344
321 345
322 346 def checkrevlog(rlog, expected):
323 347 '''Check if revlog has expected contents. expected is [(text, rawtext)]'''
324 348 # Test using different access orders. This could expose some issues
325 349 # depending on revlog caching (see revlog._cache).
326 350 for r0 in range(len(rlog) - 1):
327 351 r1 = r0 + 1
328 352 for revorder in [[r0, r1], [r1, r0]]:
329 353 for raworder in [[True], [False], [True, False], [False, True]]:
330 354 nlog = newrevlog()
331 355 for rev in revorder:
332 356 for raw in raworder:
333 357 if raw:
334 358 t = nlog.rawdata(rev)
335 359 else:
336 360 t = nlog.revision(rev)
337 361 if t != expected[rev][int(raw)]:
338 362 abort(
339 363 'rev %d: corrupted %stext'
340 364 % (rev, raw and 'raw' or '')
341 365 )
342 366
343 367
344 368 slicingdata = [
345 369 ([0, 1, 2, 3, 55, 56, 58, 59, 60], [[0, 1], [2], [58], [59, 60]], 10),
346 370 ([0, 1, 2, 3, 55, 56, 58, 59, 60], [[0, 1], [2], [58], [59, 60]], 10),
347 371 (
348 372 [-1, 0, 1, 2, 3, 55, 56, 58, 59, 60],
349 373 [[-1, 0, 1], [2], [58], [59, 60]],
350 374 10,
351 375 ),
352 376 ]
353 377
354 378
355 379 def slicingtest(rlog):
356 380 oldmin = rlog._srmingapsize
357 381 try:
358 382 # the test revlog is small, so we remove the floor under which
359 383 # slicing is disregarded.
360 384 rlog._srmingapsize = 0
361 385 for item in slicingdata:
362 386 chain, expected, target = item
363 387 result = deltas.slicechunk(rlog, chain, targetsize=target)
364 388 result = list(result)
365 389 if result != expected:
366 390 print('slicing differ:')
367 391 print(' chain: %s' % chain)
368 392 print(' target: %s' % target)
369 393 print(' expected: %s' % expected)
370 394 print(' result: %s' % result)
371 395 finally:
372 396 rlog._srmingapsize = oldmin
373 397
374 398
375 399 def md5sum(s):
376 400 return hashlib.md5(s).digest()
377 401
378 402
379 403 def _maketext(*coord):
380 404 """create piece of text according to range of integers
381 405
382 406 The test returned use a md5sum of the integer to make it less
383 407 compressible"""
384 408 pieces = []
385 409 for start, size in coord:
386 410 num = range(start, start + size)
387 411 p = [md5sum(b'%d' % r) for r in num]
388 412 pieces.append(b'\n'.join(p))
389 413 return b'\n'.join(pieces) + b'\n'
390 414
391 415
392 416 data = [
393 417 _maketext((0, 120), (456, 60)),
394 418 _maketext((0, 120), (345, 60)),
395 419 _maketext((0, 120), (734, 60)),
396 420 _maketext((0, 120), (734, 60), (923, 45)),
397 421 _maketext((0, 120), (734, 60), (234, 45)),
398 422 _maketext((0, 120), (734, 60), (564, 45)),
399 423 _maketext((0, 120), (734, 60), (361, 45)),
400 424 _maketext((0, 120), (734, 60), (489, 45)),
401 425 _maketext((0, 120), (123, 60)),
402 426 _maketext((0, 120), (145, 60)),
403 427 _maketext((0, 120), (104, 60)),
404 428 _maketext((0, 120), (430, 60)),
405 429 _maketext((0, 120), (430, 60), (923, 45)),
406 430 _maketext((0, 120), (430, 60), (234, 45)),
407 431 _maketext((0, 120), (430, 60), (564, 45)),
408 432 _maketext((0, 120), (430, 60), (361, 45)),
409 433 _maketext((0, 120), (430, 60), (489, 45)),
410 434 _maketext((0, 120), (249, 60)),
411 435 _maketext((0, 120), (832, 60)),
412 436 _maketext((0, 120), (891, 60)),
413 437 _maketext((0, 120), (543, 60)),
414 438 _maketext((0, 120), (120, 60)),
415 439 _maketext((0, 120), (60, 60), (768, 30)),
416 440 _maketext((0, 120), (60, 60), (260, 30)),
417 441 _maketext((0, 120), (60, 60), (450, 30)),
418 442 _maketext((0, 120), (60, 60), (361, 30)),
419 443 _maketext((0, 120), (60, 60), (886, 30)),
420 444 _maketext((0, 120), (60, 60), (116, 30)),
421 445 _maketext((0, 120), (60, 60), (567, 30), (629, 40)),
422 446 _maketext((0, 120), (60, 60), (569, 30), (745, 40)),
423 447 _maketext((0, 120), (60, 60), (777, 30), (700, 40)),
424 448 _maketext((0, 120), (60, 60), (618, 30), (398, 40), (158, 10)),
425 449 ]
426 450
427 451
428 452 def makesnapshot(tr):
429 453 rl = newrevlog(name=b'_snaprevlog3', recreate=True)
430 454 for i in data:
431 455 appendrev(rl, i, tr)
432 456 return rl
433 457
434 458
435 459 snapshots = [-1, 0, 6, 8, 11, 17, 19, 21, 25, 30]
436 460
437 461
438 462 def issnapshottest(rlog):
439 463 result = []
440 464 if rlog.issnapshot(-1):
441 465 result.append(-1)
442 466 for rev in rlog:
443 467 if rlog.issnapshot(rev):
444 468 result.append(rev)
445 469 if snapshots != result:
446 470 print('snapshot differ:')
447 471 print(' expected: %s' % snapshots)
448 472 print(' got: %s' % result)
449 473
450 474
451 475 snapshotmapall = {0: [6, 8, 11, 17, 19, 25], 8: [21], -1: [0, 30]}
452 476 snapshotmap15 = {0: [17, 19, 25], 8: [21], -1: [30]}
453 477
454 478
455 479 def findsnapshottest(rlog):
456 480 resultall = collections.defaultdict(list)
457 481 deltas._findsnapshots(rlog, resultall, 0)
458 482 resultall = dict(resultall.items())
459 483 if resultall != snapshotmapall:
460 484 print('snapshot map differ:')
461 485 print(' expected: %s' % snapshotmapall)
462 486 print(' got: %s' % resultall)
463 487 result15 = collections.defaultdict(list)
464 488 deltas._findsnapshots(rlog, result15, 15)
465 489 result15 = dict(result15.items())
466 490 if result15 != snapshotmap15:
467 491 print('snapshot map differ:')
468 492 print(' expected: %s' % snapshotmap15)
469 493 print(' got: %s' % result15)
470 494
471 495
472 496 def maintest():
473 497 with newtransaction() as tr:
474 498 rl = newrevlog(recreate=True)
475 499 expected = writecases(rl, tr)
476 500 checkrevlog(rl, expected)
477 501 print('local test passed')
478 502 # Copy via revlog.addgroup
479 503 rl1 = addgroupcopy(rl, tr)
480 504 checkrevlog(rl1, expected)
481 505 rl2 = addgroupcopy(rl, tr, optimaldelta=False)
482 506 checkrevlog(rl2, expected)
483 507 print('addgroupcopy test passed')
484 508 # Copy via revlog.clone
485 509 rl3 = newrevlog(name=b'_destrevlog3', recreate=True)
486 510 rl.clone(tr, rl3)
487 511 checkrevlog(rl3, expected)
488 512 print('clone test passed')
489 513 # Copy via low-level revlog._addrevision
490 514 rl4 = lowlevelcopy(rl, tr)
491 515 checkrevlog(rl4, expected)
492 516 print('lowlevelcopy test passed')
493 517 slicingtest(rl)
494 518 print('slicing test passed')
495 519 rl5 = makesnapshot(tr)
496 520 issnapshottest(rl5)
497 521 print('issnapshot test passed')
498 522 findsnapshottest(rl5)
499 523 print('findsnapshot test passed')
500 524
501 525
502 526 try:
503 527 maintest()
504 528 except Exception as ex:
505 529 abort('crashed: %s' % ex)