revlog: accept a revs argument in `headrevs`...
Boris Feld - r41311:1421d048 default
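This commit threads an optional `revs` argument through `headrevs` in both changelog.py and revlog.py, so callers can compute the heads of an arbitrary subset of revisions instead of the whole repository. A minimal sketch of the resulting calling convention (the `repo` object and the rev numbers are hypothetical, not part of the diff):

    cl = repo.changelog

    # Existing behaviour, unchanged: heads of the (possibly filtered) repo.
    all_heads = cl.headrevs()

    # New in this commit: heads of a caller-supplied subset of revisions.
    subset_heads = cl.headrevs([0, 1, 2, 5])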
@@ -1,555 +1,555 b''
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullid,
15 15 )
16 16 from .thirdparty import (
17 17 attr,
18 18 )
19 19
20 20 from . import (
21 21 encoding,
22 22 error,
23 23 pycompat,
24 24 revlog,
25 25 )
26 26 from .utils import (
27 27 dateutil,
28 28 stringutil,
29 29 )
30 30
31 31 _defaultextra = {'branch': 'default'}
32 32
33 33 def _string_escape(text):
34 34 """
35 35 >>> from .pycompat import bytechr as chr
36 36 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
37 37 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)sab%(cr)scd%(bs)s%(nl)s" % d
38 38 >>> s
39 39 'ab\\ncd\\\\\\\\n\\x00ab\\rcd\\\\\\n'
40 40 >>> res = _string_escape(s)
41 41 >>> s == stringutil.unescapestr(res)
42 42 True
43 43 """
44 44 # subset of the string_escape codec
45 45 text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
46 46 return text.replace('\0', '\\0')
47 47
48 48 def decodeextra(text):
49 49 """
50 50 >>> from .pycompat import bytechr as chr
51 51 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
52 52 ... ).items())
53 53 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
54 54 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
55 55 ... b'baz': chr(92) + chr(0) + b'2'})
56 56 ... ).items())
57 57 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
58 58 """
59 59 extra = _defaultextra.copy()
60 60 for l in text.split('\0'):
61 61 if l:
62 62 if '\\0' in l:
63 63 # fix up \0 without getting into trouble with \\0
64 64 l = l.replace('\\\\', '\\\\\n')
65 65 l = l.replace('\\0', '\0')
66 66 l = l.replace('\n', '')
67 67 k, v = stringutil.unescapestr(l).split(':', 1)
68 68 extra[k] = v
69 69 return extra
70 70
71 71 def encodeextra(d):
72 72 # keys must be sorted to produce a deterministic changelog entry
73 73 items = [_string_escape('%s:%s' % (k, d[k])) for k in sorted(d)]
74 74 return "\0".join(items)
75 75
76 76 def stripdesc(desc):
77 77 """strip trailing whitespace and leading and trailing empty lines"""
78 78 return '\n'.join([l.rstrip() for l in desc.splitlines()]).strip('\n')
79 79
80 80 class appender(object):
81 81 '''the changelog index must be updated last on disk, so we use this class
82 82 to delay writes to it'''
83 83 def __init__(self, vfs, name, mode, buf):
84 84 self.data = buf
85 85 fp = vfs(name, mode)
86 86 self.fp = fp
87 87 self.offset = fp.tell()
88 88 self.size = vfs.fstat(fp).st_size
89 89 self._end = self.size
90 90
91 91 def end(self):
92 92 return self._end
93 93 def tell(self):
94 94 return self.offset
95 95 def flush(self):
96 96 pass
97 97
98 98 @property
99 99 def closed(self):
100 100 return self.fp.closed
101 101
102 102 def close(self):
103 103 self.fp.close()
104 104
105 105 def seek(self, offset, whence=0):
106 106 '''virtual file offset spans real file and data'''
107 107 if whence == 0:
108 108 self.offset = offset
109 109 elif whence == 1:
110 110 self.offset += offset
111 111 elif whence == 2:
112 112 self.offset = self.end() + offset
113 113 if self.offset < self.size:
114 114 self.fp.seek(self.offset)
115 115
116 116 def read(self, count=-1):
117 117 '''only trick here is reads that span real file and data'''
118 118 ret = ""
119 119 if self.offset < self.size:
120 120 s = self.fp.read(count)
121 121 ret = s
122 122 self.offset += len(s)
123 123 if count > 0:
124 124 count -= len(s)
125 125 if count != 0:
126 126 doff = self.offset - self.size
127 127 self.data.insert(0, "".join(self.data))
128 128 del self.data[1:]
129 129 s = self.data[0][doff:doff + count]
130 130 self.offset += len(s)
131 131 ret += s
132 132 return ret
133 133
134 134 def write(self, s):
135 135 self.data.append(bytes(s))
136 136 self.offset += len(s)
137 137 self._end += len(s)
138 138
139 139 def __enter__(self):
140 140 self.fp.__enter__()
141 141 return self
142 142
143 143 def __exit__(self, *args):
144 144 return self.fp.__exit__(*args)
145 145
146 146 def _divertopener(opener, target):
147 147 """build an opener that writes in 'target.a' instead of 'target'"""
148 148 def _divert(name, mode='r', checkambig=False):
149 149 if name != target:
150 150 return opener(name, mode)
151 151 return opener(name + ".a", mode)
152 152 return _divert
153 153
154 154 def _delayopener(opener, target, buf):
155 155 """build an opener that stores chunks in 'buf' instead of 'target'"""
156 156 def _delay(name, mode='r', checkambig=False):
157 157 if name != target:
158 158 return opener(name, mode)
159 159 return appender(opener, name, mode, buf)
160 160 return _delay
161 161
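For orientation, `_divertopener` and `_delayopener` implement the two phases of delayed changelog writes driven by `delayupdate` below. A toy model of the diversion trick, using only the standard library (illustrative, not Mercurial's actual opener API):

    import os, tempfile

    def divert(opener, target):
        # Same shape as _divertopener: writes aimed at `target` land in
        # `target + '.a'`, which a finalize step later renames into place.
        def _divert(name, mode='r'):
            if name != target:
                return opener(name, mode)
            return opener(name + '.a', mode)
        return _divert

    tmpdir = tempfile.mkdtemp()
    op = divert(lambda name, mode='r': open(os.path.join(tmpdir, name), mode),
                'index')
    with op('index', 'w') as fp:          # actually opens 'index.a'
        fp.write('pending data')
    assert os.path.exists(os.path.join(tmpdir, 'index.a'))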
162 162 @attr.s
163 163 class _changelogrevision(object):
164 164 # Extensions might modify _defaultextra, so let the constructor below pass
165 165 # it in
166 166 extra = attr.ib()
167 167 manifest = attr.ib(default=nullid)
168 168 user = attr.ib(default='')
169 169 date = attr.ib(default=(0, 0))
170 170 files = attr.ib(default=attr.Factory(list))
171 171 description = attr.ib(default='')
172 172
173 173 class changelogrevision(object):
174 174 """Holds results of a parsed changelog revision.
175 175
176 176 Changelog revisions consist of multiple pieces of data, including
177 177 the manifest node, user, and date. This object exposes a view into
178 178 the parsed object.
179 179 """
180 180
181 181 __slots__ = (
182 182 u'_offsets',
183 183 u'_text',
184 184 )
185 185
186 186 def __new__(cls, text):
187 187 if not text:
188 188 return _changelogrevision(extra=_defaultextra)
189 189
190 190 self = super(changelogrevision, cls).__new__(cls)
191 191 # We could return here and implement the following as an __init__.
192 192 # But doing it here is equivalent and saves an extra function call.
193 193
194 194 # format used:
195 195 # nodeid\n : manifest node in ascii
196 196 # user\n : user, no \n or \r allowed
197 197 # time tz extra\n : date (time is int or float, timezone is int)
198 198 # : extra is metadata, encoded and separated by '\0'
199 199 # : older versions ignore it
200 200 # files\n\n : files modified by the cset, no \n or \r allowed
201 201 # (.*) : comment (free text, ideally utf-8)
202 202 #
203 203 # changelog v0 doesn't use extra
204 204
205 205 nl1 = text.index('\n')
206 206 nl2 = text.index('\n', nl1 + 1)
207 207 nl3 = text.index('\n', nl2 + 1)
208 208
209 209 # The list of files may be empty, in which case nl3 is the first
210 210 # newline of the double newline that precedes the description.
211 211 if text[nl3 + 1:nl3 + 2] == '\n':
212 212 doublenl = nl3
213 213 else:
214 214 doublenl = text.index('\n\n', nl3 + 1)
215 215
216 216 self._offsets = (nl1, nl2, nl3, doublenl)
217 217 self._text = text
218 218
219 219 return self
220 220
221 221 @property
222 222 def manifest(self):
223 223 return bin(self._text[0:self._offsets[0]])
224 224
225 225 @property
226 226 def user(self):
227 227 off = self._offsets
228 228 return encoding.tolocal(self._text[off[0] + 1:off[1]])
229 229
230 230 @property
231 231 def _rawdate(self):
232 232 off = self._offsets
233 233 dateextra = self._text[off[1] + 1:off[2]]
234 234 return dateextra.split(' ', 2)[0:2]
235 235
236 236 @property
237 237 def _rawextra(self):
238 238 off = self._offsets
239 239 dateextra = self._text[off[1] + 1:off[2]]
240 240 fields = dateextra.split(' ', 2)
241 241 if len(fields) != 3:
242 242 return None
243 243
244 244 return fields[2]
245 245
246 246 @property
247 247 def date(self):
248 248 raw = self._rawdate
249 249 time = float(raw[0])
250 250 # Various tools did silly things with the timezone.
251 251 try:
252 252 timezone = int(raw[1])
253 253 except ValueError:
254 254 timezone = 0
255 255
256 256 return time, timezone
257 257
258 258 @property
259 259 def extra(self):
260 260 raw = self._rawextra
261 261 if raw is None:
262 262 return _defaultextra
263 263
264 264 return decodeextra(raw)
265 265
266 266 @property
267 267 def files(self):
268 268 off = self._offsets
269 269 if off[2] == off[3]:
270 270 return []
271 271
272 272 return self._text[off[2] + 1:off[3]].split('\n')
273 273
274 274 @property
275 275 def description(self):
276 276 return encoding.tolocal(self._text[self._offsets[3] + 2:])
277 277
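The offset computation in `__new__` above can be reproduced standalone. A sketch with a hand-built entry following the documented layout (all values hypothetical):

    text = (b'89abcdef' * 5 + b'\n'              # 40 hex chars: manifest node
            b'Jane Doe <jane@example.com>\n'     # user
            b'1500000000 0\n'                    # time tz (no extra here)
            b'file-a\nfile-b\n'                  # files touched by the cset
            b'\n'
            b'commit message')
    nl1 = text.index(b'\n')
    nl2 = text.index(b'\n', nl1 + 1)
    nl3 = text.index(b'\n', nl2 + 1)
    if text[nl3 + 1:nl3 + 2] == b'\n':           # empty file list
        doublenl = nl3
    else:
        doublenl = text.index(b'\n\n', nl3 + 1)
    assert text[nl3 + 1:doublenl].split(b'\n') == [b'file-a', b'file-b']
    assert text[doublenl + 2:] == b'commit message'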
278 278 class changelog(revlog.revlog):
279 279 def __init__(self, opener, trypending=False):
280 280 """Load a changelog revlog using an opener.
281 281
282 282 If ``trypending`` is true, we attempt to load the index from a
283 283 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
284 284 The ``00changelog.i.a`` file contains index (and possibly inline
285 285 revision) data for a transaction that hasn't been finalized yet.
286 286 It exists in a separate file to facilitate readers (such as
287 287 hook processes) accessing data before a transaction is finalized.
288 288 """
289 289 if trypending and opener.exists('00changelog.i.a'):
290 290 indexfile = '00changelog.i.a'
291 291 else:
292 292 indexfile = '00changelog.i'
293 293
294 294 datafile = '00changelog.d'
295 295 revlog.revlog.__init__(self, opener, indexfile, datafile=datafile,
296 296 checkambig=True, mmaplargeindex=True)
297 297
298 298 if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
299 299 # changelogs don't benefit from generaldelta.
300 300
301 301 self.version &= ~revlog.FLAG_GENERALDELTA
302 302 self._generaldelta = False
303 303
304 304 # Delta chains for changelogs tend to be very small because entries
305 305 # tend to be small and don't delta well with each other. So disable delta
306 306 # chains.
307 307 self._storedeltachains = False
308 308
309 309 self._realopener = opener
310 310 self._delayed = False
311 311 self._delaybuf = None
312 312 self._divert = False
313 313 self.filteredrevs = frozenset()
314 314
315 315 def tiprev(self):
316 316 for i in pycompat.xrange(len(self) - 1, -2, -1):
317 317 if i not in self.filteredrevs:
318 318 return i
319 319
320 320 def tip(self):
321 321 """filtered version of revlog.tip"""
322 322 return self.node(self.tiprev())
323 323
324 324 def __contains__(self, rev):
325 325 """filtered version of revlog.__contains__"""
326 326 return (0 <= rev < len(self)
327 327 and rev not in self.filteredrevs)
328 328
329 329 def __iter__(self):
330 330 """filtered version of revlog.__iter__"""
331 331 if len(self.filteredrevs) == 0:
332 332 return revlog.revlog.__iter__(self)
333 333
334 334 def filterediter():
335 335 for i in pycompat.xrange(len(self)):
336 336 if i not in self.filteredrevs:
337 337 yield i
338 338
339 339 return filterediter()
340 340
341 341 def revs(self, start=0, stop=None):
342 342 """filtered version of revlog.revs"""
343 343 for i in super(changelog, self).revs(start, stop):
344 344 if i not in self.filteredrevs:
345 345 yield i
346 346
347 347 def reachableroots(self, minroot, heads, roots, includepath=False):
348 348 return self.index.reachableroots2(minroot, heads, roots, includepath)
349 349
350 def headrevs(self):
351 if self.filteredrevs:
350 def headrevs(self, revs=None):
351 if revs is None and self.filteredrevs:
352 352 try:
353 353 return self.index.headrevsfiltered(self.filteredrevs)
354 354 # AttributeError covers non-c-extension environments and
355 355 # old c extensions without filter handling.
356 356 except AttributeError:
357 357 return self._headrevs()
358 358
359 return super(changelog, self).headrevs()
359 return super(changelog, self).headrevs(revs)
360 360
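Note the `revs is None` guard above: the filtered fast path is only taken for whole-repo queries. When a caller passes explicit `revs`, honoring `filteredrevs` becomes the caller's job, e.g. (hypothetical `cl` changelog instance):

    visible = [r for r in range(len(cl)) if r not in cl.filteredrevs]
    heads = cl.headrevs(visible)   # delegates to revlog.headrevs(revs)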
361 361 def strip(self, *args, **kwargs):
362 362 # XXX make something better than assert
363 363 # We can't expect proper strip behavior if we are filtered.
364 364 assert not self.filteredrevs
365 365 super(changelog, self).strip(*args, **kwargs)
366 366
367 367 def rev(self, node):
368 368 """filtered version of revlog.rev"""
369 369 r = super(changelog, self).rev(node)
370 370 if r in self.filteredrevs:
371 371 raise error.FilteredLookupError(hex(node), self.indexfile,
372 372 _('filtered node'))
373 373 return r
374 374
375 375 def node(self, rev):
376 376 """filtered version of revlog.node"""
377 377 if rev in self.filteredrevs:
378 378 raise error.FilteredIndexError(rev)
379 379 return super(changelog, self).node(rev)
380 380
381 381 def linkrev(self, rev):
382 382 """filtered version of revlog.linkrev"""
383 383 if rev in self.filteredrevs:
384 384 raise error.FilteredIndexError(rev)
385 385 return super(changelog, self).linkrev(rev)
386 386
387 387 def parentrevs(self, rev):
388 388 """filtered version of revlog.parentrevs"""
389 389 if rev in self.filteredrevs:
390 390 raise error.FilteredIndexError(rev)
391 391 return super(changelog, self).parentrevs(rev)
392 392
393 393 def flags(self, rev):
394 394 """filtered version of revlog.flags"""
395 395 if rev in self.filteredrevs:
396 396 raise error.FilteredIndexError(rev)
397 397 return super(changelog, self).flags(rev)
398 398
399 399 def delayupdate(self, tr):
400 400 "delay visibility of index updates to other readers"
401 401
402 402 if not self._delayed:
403 403 if len(self) == 0:
404 404 self._divert = True
405 405 if self._realopener.exists(self.indexfile + '.a'):
406 406 self._realopener.unlink(self.indexfile + '.a')
407 407 self.opener = _divertopener(self._realopener, self.indexfile)
408 408 else:
409 409 self._delaybuf = []
410 410 self.opener = _delayopener(self._realopener, self.indexfile,
411 411 self._delaybuf)
412 412 self._delayed = True
413 413 tr.addpending('cl-%i' % id(self), self._writepending)
414 414 tr.addfinalize('cl-%i' % id(self), self._finalize)
415 415
416 416 def _finalize(self, tr):
417 417 "finalize index updates"
418 418 self._delayed = False
419 419 self.opener = self._realopener
420 420 # move redirected index data back into place
421 421 if self._divert:
422 422 assert not self._delaybuf
423 423 tmpname = self.indexfile + ".a"
424 424 nfile = self.opener.open(tmpname)
425 425 nfile.close()
426 426 self.opener.rename(tmpname, self.indexfile, checkambig=True)
427 427 elif self._delaybuf:
428 428 fp = self.opener(self.indexfile, 'a', checkambig=True)
429 429 fp.write("".join(self._delaybuf))
430 430 fp.close()
431 431 self._delaybuf = None
432 432 self._divert = False
433 433 # split when we're done
434 434 self._enforceinlinesize(tr)
435 435
436 436 def _writepending(self, tr):
437 437 "create a file containing the unfinalized state for pretxnchangegroup"
438 438 if self._delaybuf:
439 439 # make a temporary copy of the index
440 440 fp1 = self._realopener(self.indexfile)
441 441 pendingfilename = self.indexfile + ".a"
442 442 # register as a temp file to ensure cleanup on failure
443 443 tr.registertmp(pendingfilename)
444 444 # write existing data
445 445 fp2 = self._realopener(pendingfilename, "w")
446 446 fp2.write(fp1.read())
447 447 # add pending data
448 448 fp2.write("".join(self._delaybuf))
449 449 fp2.close()
450 450 # switch modes so finalize can simply rename
451 451 self._delaybuf = None
452 452 self._divert = True
453 453 self.opener = _divertopener(self._realopener, self.indexfile)
454 454
455 455 if self._divert:
456 456 return True
457 457
458 458 return False
459 459
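Taken together, `delayupdate`, `_writepending` and `_finalize` form a small state machine; a sketch of the intended order within one transaction (method names as above, sequencing inferred from the callbacks they register on `tr`):

    cl.delayupdate(tr)     # index writes now go to a buffer or 00changelog.i.a
    # ... new revisions are added to the changelog ...
    cl._writepending(tr)   # 'pretxnchangegroup' hooks can read 00changelog.i.a
    cl._finalize(tr)       # buffer appended / .i.a renamed over 00changelog.i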
460 460 def _enforceinlinesize(self, tr, fp=None):
461 461 if not self._delayed:
462 462 revlog.revlog._enforceinlinesize(self, tr, fp)
463 463
464 464 def read(self, node):
465 465 """Obtain data from a parsed changelog revision.
466 466
467 467 Returns a 6-tuple of:
468 468
469 469 - manifest node in binary
470 470 - author/user as a localstr
471 471 - date as a 2-tuple of (time, timezone)
472 472 - list of files
473 473 - commit message as a localstr
474 474 - dict of extra metadata
475 475
476 476 Unless you need to access all fields, consider calling
477 477 ``changelogrevision`` instead, as it is faster for partial object
478 478 access.
479 479 """
480 480 c = changelogrevision(self.revision(node))
481 481 return (
482 482 c.manifest,
483 483 c.user,
484 484 c.date,
485 485 c.files,
486 486 c.description,
487 487 c.extra
488 488 )
489 489
490 490 def changelogrevision(self, nodeorrev):
491 491 """Obtain a ``changelogrevision`` for a node or revision."""
492 492 return changelogrevision(self.revision(nodeorrev))
493 493
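Typical use of the two accessors above, per the docstring's advice (the `node` value is hypothetical):

    manifest, user, date, files, desc, extra = cl.read(node)

    # Cheaper when only some fields are needed: parsing is lazy.
    c = cl.changelogrevision(node)
    branch = c.extra.get('branch', 'default')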
494 494 def readfiles(self, node):
495 495 """
496 496 short version of read that only returns the files modified by the cset
497 497 """
498 498 text = self.revision(node)
499 499 if not text:
500 500 return []
501 501 last = text.index("\n\n")
502 502 l = text[:last].split('\n')
503 503 return l[3:]
504 504
505 505 def add(self, manifest, files, desc, transaction, p1, p2,
506 506 user, date=None, extra=None):
507 507 # Convert to UTF-8 encoded bytestrings as the very first
508 508 # thing: calling any method on a localstr object will turn it
509 509 # into a str object and the cached UTF-8 string is thus lost.
510 510 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
511 511
512 512 user = user.strip()
513 513 # An empty username or a username with a "\n" will make the
514 514 # revision text contain two "\n\n" sequences -> corrupt
515 515 # repository since read cannot unpack the revision.
516 516 if not user:
517 517 raise error.StorageError(_("empty username"))
518 518 if "\n" in user:
519 519 raise error.StorageError(_("username %r contains a newline")
520 520 % pycompat.bytestr(user))
521 521
522 522 desc = stripdesc(desc)
523 523
524 524 if date:
525 525 parseddate = "%d %d" % dateutil.parsedate(date)
526 526 else:
527 527 parseddate = "%d %d" % dateutil.makedate()
528 528 if extra:
529 529 branch = extra.get("branch")
530 530 if branch in ("default", ""):
531 531 del extra["branch"]
532 532 elif branch in (".", "null", "tip"):
533 533 raise error.StorageError(_('the name \'%s\' is reserved')
534 534 % branch)
535 535 if extra:
536 536 extra = encodeextra(extra)
537 537 parseddate = "%s %s" % (parseddate, extra)
538 538 l = [hex(manifest), user, parseddate] + sorted(files) + ["", desc]
539 539 text = "\n".join(l)
540 540 return self.addrevision(text, transaction, len(self), p1, p2)
541 541
542 542 def branchinfo(self, rev):
543 543 """return the branch name and open/close state of a revision
544 544
545 545 This function exists because creating a changectx object
546 546 just to access this is costly."""
547 547 extra = self.read(rev)[5]
548 548 return encoding.tolocal(extra.get("branch")), 'close' in extra
549 549
550 550 def _nodeduplicatecallback(self, transaction, node):
551 551 # keep track of revisions that got "re-added", e.g. unbundle of a known rev.
552 552 #
553 553 # We track them in a list to preserve their order from the source bundle
554 554 duplicates = transaction.changes.setdefault('revduplicates', [])
555 555 duplicates.append(self.rev(node))
@@ -1,2637 +1,2639 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullhex,
28 28 nullid,
29 29 nullrev,
30 30 short,
31 31 wdirfilenodeids,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .revlogutils.constants import (
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 REVIDX_DEFAULT_FLAGS,
41 41 REVIDX_ELLIPSIS,
42 42 REVIDX_EXTSTORED,
43 43 REVIDX_FLAGS_ORDER,
44 44 REVIDX_ISCENSORED,
45 45 REVIDX_KNOWN_FLAGS,
46 46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 )
56 56 from .thirdparty import (
57 57 attr,
58 58 )
59 59 from . import (
60 60 ancestor,
61 61 dagop,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 storageutil,
76 76 stringutil,
77 77 )
78 78
79 79 # blanked usage of all the names to silence pyflakes warnings
80 80 # We need these names available in the module for extensions.
81 81 REVLOGV0
82 82 REVLOGV1
83 83 REVLOGV2
84 84 FLAG_INLINE_DATA
85 85 FLAG_GENERALDELTA
86 86 REVLOG_DEFAULT_FLAGS
87 87 REVLOG_DEFAULT_FORMAT
88 88 REVLOG_DEFAULT_VERSION
89 89 REVLOGV1_FLAGS
90 90 REVLOGV2_FLAGS
91 91 REVIDX_ISCENSORED
92 92 REVIDX_ELLIPSIS
93 93 REVIDX_EXTSTORED
94 94 REVIDX_DEFAULT_FLAGS
95 95 REVIDX_FLAGS_ORDER
96 96 REVIDX_KNOWN_FLAGS
97 97 REVIDX_RAWTEXT_CHANGING_FLAGS
98 98
99 99 parsers = policy.importmod(r'parsers')
100 100 try:
101 101 from . import rustext
102 102 rustext.__name__ # force actual import (see hgdemandimport)
103 103 except ImportError:
104 104 rustext = None
105 105
106 106 # Aliased for performance.
107 107 _zlibdecompress = zlib.decompress
108 108
109 109 # max size of revlog with inline data
110 110 _maxinline = 131072
111 111 _chunksize = 1048576
112 112
113 113 # Store flag processors (cf. 'addflagprocessor()' to register)
114 114 _flagprocessors = {
115 115 REVIDX_ISCENSORED: None,
116 116 }
117 117
118 118 # Flag processors for REVIDX_ELLIPSIS.
119 119 def ellipsisreadprocessor(rl, text):
120 120 return text, False
121 121
122 122 def ellipsiswriteprocessor(rl, text):
123 123 return text, False
124 124
125 125 def ellipsisrawprocessor(rl, text):
126 126 return False
127 127
128 128 ellipsisprocessor = (
129 129 ellipsisreadprocessor,
130 130 ellipsiswriteprocessor,
131 131 ellipsisrawprocessor,
132 132 )
133 133
134 134 def addflagprocessor(flag, processor):
135 135 """Register a flag processor on a revision data flag.
136 136
137 137 Invariant:
138 138 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
139 139 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
140 140 - Only one flag processor can be registered on a specific flag.
141 141 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
142 142 following signatures:
143 143 - (read) f(self, rawtext) -> text, bool
144 144 - (write) f(self, text) -> rawtext, bool
145 145 - (raw) f(self, rawtext) -> bool
146 146 "text" is presented to the user. "rawtext" is stored in revlog data, not
147 147 directly visible to the user.
148 148 The boolean returned by these transforms is used to determine whether
149 149 the returned text can be used for hash integrity checking. For example,
150 150 if "write" returns False, then "text" is used to generate hash. If
151 151 "write" returns True, that basically means "rawtext" returned by "write"
152 152 should be used to generate hash. Usually, "write" and "read" return
153 153 different booleans. And "raw" returns the same boolean as "write".
154 154
155 155 Note: The 'raw' transform is used for changegroup generation and in some
156 156 debug commands. In this case the transform only indicates whether the
157 157 contents can be used for hash integrity checks.
158 158 """
159 159 _insertflagprocessor(flag, processor, _flagprocessors)
160 160
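A registration following the documented contract might look like this (a no-op processor, shown on `REVIDX_EXTSTORED` purely for illustration; real users such as extensions register their own transforms):

    def _read(self, rawtext):
        return rawtext, True      # True: result usable for hash checks

    def _write(self, text):
        return text, True

    def _raw(self, rawtext):
        return True

    addflagprocessor(REVIDX_EXTSTORED, (_read, _write, _raw))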
161 161 def _insertflagprocessor(flag, processor, flagprocessors):
162 162 if not flag & REVIDX_KNOWN_FLAGS:
163 163 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
164 164 raise error.ProgrammingError(msg)
165 165 if flag not in REVIDX_FLAGS_ORDER:
166 166 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
167 167 raise error.ProgrammingError(msg)
168 168 if flag in flagprocessors:
169 169 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
170 170 raise error.Abort(msg)
171 171 flagprocessors[flag] = processor
172 172
173 173 def getoffset(q):
174 174 return int(q >> 16)
175 175
176 176 def gettype(q):
177 177 return int(q & 0xFFFF)
178 178
179 179 def offset_type(offset, type):
180 180 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
181 181 raise ValueError('unknown revlog index flags')
182 182 return int(int(offset) << 16 | type)
183 183
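Each index entry's first field packs a 48-bit file offset with 16 flag bits; the helpers above round-trip it:

    packed = offset_type(1024, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1024
    assert gettype(packed) == REVIDX_ISCENSORED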
184 184 @attr.s(slots=True, frozen=True)
185 185 class _revisioninfo(object):
186 186 """Information about a revision that allows building its fulltext
187 187 node: expected hash of the revision
188 188 p1, p2: parent revs of the revision
189 189 btext: built text cache consisting of a one-element list
190 190 cachedelta: (baserev, uncompressed_delta) or None
191 191 flags: flags associated to the revision storage
192 192
193 193 One of btext[0] or cachedelta must be set.
194 194 """
195 195 node = attr.ib()
196 196 p1 = attr.ib()
197 197 p2 = attr.ib()
198 198 btext = attr.ib()
199 199 textlen = attr.ib()
200 200 cachedelta = attr.ib()
201 201 flags = attr.ib()
202 202
203 203 @interfaceutil.implementer(repository.irevisiondelta)
204 204 @attr.s(slots=True)
205 205 class revlogrevisiondelta(object):
206 206 node = attr.ib()
207 207 p1node = attr.ib()
208 208 p2node = attr.ib()
209 209 basenode = attr.ib()
210 210 flags = attr.ib()
211 211 baserevisionsize = attr.ib()
212 212 revision = attr.ib()
213 213 delta = attr.ib()
214 214 linknode = attr.ib(default=None)
215 215
216 216 @interfaceutil.implementer(repository.iverifyproblem)
217 217 @attr.s(frozen=True)
218 218 class revlogproblem(object):
219 219 warning = attr.ib(default=None)
220 220 error = attr.ib(default=None)
221 221 node = attr.ib(default=None)
222 222
223 223 # index v0:
224 224 # 4 bytes: offset
225 225 # 4 bytes: compressed length
226 226 # 4 bytes: base rev
227 227 # 4 bytes: link rev
228 228 # 20 bytes: parent 1 nodeid
229 229 # 20 bytes: parent 2 nodeid
230 230 # 20 bytes: nodeid
231 231 indexformatv0 = struct.Struct(">4l20s20s20s")
232 232 indexformatv0_pack = indexformatv0.pack
233 233 indexformatv0_unpack = indexformatv0.unpack
234 234
235 235 class revlogoldindex(list):
236 236 def __getitem__(self, i):
237 237 if i == -1:
238 238 return (0, 0, 0, -1, -1, -1, -1, nullid)
239 239 return list.__getitem__(self, i)
240 240
241 241 class revlogoldio(object):
242 242 def __init__(self):
243 243 self.size = indexformatv0.size
244 244
245 245 def parseindex(self, data, inline):
246 246 s = self.size
247 247 index = []
248 248 nodemap = {nullid: nullrev}
249 249 n = off = 0
250 250 l = len(data)
251 251 while off + s <= l:
252 252 cur = data[off:off + s]
253 253 off += s
254 254 e = indexformatv0_unpack(cur)
255 255 # transform to revlogv1 format
256 256 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
257 257 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
258 258 index.append(e2)
259 259 nodemap[e[6]] = n
260 260 n += 1
261 261
262 262 return revlogoldindex(index), nodemap, None
263 263
264 264 def packentry(self, entry, node, version, rev):
265 265 if gettype(entry[0]):
266 266 raise error.RevlogError(_('index entry flags need revlog '
267 267 'version 1'))
268 268 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
269 269 node(entry[5]), node(entry[6]), entry[7])
270 270 return indexformatv0_pack(*e2)
271 271
272 272 # index ng:
273 273 # 6 bytes: offset
274 274 # 2 bytes: flags
275 275 # 4 bytes: compressed length
276 276 # 4 bytes: uncompressed length
277 277 # 4 bytes: base rev
278 278 # 4 bytes: link rev
279 279 # 4 bytes: parent 1 rev
280 280 # 4 bytes: parent 2 rev
281 281 # 32 bytes: nodeid
282 282 indexformatng = struct.Struct(">Qiiiiii20s12x")
283 283 indexformatng_pack = indexformatng.pack
284 284 versionformat = struct.Struct(">I")
285 285 versionformat_pack = versionformat.pack
286 286 versionformat_unpack = versionformat.unpack
287 287
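As a sanity check of the v1 ("ng") layout above, one entry packs to 64 bytes (values here are made up):

    entry = indexformatng_pack(
        offset_type(0, 0),   # 6-byte offset + 2-byte flags
        120,                 # compressed length
        230,                 # uncompressed length
        0,                   # base rev
        0,                   # link rev
        -1, -1,              # parent revs (nullrev)
        b'\x00' * 20)        # 20-byte nodeid, zero-padded to 32 by '12x'
    assert len(entry) == indexformatng.size == 64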
288 288 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
289 289 # signed integer)
290 290 _maxentrysize = 0x7fffffff
291 291
292 292 class revlogio(object):
293 293 def __init__(self):
294 294 self.size = indexformatng.size
295 295
296 296 def parseindex(self, data, inline):
297 297 # call the C implementation to parse the index data
298 298 index, cache = parsers.parse_index2(data, inline)
299 299 return index, getattr(index, 'nodemap', None), cache
300 300
301 301 def packentry(self, entry, node, version, rev):
302 302 p = indexformatng_pack(*entry)
303 303 if rev == 0:
304 304 p = versionformat_pack(version) + p[4:]
305 305 return p
306 306
307 307 class revlog(object):
308 308 """
309 309 the underlying revision storage object
310 310
311 311 A revlog consists of two parts, an index and the revision data.
312 312
313 313 The index is a file with a fixed record size containing
314 314 information on each revision, including its nodeid (hash), the
315 315 nodeids of its parents, the position and offset of its data within
316 316 the data file, and the revision it's based on. Finally, each entry
317 317 contains a linkrev entry that can serve as a pointer to external
318 318 data.
319 319
320 320 The revision data itself is a linear collection of data chunks.
321 321 Each chunk represents a revision and is usually represented as a
322 322 delta against the previous chunk. To bound lookup time, runs of
323 323 deltas are limited to about 2 times the length of the original
324 324 version data. This makes retrieval of a version proportional to
325 325 its size, or O(1) relative to the number of revisions.
326 326
327 327 Both pieces of the revlog are written to in an append-only
328 328 fashion, which means we never need to rewrite a file to insert or
329 329 remove data, and can use some simple techniques to avoid the need
330 330 for locking while reading.
331 331
332 332 If checkambig, indexfile is opened with checkambig=True at
333 333 writing, to avoid file stat ambiguity.
334 334
335 335 If mmaplargeindex is True, and an mmapindexthreshold is set, the
336 336 index will be mmapped rather than read if it is larger than the
337 337 configured threshold.
338 338
339 339 If censorable is True, the revlog can have censored revisions.
340 340 """
341 341 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
342 342 mmaplargeindex=False, censorable=False):
343 343 """
344 344 create a revlog object
345 345
346 346 opener is a function that abstracts the file opening operation
347 347 and can be used to implement COW semantics or the like.
348 348 """
349 349 self.indexfile = indexfile
350 350 self.datafile = datafile or (indexfile[:-2] + ".d")
351 351 self.opener = opener
352 352 # When True, indexfile is opened with checkambig=True at writing, to
353 353 # avoid file stat ambiguity.
354 354 self._checkambig = checkambig
355 355 self._mmaplargeindex = mmaplargeindex
356 356 self._censorable = censorable
357 357 # 3-tuple of (node, rev, text) for a raw revision.
358 358 self._revisioncache = None
359 359 # Maps rev to chain base rev.
360 360 self._chainbasecache = util.lrucachedict(100)
361 361 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
362 362 self._chunkcache = (0, '')
363 363 # How much data to read and cache into the raw revlog data cache.
364 364 self._chunkcachesize = 65536
365 365 self._maxchainlen = None
366 366 self._deltabothparents = True
367 367 self.index = []
368 368 # Mapping of partial identifiers to full nodes.
369 369 self._pcache = {}
370 370 # Mapping of revision integer to full node.
371 371 self._nodecache = {nullid: nullrev}
372 372 self._nodepos = None
373 373 self._compengine = 'zlib'
374 374 self._maxdeltachainspan = -1
375 375 self._withsparseread = False
376 376 self._sparserevlog = False
377 377 self._srdensitythreshold = 0.50
378 378 self._srmingapsize = 262144
379 379
380 380 # Make copy of flag processors so each revlog instance can support
381 381 # custom flags.
382 382 self._flagprocessors = dict(_flagprocessors)
383 383
384 384 # 2-tuple of file handles being used for active writing.
385 385 self._writinghandles = None
386 386
387 387 self._loadindex()
388 388
389 389 def _loadindex(self):
390 390 mmapindexthreshold = None
391 391 opts = getattr(self.opener, 'options', {}) or {}
392 392
393 393 if 'revlogv2' in opts:
394 394 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
395 395 elif 'revlogv1' in opts:
396 396 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
397 397 if 'generaldelta' in opts:
398 398 newversionflags |= FLAG_GENERALDELTA
399 399 else:
400 400 newversionflags = REVLOG_DEFAULT_VERSION
401 401
402 402 if 'chunkcachesize' in opts:
403 403 self._chunkcachesize = opts['chunkcachesize']
404 404 if 'maxchainlen' in opts:
405 405 self._maxchainlen = opts['maxchainlen']
406 406 if 'deltabothparents' in opts:
407 407 self._deltabothparents = opts['deltabothparents']
408 408 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
409 409 if 'compengine' in opts:
410 410 self._compengine = opts['compengine']
411 411 if 'maxdeltachainspan' in opts:
412 412 self._maxdeltachainspan = opts['maxdeltachainspan']
413 413 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
414 414 mmapindexthreshold = opts['mmapindexthreshold']
415 415 self._sparserevlog = bool(opts.get('sparse-revlog', False))
416 416 withsparseread = bool(opts.get('with-sparse-read', False))
417 417 # sparse-revlog forces sparse-read
418 418 self._withsparseread = self._sparserevlog or withsparseread
419 419 if 'sparse-read-density-threshold' in opts:
420 420 self._srdensitythreshold = opts['sparse-read-density-threshold']
421 421 if 'sparse-read-min-gap-size' in opts:
422 422 self._srmingapsize = opts['sparse-read-min-gap-size']
423 423 if opts.get('enableellipsis'):
424 424 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
425 425
426 426 # revlog v0 doesn't have flag processors
427 427 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
428 428 _insertflagprocessor(flag, processor, self._flagprocessors)
429 429
430 430 if self._chunkcachesize <= 0:
431 431 raise error.RevlogError(_('revlog chunk cache size %r is not '
432 432 'greater than 0') % self._chunkcachesize)
433 433 elif self._chunkcachesize & (self._chunkcachesize - 1):
434 434 raise error.RevlogError(_('revlog chunk cache size %r is not a '
435 435 'power of 2') % self._chunkcachesize)
436 436
437 437 indexdata = ''
438 438 self._initempty = True
439 439 try:
440 440 with self._indexfp() as f:
441 441 if (mmapindexthreshold is not None and
442 442 self.opener.fstat(f).st_size >= mmapindexthreshold):
443 443 indexdata = util.buffer(util.mmapread(f))
444 444 else:
445 445 indexdata = f.read()
446 446 if len(indexdata) > 0:
447 447 versionflags = versionformat_unpack(indexdata[:4])[0]
448 448 self._initempty = False
449 449 else:
450 450 versionflags = newversionflags
451 451 except IOError as inst:
452 452 if inst.errno != errno.ENOENT:
453 453 raise
454 454
455 455 versionflags = newversionflags
456 456
457 457 self.version = versionflags
458 458
459 459 flags = versionflags & ~0xFFFF
460 460 fmt = versionflags & 0xFFFF
461 461
462 462 if fmt == REVLOGV0:
463 463 if flags:
464 464 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
465 465 'revlog %s') %
466 466 (flags >> 16, fmt, self.indexfile))
467 467
468 468 self._inline = False
469 469 self._generaldelta = False
470 470
471 471 elif fmt == REVLOGV1:
472 472 if flags & ~REVLOGV1_FLAGS:
473 473 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
474 474 'revlog %s') %
475 475 (flags >> 16, fmt, self.indexfile))
476 476
477 477 self._inline = versionflags & FLAG_INLINE_DATA
478 478 self._generaldelta = versionflags & FLAG_GENERALDELTA
479 479
480 480 elif fmt == REVLOGV2:
481 481 if flags & ~REVLOGV2_FLAGS:
482 482 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
483 483 'revlog %s') %
484 484 (flags >> 16, fmt, self.indexfile))
485 485
486 486 self._inline = versionflags & FLAG_INLINE_DATA
487 487 # generaldelta implied by version 2 revlogs.
488 488 self._generaldelta = True
489 489
490 490 else:
491 491 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
492 492 (fmt, self.indexfile))
493 493
494 494 self._storedeltachains = True
495 495
496 496 self._io = revlogio()
497 497 if self.version == REVLOGV0:
498 498 self._io = revlogoldio()
499 499 try:
500 500 d = self._io.parseindex(indexdata, self._inline)
501 501 except (ValueError, IndexError):
502 502 raise error.RevlogError(_("index %s is corrupted") %
503 503 self.indexfile)
504 504 self.index, nodemap, self._chunkcache = d
505 505 if nodemap is not None:
506 506 self.nodemap = self._nodecache = nodemap
507 507 if not self._chunkcache:
508 508 self._chunkclear()
509 509 # revnum -> (chain-length, sum-delta-length)
510 510 self._chaininfocache = {}
511 511 # revlog header -> revlog compressor
512 512 self._decompressors = {}
513 513
514 514 @util.propertycache
515 515 def _compressor(self):
516 516 return util.compengines[self._compengine].revlogcompressor()
517 517
518 518 def _indexfp(self, mode='r'):
519 519 """file object for the revlog's index file"""
520 520 args = {r'mode': mode}
521 521 if mode != 'r':
522 522 args[r'checkambig'] = self._checkambig
523 523 if mode == 'w':
524 524 args[r'atomictemp'] = True
525 525 return self.opener(self.indexfile, **args)
526 526
527 527 def _datafp(self, mode='r'):
528 528 """file object for the revlog's data file"""
529 529 return self.opener(self.datafile, mode=mode)
530 530
531 531 @contextlib.contextmanager
532 532 def _datareadfp(self, existingfp=None):
533 533 """file object suitable to read data"""
534 534 # Use explicit file handle, if given.
535 535 if existingfp is not None:
536 536 yield existingfp
537 537
538 538 # Use a file handle being actively used for writes, if available.
539 539 # There is some danger to doing this because reads will seek the
540 540 # file. However, _writeentry() performs a SEEK_END before all writes,
541 541 # so we should be safe.
542 542 elif self._writinghandles:
543 543 if self._inline:
544 544 yield self._writinghandles[0]
545 545 else:
546 546 yield self._writinghandles[1]
547 547
548 548 # Otherwise open a new file handle.
549 549 else:
550 550 if self._inline:
551 551 func = self._indexfp
552 552 else:
553 553 func = self._datafp
554 554 with func() as fp:
555 555 yield fp
556 556
557 557 def tip(self):
558 558 return self.node(len(self.index) - 1)
559 559 def __contains__(self, rev):
560 560 return 0 <= rev < len(self)
561 561 def __len__(self):
562 562 return len(self.index)
563 563 def __iter__(self):
564 564 return iter(pycompat.xrange(len(self)))
565 565 def revs(self, start=0, stop=None):
566 566 """iterate over all rev in this revlog (from start to stop)"""
567 567 return storageutil.iterrevs(len(self), start=start, stop=stop)
568 568
569 569 @util.propertycache
570 570 def nodemap(self):
571 571 if self.index:
572 572 # populate mapping down to the initial node
573 573 node0 = self.index[0][7] # get around changelog filtering
574 574 self.rev(node0)
575 575 return self._nodecache
576 576
577 577 def hasnode(self, node):
578 578 try:
579 579 self.rev(node)
580 580 return True
581 581 except KeyError:
582 582 return False
583 583
584 584 def candelta(self, baserev, rev):
585 585 """whether two revisions (baserev, rev) can be delta-ed or not"""
586 586 # Disable delta if either rev requires a content-changing flag
587 587 # processor (ex. LFS). This is because such flag processor can alter
588 588 # the rawtext content that the delta will be based on, and two clients
589 589 # could have the same revlog node with different flags (i.e. different
590 590 # rawtext contents) and the delta could be incompatible.
591 591 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
592 592 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
593 593 return False
594 594 return True
595 595
596 596 def clearcaches(self):
597 597 self._revisioncache = None
598 598 self._chainbasecache.clear()
599 599 self._chunkcache = (0, '')
600 600 self._pcache = {}
601 601
602 602 try:
603 603 self._nodecache.clearcaches()
604 604 except AttributeError:
605 605 self._nodecache = {nullid: nullrev}
606 606 self._nodepos = None
607 607
608 608 def rev(self, node):
609 609 try:
610 610 return self._nodecache[node]
611 611 except TypeError:
612 612 raise
613 613 except error.RevlogError:
614 614 # parsers.c radix tree lookup failed
615 615 if node == wdirid or node in wdirfilenodeids:
616 616 raise error.WdirUnsupported
617 617 raise error.LookupError(node, self.indexfile, _('no node'))
618 618 except KeyError:
619 619 # pure python cache lookup failed
620 620 n = self._nodecache
621 621 i = self.index
622 622 p = self._nodepos
623 623 if p is None:
624 624 p = len(i) - 1
625 625 else:
626 626 assert p < len(i)
627 627 for r in pycompat.xrange(p, -1, -1):
628 628 v = i[r][7]
629 629 n[v] = r
630 630 if v == node:
631 631 self._nodepos = r - 1
632 632 return r
633 633 if node == wdirid or node in wdirfilenodeids:
634 634 raise error.WdirUnsupported
635 635 raise error.LookupError(node, self.indexfile, _('no node'))
636 636
637 637 # Accessors for index entries.
638 638
639 639 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
640 640 # are flags.
641 641 def start(self, rev):
642 642 return int(self.index[rev][0] >> 16)
643 643
644 644 def flags(self, rev):
645 645 return self.index[rev][0] & 0xFFFF
646 646
647 647 def length(self, rev):
648 648 return self.index[rev][1]
649 649
650 650 def rawsize(self, rev):
651 651 """return the length of the uncompressed text for a given revision"""
652 652 l = self.index[rev][2]
653 653 if l >= 0:
654 654 return l
655 655
656 656 t = self.revision(rev, raw=True)
657 657 return len(t)
658 658
659 659 def size(self, rev):
660 660 """length of non-raw text (processed by a "read" flag processor)"""
661 661 # fast path: if no "read" flag processor could change the content,
662 662 # size is rawsize. note: ELLIPSIS is known to not change the content.
663 663 flags = self.flags(rev)
664 664 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
665 665 return self.rawsize(rev)
666 666
667 667 return len(self.revision(rev, raw=False))
668 668
669 669 def chainbase(self, rev):
670 670 base = self._chainbasecache.get(rev)
671 671 if base is not None:
672 672 return base
673 673
674 674 index = self.index
675 675 iterrev = rev
676 676 base = index[iterrev][3]
677 677 while base != iterrev:
678 678 iterrev = base
679 679 base = index[iterrev][3]
680 680
681 681 self._chainbasecache[rev] = base
682 682 return base
683 683
684 684 def linkrev(self, rev):
685 685 return self.index[rev][4]
686 686
687 687 def parentrevs(self, rev):
688 688 try:
689 689 entry = self.index[rev]
690 690 except IndexError:
691 691 if rev == wdirrev:
692 692 raise error.WdirUnsupported
693 693 raise
694 694
695 695 return entry[5], entry[6]
696 696
697 697 # fast parentrevs(rev) where rev isn't filtered
698 698 _uncheckedparentrevs = parentrevs
699 699
700 700 def node(self, rev):
701 701 try:
702 702 return self.index[rev][7]
703 703 except IndexError:
704 704 if rev == wdirrev:
705 705 raise error.WdirUnsupported
706 706 raise
707 707
708 708 # Derived from index values.
709 709
710 710 def end(self, rev):
711 711 return self.start(rev) + self.length(rev)
712 712
713 713 def parents(self, node):
714 714 i = self.index
715 715 d = i[self.rev(node)]
716 716 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
717 717
718 718 def chainlen(self, rev):
719 719 return self._chaininfo(rev)[0]
720 720
721 721 def _chaininfo(self, rev):
722 722 chaininfocache = self._chaininfocache
723 723 if rev in chaininfocache:
724 724 return chaininfocache[rev]
725 725 index = self.index
726 726 generaldelta = self._generaldelta
727 727 iterrev = rev
728 728 e = index[iterrev]
729 729 clen = 0
730 730 compresseddeltalen = 0
731 731 while iterrev != e[3]:
732 732 clen += 1
733 733 compresseddeltalen += e[1]
734 734 if generaldelta:
735 735 iterrev = e[3]
736 736 else:
737 737 iterrev -= 1
738 738 if iterrev in chaininfocache:
739 739 t = chaininfocache[iterrev]
740 740 clen += t[0]
741 741 compresseddeltalen += t[1]
742 742 break
743 743 e = index[iterrev]
744 744 else:
745 745 # Add text length of base since decompressing that also takes
746 746 # work. For cache hits the length is already included.
747 747 compresseddeltalen += e[1]
748 748 r = (clen, compresseddeltalen)
749 749 chaininfocache[rev] = r
750 750 return r
751 751
752 752 def _deltachain(self, rev, stoprev=None):
753 753 """Obtain the delta chain for a revision.
754 754
755 755 ``stoprev`` specifies a revision to stop at. If not specified, we
756 756 stop at the base of the chain.
757 757
758 758 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
759 759 revs in ascending order and ``stopped`` is a bool indicating whether
760 760 ``stoprev`` was hit.
761 761 """
762 762 # Try C implementation.
763 763 try:
764 764 return self.index.deltachain(rev, stoprev, self._generaldelta)
765 765 except AttributeError:
766 766 pass
767 767
768 768 chain = []
769 769
770 770 # Alias to prevent attribute lookup in tight loop.
771 771 index = self.index
772 772 generaldelta = self._generaldelta
773 773
774 774 iterrev = rev
775 775 e = index[iterrev]
776 776 while iterrev != e[3] and iterrev != stoprev:
777 777 chain.append(iterrev)
778 778 if generaldelta:
779 779 iterrev = e[3]
780 780 else:
781 781 iterrev -= 1
782 782 e = index[iterrev]
783 783
784 784 if iterrev == stoprev:
785 785 stopped = True
786 786 else:
787 787 chain.append(iterrev)
788 788 stopped = False
789 789
790 790 chain.reverse()
791 791 return chain, stopped
792 792
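Reading the return value of `_deltachain` (hypothetical revlog `rl` and rev numbers):

    chain, stopped = rl._deltachain(7)
    # `chain` holds the revs whose deltas rebuild rev 7, base first.
    partial, stopped = rl._deltachain(7, stoprev=3)
    # `stopped` is True if the walk hit stoprev before reaching the base.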
793 793 def ancestors(self, revs, stoprev=0, inclusive=False):
794 794 """Generate the ancestors of 'revs' in reverse revision order.
795 795 Does not generate revs lower than stoprev.
796 796
797 797 See the documentation for ancestor.lazyancestors for more details."""
798 798
799 799 # first, make sure start revisions aren't filtered
800 800 revs = list(revs)
801 801 checkrev = self.node
802 802 for r in revs:
803 803 checkrev(r)
804 804 # and we're sure ancestors aren't filtered as well
805 805
806 806 if rustext is not None:
807 807 lazyancestors = rustext.ancestor.LazyAncestors
808 808 arg = self.index
809 809 elif util.safehasattr(parsers, 'rustlazyancestors'):
810 810 lazyancestors = ancestor.rustlazyancestors
811 811 arg = self.index
812 812 else:
813 813 lazyancestors = ancestor.lazyancestors
814 814 arg = self._uncheckedparentrevs
815 815 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
816 816
817 817 def descendants(self, revs):
818 818 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
819 819
820 820 def findcommonmissing(self, common=None, heads=None):
821 821 """Return a tuple of the ancestors of common and the ancestors of heads
822 822 that are not ancestors of common. In revset terminology, we return the
823 823 tuple:
824 824
825 825 ::common, (::heads) - (::common)
826 826
827 827 The list is sorted by revision number, meaning it is
828 828 topologically sorted.
829 829
830 830 'heads' and 'common' are both lists of node IDs. If heads is
831 831 not supplied, uses all of the revlog's heads. If common is not
832 832 supplied, uses nullid."""
833 833 if common is None:
834 834 common = [nullid]
835 835 if heads is None:
836 836 heads = self.heads()
837 837
838 838 common = [self.rev(n) for n in common]
839 839 heads = [self.rev(n) for n in heads]
840 840
841 841 # we want the ancestors, but inclusive
842 842 class lazyset(object):
843 843 def __init__(self, lazyvalues):
844 844 self.addedvalues = set()
845 845 self.lazyvalues = lazyvalues
846 846
847 847 def __contains__(self, value):
848 848 return value in self.addedvalues or value in self.lazyvalues
849 849
850 850 def __iter__(self):
851 851 added = self.addedvalues
852 852 for r in added:
853 853 yield r
854 854 for r in self.lazyvalues:
855 855 if not r in added:
856 856 yield r
857 857
858 858 def add(self, value):
859 859 self.addedvalues.add(value)
860 860
861 861 def update(self, values):
862 862 self.addedvalues.update(values)
863 863
864 864 has = lazyset(self.ancestors(common))
865 865 has.add(nullrev)
866 866 has.update(common)
867 867
868 868 # take all ancestors from heads that aren't in has
869 869 missing = set()
870 870 visit = collections.deque(r for r in heads if r not in has)
871 871 while visit:
872 872 r = visit.popleft()
873 873 if r in missing:
874 874 continue
875 875 else:
876 876 missing.add(r)
877 877 for p in self.parentrevs(r):
878 878 if p not in has:
879 879 visit.append(p)
880 880 missing = list(missing)
881 881 missing.sort()
882 882 return has, [self.node(miss) for miss in missing]
883 883
884 884 def incrementalmissingrevs(self, common=None):
885 885 """Return an object that can be used to incrementally compute the
886 886 revision numbers of the ancestors of arbitrary sets that are not
887 887 ancestors of common. This is an ancestor.incrementalmissingancestors
888 888 object.
889 889
890 890 'common' is a list of revision numbers. If common is not supplied, uses
891 891 nullrev.
892 892 """
893 893 if common is None:
894 894 common = [nullrev]
895 895
896 896 if rustext is not None:
897 897 return rustext.ancestor.MissingAncestors(self.index, common)
898 898 return ancestor.incrementalmissingancestors(self.parentrevs, common)
899 899
900 900 def findmissingrevs(self, common=None, heads=None):
901 901 """Return the revision numbers of the ancestors of heads that
902 902 are not ancestors of common.
903 903
904 904 More specifically, return a list of revision numbers corresponding to
905 905 nodes N such that every N satisfies the following constraints:
906 906
907 907 1. N is an ancestor of some node in 'heads'
908 908 2. N is not an ancestor of any node in 'common'
909 909
910 910 The list is sorted by revision number, meaning it is
911 911 topologically sorted.
912 912
913 913 'heads' and 'common' are both lists of revision numbers. If heads is
914 914 not supplied, uses all of the revlog's heads. If common is not
915 915 supplied, uses nullid."""
916 916 if common is None:
917 917 common = [nullrev]
918 918 if heads is None:
919 919 heads = self.headrevs()
920 920
921 921 inc = self.incrementalmissingrevs(common=common)
922 922 return inc.missingancestors(heads)
923 923
924 924 def findmissing(self, common=None, heads=None):
925 925 """Return the ancestors of heads that are not ancestors of common.
926 926
927 927 More specifically, return a list of nodes N such that every N
928 928 satisfies the following constraints:
929 929
930 930 1. N is an ancestor of some node in 'heads'
931 931 2. N is not an ancestor of any node in 'common'
932 932
933 933 The list is sorted by revision number, meaning it is
934 934 topologically sorted.
935 935
936 936 'heads' and 'common' are both lists of node IDs. If heads is
937 937 not supplied, uses all of the revlog's heads. If common is not
938 938 supplied, uses nullid."""
939 939 if common is None:
940 940 common = [nullid]
941 941 if heads is None:
942 942 heads = self.heads()
943 943
944 944 common = [self.rev(n) for n in common]
945 945 heads = [self.rev(n) for n in heads]
946 946
947 947 inc = self.incrementalmissingrevs(common=common)
948 948 return [self.node(r) for r in inc.missingancestors(heads)]
949 949
950 950 def nodesbetween(self, roots=None, heads=None):
951 951 """Return a topological path from 'roots' to 'heads'.
952 952
953 953 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
954 954 topologically sorted list of all nodes N that satisfy both of
955 955 these constraints:
956 956
957 957 1. N is a descendant of some node in 'roots'
958 958 2. N is an ancestor of some node in 'heads'
959 959
960 960 Every node is considered to be both a descendant and an ancestor
961 961 of itself, so every reachable node in 'roots' and 'heads' will be
962 962 included in 'nodes'.
963 963
964 964 'outroots' is the list of reachable nodes in 'roots', i.e., the
965 965 subset of 'roots' that is returned in 'nodes'. Likewise,
966 966 'outheads' is the subset of 'heads' that is also in 'nodes'.
967 967
968 968 'roots' and 'heads' are both lists of node IDs. If 'roots' is
969 969 unspecified, uses nullid as the only root. If 'heads' is
970 970 unspecified, uses list of all of the revlog's heads."""
971 971 nonodes = ([], [], [])
972 972 if roots is not None:
973 973 roots = list(roots)
974 974 if not roots:
975 975 return nonodes
976 976 lowestrev = min([self.rev(n) for n in roots])
977 977 else:
978 978 roots = [nullid] # Everybody's a descendant of nullid
979 979 lowestrev = nullrev
980 980 if (lowestrev == nullrev) and (heads is None):
981 981 # We want _all_ the nodes!
982 982 return ([self.node(r) for r in self], [nullid], list(self.heads()))
983 983 if heads is None:
984 984 # All nodes are ancestors, so the latest ancestor is the last
985 985 # node.
986 986 highestrev = len(self) - 1
987 987 # Set ancestors to None to signal that every node is an ancestor.
988 988 ancestors = None
989 989 # Set heads to an empty dictionary for later discovery of heads
990 990 heads = {}
991 991 else:
992 992 heads = list(heads)
993 993 if not heads:
994 994 return nonodes
995 995 ancestors = set()
996 996 # Turn heads into a dictionary so we can remove 'fake' heads.
997 997 # Also, later we will be using it to filter out the heads we can't
998 998 # find from roots.
999 999 heads = dict.fromkeys(heads, False)
1000 1000 # Start at the top and keep marking parents until we're done.
1001 1001 nodestotag = set(heads)
1002 1002 # Remember where the top was so we can use it as a limit later.
1003 1003 highestrev = max([self.rev(n) for n in nodestotag])
1004 1004 while nodestotag:
1005 1005 # grab a node to tag
1006 1006 n = nodestotag.pop()
1007 1007 # Never tag nullid
1008 1008 if n == nullid:
1009 1009 continue
1010 1010 # A node's revision number represents its place in a
1011 1011 # topologically sorted list of nodes.
1012 1012 r = self.rev(n)
1013 1013 if r >= lowestrev:
1014 1014 if n not in ancestors:
1015 1015 # If we are possibly a descendant of one of the roots
1016 1016 # and we haven't already been marked as an ancestor
1017 1017 ancestors.add(n) # Mark as ancestor
1018 1018 # Add non-nullid parents to list of nodes to tag.
1019 1019 nodestotag.update([p for p in self.parents(n) if
1020 1020 p != nullid])
1021 1021 elif n in heads: # We've seen it before, is it a fake head?
1022 1022 # So it is, real heads should not be the ancestors of
1023 1023 # any other heads.
1024 1024 heads.pop(n)
1025 1025 if not ancestors:
1026 1026 return nonodes
1027 1027 # Now that we have our set of ancestors, we want to remove any
1028 1028 # roots that are not ancestors.
1029 1029
1030 1030 # If one of the roots was nullid, everything is included anyway.
1031 1031 if lowestrev > nullrev:
1032 1032 # But, since we weren't, let's recompute the lowest rev to not
1033 1033 # include roots that aren't ancestors.
1034 1034
1035 1035 # Filter out roots that aren't ancestors of heads
1036 1036 roots = [root for root in roots if root in ancestors]
1037 1037 # Recompute the lowest revision
1038 1038 if roots:
1039 1039 lowestrev = min([self.rev(root) for root in roots])
1040 1040 else:
1041 1041 # No more roots? Return empty list
1042 1042 return nonodes
1043 1043 else:
1044 1044 # We are descending from nullid, and don't need to care about
1045 1045 # any other roots.
1046 1046 lowestrev = nullrev
1047 1047 roots = [nullid]
1048 1048 # Transform our roots list into a set.
1049 1049 descendants = set(roots)
1050 1050 # Also, keep the original roots so we can filter out roots that aren't
1051 1051 # 'real' roots (i.e. are descended from other roots).
1052 1052 roots = descendants.copy()
1053 1053 # Our topologically sorted list of output nodes.
1054 1054 orderedout = []
1055 1055 # Don't start at nullid since we don't want nullid in our output list,
1056 1056 # and if nullid shows up in descendants, empty parents will look like
1057 1057 # they're descendants.
1058 1058 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1059 1059 n = self.node(r)
1060 1060 isdescendant = False
1061 1061 if lowestrev == nullrev: # Everybody is a descendant of nullid
1062 1062 isdescendant = True
1063 1063 elif n in descendants:
1064 1064 # n is already a descendant
1065 1065 isdescendant = True
1066 1066 # This check only needs to be done here because all the roots
1067 1067 # will start being marked as descendants before the loop.
1068 1068 if n in roots:
1069 1069 # If n was a root, check if it's a 'real' root.
1070 1070 p = tuple(self.parents(n))
1071 1071 # If any of its parents are descendants, it's not a root.
1072 1072 if (p[0] in descendants) or (p[1] in descendants):
1073 1073 roots.remove(n)
1074 1074 else:
1075 1075 p = tuple(self.parents(n))
1076 1076 # A node is a descendant if either of its parents is a
1077 1077 # descendant. (We seeded the descendants set with the roots
1078 1078 # up there, remember?)
1079 1079 if (p[0] in descendants) or (p[1] in descendants):
1080 1080 descendants.add(n)
1081 1081 isdescendant = True
1082 1082 if isdescendant and ((ancestors is None) or (n in ancestors)):
1083 1083 # Only include nodes that are both descendants and ancestors.
1084 1084 orderedout.append(n)
1085 1085 if (ancestors is not None) and (n in heads):
1086 1086 # We're trying to figure out which heads are reachable
1087 1087 # from roots.
1088 1088 # Mark this head as having been reached
1089 1089 heads[n] = True
1090 1090 elif ancestors is None:
1091 1091 # Otherwise, we're trying to discover the heads.
1092 1092 # Assume this is a head because if it isn't, the next step
1093 1093 # will eventually remove it.
1094 1094 heads[n] = True
1095 1095 # But, obviously its parents aren't.
1096 1096 for p in self.parents(n):
1097 1097 heads.pop(p, None)
1098 1098 heads = [head for head, flag in heads.iteritems() if flag]
1099 1099 roots = list(roots)
1100 1100 assert orderedout
1101 1101 assert roots
1102 1102 assert heads
1103 1103 return (orderedout, roots, heads)
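# A minimal standalone sketch of the computation above, not part of revlog:
# given a DAG as {rev: (p1, p2)} with -1 for the null parent (all names here
# are hypothetical), collect every rev that is both a descendant of a root
# and an ancestor of a head, in topological (numeric) order.
def _between_sketch(parents, roots, heads):
    ancestors = set()
    stack = list(heads)
    while stack:                      # phase 1: mark ancestors of heads
        r = stack.pop()
        if r == -1 or r in ancestors:
            continue
        ancestors.add(r)
        stack.extend(parents[r])
    descendants = set(roots)
    out = []
    for r in sorted(parents):         # phase 2: sweep upward from the roots
        if r in descendants or any(p in descendants for p in parents[r]):
            descendants.add(r)
            if r in ancestors:
                out.append(r)
    return out

# parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}
# _between_sketch(parents, roots=[0], heads=[2]) -> [0, 1, 2]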
1104 1104
1105 def headrevs(self):
1105 def headrevs(self, revs=None):
1106 if revs is None:
1106 1107 try:
1107 1108 return self.index.headrevs()
1108 1109 except AttributeError:
1109 1110 return self._headrevs()
1111 return dagop.headrevs(revs, self.parentrevs)
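# Minimal sketch of what the new ``revs is not None`` branch computes,
# assuming ``parentrevs(rev)`` returns the two parent revs (-1 when absent).
# ``headrevs_of_subset`` is a hypothetical stand-in, not the dagop code: a
# head of a subset is a member that is not the parent of any other member.
def headrevs_of_subset(revs, parentrevs):
    headset = set(revs)
    for rev in revs:
        for p in parentrevs(rev):
            headset.discard(p)
    return sorted(headset)

# With linear history 0 <- 1 <- 2 and parentrevs built from a dict:
# parentrevs = {0: (-1, -1), 1: (0, -1), 2: (1, -1)}.get
# headrevs_of_subset([0, 1], parentrevs) -> [1]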
1110 1112
1111 1113 def computephases(self, roots):
1112 1114 return self.index.computephasesmapsets(roots)
1113 1115
1114 1116 def _headrevs(self):
1115 1117 count = len(self)
1116 1118 if not count:
1117 1119 return [nullrev]
1118 1120 # we won't iterate over filtered revs, so nobody is a head at start
1119 1121 ishead = [0] * (count + 1)
1120 1122 index = self.index
1121 1123 for r in self:
1122 1124 ishead[r] = 1 # I may be a head
1123 1125 e = index[r]
1124 1126 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1125 1127 return [r for r, val in enumerate(ishead) if val]
1126 1128
1127 1129 def heads(self, start=None, stop=None):
1128 1130 """return the list of all nodes that have no children
1129 1131
1130 1132 if start is specified, only heads that are descendants of
1131 1133 start will be returned
1132 1134 if stop is specified, it will consider all the revs from stop
1133 1135 as if they had no children
1134 1136 """
1135 1137 if start is None and stop is None:
1136 1138 if not len(self):
1137 1139 return [nullid]
1138 1140 return [self.node(r) for r in self.headrevs()]
1139 1141
1140 1142 if start is None:
1141 1143 start = nullrev
1142 1144 else:
1143 1145 start = self.rev(start)
1144 1146
1145 1147 stoprevs = set(self.rev(n) for n in stop or [])
1146 1148
1147 1149 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1148 1150 stoprevs=stoprevs)
1149 1151
1150 1152 return [self.node(rev) for rev in revs]
1151 1153
1152 1154 def children(self, node):
1153 1155 """find the children of a given node"""
1154 1156 c = []
1155 1157 p = self.rev(node)
1156 1158 for r in self.revs(start=p + 1):
1157 1159 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1158 1160 if prevs:
1159 1161 for pr in prevs:
1160 1162 if pr == p:
1161 1163 c.append(self.node(r))
1162 1164 elif p == nullrev:
1163 1165 c.append(self.node(r))
1164 1166 return c
1165 1167
1166 1168 def commonancestorsheads(self, a, b):
1167 1169 """calculate all the heads of the common ancestors of nodes a and b"""
1168 1170 a, b = self.rev(a), self.rev(b)
1169 1171 ancs = self._commonancestorsheads(a, b)
1170 1172 return pycompat.maplist(self.node, ancs)
1171 1173
1172 1174 def _commonancestorsheads(self, *revs):
1173 1175 """calculate all the heads of the common ancestors of revs"""
1174 1176 try:
1175 1177 ancs = self.index.commonancestorsheads(*revs)
1176 1178 except (AttributeError, OverflowError): # C implementation failed
1177 1179 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1178 1180 return ancs
1179 1181
1180 1182 def isancestor(self, a, b):
1181 1183 """return True if node a is an ancestor of node b
1182 1184
1183 1185 A revision is considered an ancestor of itself."""
1184 1186 a, b = self.rev(a), self.rev(b)
1185 1187 return self.isancestorrev(a, b)
1186 1188
1187 1189 def isancestorrev(self, a, b):
1188 1190 """return True if revision a is an ancestor of revision b
1189 1191
1190 1192 A revision is considered an ancestor of itself.
1191 1193
1192 1194 The implementation of this is trivial but the use of
1193 1195 commonancestorsheads is not."""
1194 1196 if a == nullrev:
1195 1197 return True
1196 1198 elif a == b:
1197 1199 return True
1198 1200 elif a > b:
1199 1201 return False
1200 1202 return a in self._commonancestorsheads(a, b)
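# Why the membership test above suffices, sketched with a hypothetical
# brute-force helper (not how _commonancestorsheads works): every common
# ancestor of ``a`` and ``b`` is in particular an ancestor of ``a``, so if
# ``a`` is itself a common ancestor, nothing sits above it and it must
# appear among the heads of that set.
def _ancestors_sketch(parents, r):
    seen, stack = set(), [r]
    while stack:
        cur = stack.pop()
        if cur == -1 or cur in seen:
            continue
        seen.add(cur)                 # reflexive: includes r itself
        stack.extend(parents[cur])
    return seen

# parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1)}
# 1 in (_ancestors_sketch(parents, 1) & _ancestors_sketch(parents, 2))
#   -> True: rev 1 is an ancestor of rev 2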
1201 1203
1202 1204 def ancestor(self, a, b):
1203 1205 """calculate the "best" common ancestor of nodes a and b"""
1204 1206
1205 1207 a, b = self.rev(a), self.rev(b)
1206 1208 try:
1207 1209 ancs = self.index.ancestors(a, b)
1208 1210 except (AttributeError, OverflowError):
1209 1211 ancs = ancestor.ancestors(self.parentrevs, a, b)
1210 1212 if ancs:
1211 1213 # choose a consistent winner when there's a tie
1212 1214 return min(map(self.node, ancs))
1213 1215 return nullid
1214 1216
1215 1217 def _match(self, id):
1216 1218 if isinstance(id, int):
1217 1219 # rev
1218 1220 return self.node(id)
1219 1221 if len(id) == 20:
1220 1222 # possibly a binary node
1221 1223 # odds of a binary node being all hex in ASCII are 1 in 10**25
1222 1224 try:
1223 1225 node = id
1224 1226 self.rev(node) # quick search the index
1225 1227 return node
1226 1228 except error.LookupError:
1227 1229 pass # may be partial hex id
1228 1230 try:
1229 1231 # str(rev)
1230 1232 rev = int(id)
1231 1233 if "%d" % rev != id:
1232 1234 raise ValueError
1233 1235 if rev < 0:
1234 1236 rev = len(self) + rev
1235 1237 if rev < 0 or rev >= len(self):
1236 1238 raise ValueError
1237 1239 return self.node(rev)
1238 1240 except (ValueError, OverflowError):
1239 1241 pass
1240 1242 if len(id) == 40:
1241 1243 try:
1242 1244 # a full hex nodeid?
1243 1245 node = bin(id)
1244 1246 self.rev(node)
1245 1247 return node
1246 1248 except (TypeError, error.LookupError):
1247 1249 pass
1248 1250
1249 1251 def _partialmatch(self, id):
1250 1252 # we don't care about wdirfilenodeids as they should always be full hashes
1251 1253 maybewdir = wdirhex.startswith(id)
1252 1254 try:
1253 1255 partial = self.index.partialmatch(id)
1254 1256 if partial and self.hasnode(partial):
1255 1257 if maybewdir:
1256 1258 # single 'ff...' match in radix tree, ambiguous with wdir
1257 1259 raise error.RevlogError
1258 1260 return partial
1259 1261 if maybewdir:
1260 1262 # no 'ff...' match in radix tree, wdir identified
1261 1263 raise error.WdirUnsupported
1262 1264 return None
1263 1265 except error.RevlogError:
1264 1266 # parsers.c radix tree lookup gave multiple matches
1265 1267 # fast path: for unfiltered changelog, radix tree is accurate
1266 1268 if not getattr(self, 'filteredrevs', None):
1267 1269 raise error.AmbiguousPrefixLookupError(
1268 1270 id, self.indexfile, _('ambiguous identifier'))
1269 1271 # fall through to slow path that filters hidden revisions
1270 1272 except (AttributeError, ValueError):
1271 1273 # we are pure python, or key was too short to search radix tree
1272 1274 pass
1273 1275
1274 1276 if id in self._pcache:
1275 1277 return self._pcache[id]
1276 1278
1277 1279 if len(id) <= 40:
1278 1280 try:
1279 1281 # hex(node)[:...]
1280 1282 l = len(id) // 2 # grab an even number of digits
1281 1283 prefix = bin(id[:l * 2])
1282 1284 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1283 1285 nl = [n for n in nl if hex(n).startswith(id) and
1284 1286 self.hasnode(n)]
1285 1287 if nullhex.startswith(id):
1286 1288 nl.append(nullid)
1287 1289 if len(nl) > 0:
1288 1290 if len(nl) == 1 and not maybewdir:
1289 1291 self._pcache[id] = nl[0]
1290 1292 return nl[0]
1291 1293 raise error.AmbiguousPrefixLookupError(
1292 1294 id, self.indexfile, _('ambiguous identifier'))
1293 1295 if maybewdir:
1294 1296 raise error.WdirUnsupported
1295 1297 return None
1296 1298 except TypeError:
1297 1299 pass
1298 1300
1299 1301 def lookup(self, id):
1300 1302 """locate a node based on:
1301 1303 - revision number or str(revision number)
1302 1304 - nodeid or subset of hex nodeid
1303 1305 """
1304 1306 n = self._match(id)
1305 1307 if n is not None:
1306 1308 return n
1307 1309 n = self._partialmatch(id)
1308 1310 if n:
1309 1311 return n
1310 1312
1311 1313 raise error.LookupError(id, self.indexfile, _('no match found'))
1312 1314
1313 1315 def shortest(self, node, minlength=1):
1314 1316 """Find the shortest unambiguous prefix that matches node."""
1315 1317 def isvalid(prefix):
1316 1318 try:
1317 1319 node = self._partialmatch(prefix)
1318 1320 except error.AmbiguousPrefixLookupError:
1319 1321 return False
1320 1322 except error.WdirUnsupported:
1321 1323 # single 'ff...' match
1322 1324 return True
1323 1325 if node is None:
1324 1326 raise error.LookupError(node, self.indexfile, _('no node'))
1325 1327 return True
1326 1328
1327 1329 def maybewdir(prefix):
1328 1330 return all(c == 'f' for c in prefix)
1329 1331
1330 1332 hexnode = hex(node)
1331 1333
1332 1334 def disambiguate(hexnode, minlength):
1333 1335 """Disambiguate against wdirid."""
1334 1336 for length in range(minlength, 41):
1335 1337 prefix = hexnode[:length]
1336 1338 if not maybewdir(prefix):
1337 1339 return prefix
1338 1340
1339 1341 if not getattr(self, 'filteredrevs', None):
1340 1342 try:
1341 1343 length = max(self.index.shortest(node), minlength)
1342 1344 return disambiguate(hexnode, length)
1343 1345 except error.RevlogError:
1344 1346 if node != wdirid:
1345 1347 raise error.LookupError(node, self.indexfile, _('no node'))
1346 1348 except AttributeError:
1347 1349 # Fall through to pure code
1348 1350 pass
1349 1351
1350 1352 if node == wdirid:
1351 1353 for length in range(minlength, 41):
1352 1354 prefix = hexnode[:length]
1353 1355 if isvalid(prefix):
1354 1356 return prefix
1355 1357
1356 1358 for length in range(minlength, 41):
1357 1359 prefix = hexnode[:length]
1358 1360 if isvalid(prefix):
1359 1361 return disambiguate(hexnode, length)
1360 1362
1361 1363 def cmp(self, node, text):
1362 1364 """compare text with a given file revision
1363 1365
1364 1366 returns True if text is different than what is stored.
1365 1367 """
1366 1368 p1, p2 = self.parents(node)
1367 1369 return storageutil.hashrevisionsha1(text, p1, p2) != node
1368 1370
1369 1371 def _cachesegment(self, offset, data):
1370 1372 """Add a segment to the revlog cache.
1371 1373
1372 1374 Accepts an absolute offset and the data that is at that location.
1373 1375 """
1374 1376 o, d = self._chunkcache
1375 1377 # try to add to existing cache
1376 1378 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1377 1379 self._chunkcache = o, d + data
1378 1380 else:
1379 1381 self._chunkcache = offset, data
1380 1382
1381 1383 def _readsegment(self, offset, length, df=None):
1382 1384 """Load a segment of raw data from the revlog.
1383 1385
1384 1386 Accepts an absolute offset, length to read, and an optional existing
1385 1387 file handle to read from.
1386 1388
1387 1389 If an existing file handle is passed, it will be seeked and the
1388 1390 original seek position will NOT be restored.
1389 1391
1390 1392 Returns a str or buffer of raw byte data.
1391 1393
1392 1394 Raises if the requested number of bytes could not be read.
1393 1395 """
1394 1396 # Cache data both forward and backward around the requested
1395 1397 # data, in a fixed size window. This helps speed up operations
1396 1398 # involving reading the revlog backwards.
1397 1399 cachesize = self._chunkcachesize
1398 1400 realoffset = offset & ~(cachesize - 1)
1399 1401 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1400 1402 - realoffset)
1401 1403 with self._datareadfp(df) as df:
1402 1404 df.seek(realoffset)
1403 1405 d = df.read(reallength)
1404 1406
1405 1407 self._cachesegment(realoffset, d)
1406 1408 if offset != realoffset or reallength != length:
1407 1409 startoffset = offset - realoffset
1408 1410 if len(d) - startoffset < length:
1409 1411 raise error.RevlogError(
1410 1412 _('partial read of revlog %s; expected %d bytes from '
1411 1413 'offset %d, got %d') %
1412 1414 (self.indexfile if self._inline else self.datafile,
1413 1415 length, realoffset, len(d) - startoffset))
1414 1416
1415 1417 return util.buffer(d, startoffset, length)
1416 1418
1417 1419 if len(d) < length:
1418 1420 raise error.RevlogError(
1419 1421 _('partial read of revlog %s; expected %d bytes from offset '
1420 1422 '%d, got %d') %
1421 1423 (self.indexfile if self._inline else self.datafile,
1422 1424 length, offset, len(d)))
1423 1425
1424 1426 return d
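# Sketch of the alignment arithmetic above, assuming ``cachesize`` is a
# power of two (the helper name is illustrative): the start is rounded down
# to a cache boundary and the end pushed past the next one, so neighboring
# requests in either direction hit the cache.
def _window_sketch(offset, length, cachesize):
    realoffset = offset & ~(cachesize - 1)            # round start down
    realend = (offset + length + cachesize) & ~(cachesize - 1)
    return realoffset, realend - realoffset

# _window_sketch(100, 50, 64) -> (64, 128): bytes [64, 192) are fetched and
# cached even though only [100, 150) were requested.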
1425 1427
1426 1428 def _getsegment(self, offset, length, df=None):
1427 1429 """Obtain a segment of raw data from the revlog.
1428 1430
1429 1431 Accepts an absolute offset, length of bytes to obtain, and an
1430 1432 optional file handle to the already-opened revlog. If the file
1431 1433 handle is used, its original seek position will not be preserved.
1432 1434
1433 1435 Requests for data may be returned from a cache.
1434 1436
1435 1437 Returns a str or a buffer instance of raw byte data.
1436 1438 """
1437 1439 o, d = self._chunkcache
1438 1440 l = len(d)
1439 1441
1440 1442 # is it in the cache?
1441 1443 cachestart = offset - o
1442 1444 cacheend = cachestart + length
1443 1445 if cachestart >= 0 and cacheend <= l:
1444 1446 if cachestart == 0 and cacheend == l:
1445 1447 return d # avoid a copy
1446 1448 return util.buffer(d, cachestart, cacheend - cachestart)
1447 1449
1448 1450 return self._readsegment(offset, length, df=df)
1449 1451
1450 1452 def _getsegmentforrevs(self, startrev, endrev, df=None):
1451 1453 """Obtain a segment of raw data corresponding to a range of revisions.
1452 1454
1453 1455 Accepts the start and end revisions and an optional already-open
1454 1456 file handle to be used for reading. If the file handle is used, its
1455 1457 seek position will not be preserved.
1456 1458
1457 1459 Requests for data may be satisfied by a cache.
1458 1460
1459 1461 Returns a 2-tuple of (offset, data) for the requested range of
1460 1462 revisions. Offset is the integer offset from the beginning of the
1461 1463 revlog and data is a str or buffer of the raw byte data.
1462 1464
1463 1465 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1464 1466 to determine where each revision's data begins and ends.
1465 1467 """
1466 1468 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1467 1469 # (functions are expensive).
1468 1470 index = self.index
1469 1471 istart = index[startrev]
1470 1472 start = int(istart[0] >> 16)
1471 1473 if startrev == endrev:
1472 1474 end = start + istart[1]
1473 1475 else:
1474 1476 iend = index[endrev]
1475 1477 end = int(iend[0] >> 16) + iend[1]
1476 1478
1477 1479 if self._inline:
1478 1480 start += (startrev + 1) * self._io.size
1479 1481 end += (endrev + 1) * self._io.size
1480 1482 length = end - start
1481 1483
1482 1484 return start, self._getsegment(start, length, df=df)
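# Sketch of the inline adjustment above: in an inline revlog, index entries
# and data chunks interleave in one file, so the data of rev r sits behind
# (r + 1) fixed-size index entries (the +1 covers rev r's own entry). The
# 64-byte entry size below matches revlogv1 but is only illustrative here.
def _inline_fileoffset_sketch(dataoffset, rev, iosize=64):
    return dataoffset + (rev + 1) * iosize

# rev 0's chunk at logical data offset 0 really starts at byte 64 of the
# index file: _inline_fileoffset_sketch(0, 0) -> 64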
1483 1485
1484 1486 def _chunk(self, rev, df=None):
1485 1487 """Obtain a single decompressed chunk for a revision.
1486 1488
1487 1489 Accepts an integer revision and an optional already-open file handle
1488 1490 to be used for reading. If used, the seek position of the file will not
1489 1491 be preserved.
1490 1492
1491 1493 Returns a str holding uncompressed data for the requested revision.
1492 1494 """
1493 1495 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1494 1496
1495 1497 def _chunks(self, revs, df=None, targetsize=None):
1496 1498 """Obtain decompressed chunks for the specified revisions.
1497 1499
1498 1500 Accepts an iterable of numeric revisions that are assumed to be in
1499 1501 ascending order. Also accepts an optional already-open file handle
1500 1502 to be used for reading. If used, the seek position of the file will
1501 1503 not be preserved.
1502 1504
1503 1505 This function is similar to calling ``self._chunk()`` multiple times,
1504 1506 but is faster.
1505 1507
1506 1508 Returns a list with decompressed data for each requested revision.
1507 1509 """
1508 1510 if not revs:
1509 1511 return []
1510 1512 start = self.start
1511 1513 length = self.length
1512 1514 inline = self._inline
1513 1515 iosize = self._io.size
1514 1516 buffer = util.buffer
1515 1517
1516 1518 l = []
1517 1519 ladd = l.append
1518 1520
1519 1521 if not self._withsparseread:
1520 1522 slicedchunks = (revs,)
1521 1523 else:
1522 1524 slicedchunks = deltautil.slicechunk(self, revs,
1523 1525 targetsize=targetsize)
1524 1526
1525 1527 for revschunk in slicedchunks:
1526 1528 firstrev = revschunk[0]
1527 1529 # Skip trailing revisions with empty diff
1528 1530 for lastrev in revschunk[::-1]:
1529 1531 if length(lastrev) != 0:
1530 1532 break
1531 1533
1532 1534 try:
1533 1535 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1534 1536 except OverflowError:
1535 1537 # issue4215 - we can't cache a run of chunks greater than
1536 1538 # 2G on Windows
1537 1539 return [self._chunk(rev, df=df) for rev in revschunk]
1538 1540
1539 1541 decomp = self.decompress
1540 1542 for rev in revschunk:
1541 1543 chunkstart = start(rev)
1542 1544 if inline:
1543 1545 chunkstart += (rev + 1) * iosize
1544 1546 chunklength = length(rev)
1545 1547 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1546 1548
1547 1549 return l
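# Sketch of the per-rev slicing in the loop above (names hypothetical): one
# contiguous segment is fetched for a whole slice of revs, then each chunk
# is cut out by its offset relative to the segment start, avoiding one
# read() call per revision.
def _cut_sketch(segment, segmentoffset, positions):
    # positions: (absolute_start, length) for each rev in the slice
    return [segment[s - segmentoffset:s - segmentoffset + l]
            for s, l in positions]

# _cut_sketch(b'AABBBCC', 10, [(10, 2), (12, 3), (15, 2)])
#   -> [b'AA', b'BBB', b'CC']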
1548 1550
1549 1551 def _chunkclear(self):
1550 1552 """Clear the raw chunk cache."""
1551 1553 self._chunkcache = (0, '')
1552 1554
1553 1555 def deltaparent(self, rev):
1554 1556 """return deltaparent of the given revision"""
1555 1557 base = self.index[rev][3]
1556 1558 if base == rev:
1557 1559 return nullrev
1558 1560 elif self._generaldelta:
1559 1561 return base
1560 1562 else:
1561 1563 return rev - 1
1562 1564
1563 1565 def issnapshot(self, rev):
1564 1566 """tells whether rev is a snapshot
1565 1567 """
1566 1568 if not self._sparserevlog:
1567 1569 return self.deltaparent(rev) == nullrev
1568 1570 elif util.safehasattr(self.index, 'issnapshot'):
1569 1571 # directly assign the method to cache the testing and access
1570 1572 self.issnapshot = self.index.issnapshot
1571 1573 return self.issnapshot(rev)
1572 1574 if rev == nullrev:
1573 1575 return True
1574 1576 entry = self.index[rev]
1575 1577 base = entry[3]
1576 1578 if base == rev:
1577 1579 return True
1578 1580 if base == nullrev:
1579 1581 return True
1580 1582 p1 = entry[5]
1581 1583 p2 = entry[6]
1582 1584 if base == p1 or base == p2:
1583 1585 return False
1584 1586 return self.issnapshot(base)
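# Standalone sketch of the pure-python fallback above, assuming an index of
# (deltabase, p1, p2) tuples keyed by rev (a hypothetical layout): a rev is
# a snapshot when it is stored as full text, or when it deltas against
# something other than its parents and that base is itself a snapshot.
def _issnapshot_sketch(index, rev, nullrev=-1):
    if rev == nullrev:
        return True
    base, p1, p2 = index[rev]
    if base == rev or base == nullrev:
        return True                              # full text
    if base == p1 or base == p2:
        return False                             # plain delta on a parent
    return _issnapshot_sketch(index, base)       # intermediate snapshot?

# index = {0: (0, -1, -1),   # full text
#          1: (0, 0, -1),    # delta against its parent: not a snapshot
#          2: (0, 1, -1)}    # delta against non-parent rev 0: snapshot
# [_issnapshot_sketch(index, r) for r in (0, 1, 2)] -> [True, False, True]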
1585 1587
1586 1588 def snapshotdepth(self, rev):
1587 1589 """number of snapshot in the chain before this one"""
1588 1590 if not self.issnapshot(rev):
1589 1591 raise error.ProgrammingError('revision %d not a snapshot')
1590 1592 return len(self._deltachain(rev)[0]) - 1
1591 1593
1592 1594 def revdiff(self, rev1, rev2):
1593 1595 """return or calculate a delta between two revisions
1594 1596
1595 1597 The delta calculated is in binary form and is intended to be written to
1596 1598 revlog data directly. So this function needs raw revision data.
1597 1599 """
1598 1600 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1599 1601 return bytes(self._chunk(rev2))
1600 1602
1601 1603 return mdiff.textdiff(self.revision(rev1, raw=True),
1602 1604 self.revision(rev2, raw=True))
1603 1605
1604 1606 def revision(self, nodeorrev, _df=None, raw=False):
1605 1607 """return an uncompressed revision of a given node or revision
1606 1608 number.
1607 1609
1608 1610 _df - an existing file handle to read from. (internal-only)
1609 1611 raw - an optional argument specifying if the revision data is to be
1610 1612 treated as raw data when applying flag transforms. 'raw' should be set
1611 1613 to True when generating changegroups or in debug commands.
1612 1614 """
1613 1615 if isinstance(nodeorrev, int):
1614 1616 rev = nodeorrev
1615 1617 node = self.node(rev)
1616 1618 else:
1617 1619 node = nodeorrev
1618 1620 rev = None
1619 1621
1620 1622 cachedrev = None
1621 1623 flags = None
1622 1624 rawtext = None
1623 1625 if node == nullid:
1624 1626 return ""
1625 1627 if self._revisioncache:
1626 1628 if self._revisioncache[0] == node:
1627 1629 # _cache only stores rawtext
1628 1630 if raw:
1629 1631 return self._revisioncache[2]
1630 1632 # duplicated, but good for perf
1631 1633 if rev is None:
1632 1634 rev = self.rev(node)
1633 1635 if flags is None:
1634 1636 flags = self.flags(rev)
1635 1637 # no extra flags set, no flag processor runs, text = rawtext
1636 1638 if flags == REVIDX_DEFAULT_FLAGS:
1637 1639 return self._revisioncache[2]
1638 1640 # rawtext is reusable. need to run flag processor
1639 1641 rawtext = self._revisioncache[2]
1640 1642
1641 1643 cachedrev = self._revisioncache[1]
1642 1644
1643 1645 # look up what we need to read
1644 1646 if rawtext is None:
1645 1647 if rev is None:
1646 1648 rev = self.rev(node)
1647 1649
1648 1650 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1649 1651 if stopped:
1650 1652 rawtext = self._revisioncache[2]
1651 1653
1652 1654 # drop cache to save memory
1653 1655 self._revisioncache = None
1654 1656
1655 1657 targetsize = None
1656 1658 rawsize = self.index[rev][2]
1657 1659 if 0 <= rawsize:
1658 1660 targetsize = 4 * rawsize
1659 1661
1660 1662 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1661 1663 if rawtext is None:
1662 1664 rawtext = bytes(bins[0])
1663 1665 bins = bins[1:]
1664 1666
1665 1667 rawtext = mdiff.patches(rawtext, bins)
1666 1668 self._revisioncache = (node, rev, rawtext)
1667 1669
1668 1670 if flags is None:
1669 1671 if rev is None:
1670 1672 rev = self.rev(node)
1671 1673 flags = self.flags(rev)
1672 1674
1673 1675 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1674 1676 if validatehash:
1675 1677 self.checkhash(text, node, rev=rev)
1676 1678
1677 1679 return text
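# Sketch of the delta-chain resolution performed above, with a callable
# standing in for a binary patch (the real code applies mdiff.patches): the
# chain yields a full snapshot first, then deltas applied in order.
def _resolve_sketch(chunks):
    text = chunks[0]                  # base snapshot
    for delta in chunks[1:]:
        text = delta(text)
    return text

# _resolve_sketch([b'abc', lambda t: t + b'd', lambda t: t.upper()])
#   -> b'ABCD'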
1678 1680
1679 1681 def hash(self, text, p1, p2):
1680 1682 """Compute a node hash.
1681 1683
1682 1684 Available as a function so that subclasses can replace the hash
1683 1685 as needed.
1684 1686 """
1685 1687 return storageutil.hashrevisionsha1(text, p1, p2)
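# Sketch of the sorted-parents SHA-1 scheme this hash is assumed to follow
# (the real work lives in storageutil.hashrevisionsha1; the helper below is
# only an illustration): both parent nodes, smaller one first, then the text.
import hashlib

def _hashrevision_sketch(text, p1, p2):
    s = hashlib.sha1(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()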
1686 1688
1687 1689 def _processflags(self, text, flags, operation, raw=False):
1688 1690 """Inspect revision data flags and applies transforms defined by
1689 1691 registered flag processors.
1690 1692
1691 1693 ``text`` - the revision data to process
1692 1694 ``flags`` - the revision flags
1693 1695 ``operation`` - the operation being performed (read or write)
1694 1696 ``raw`` - an optional argument describing if the raw transform should be
1695 1697 applied.
1696 1698
1697 1699 This method processes the flags in the order (or reverse order if
1698 1700 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1699 1701 flag processors registered for present flags. The order of flags defined
1700 1702 in REVIDX_FLAGS_ORDER needs to be stable because transforms do not commute.
1701 1703
1702 1704 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1703 1705 processed text and ``validatehash`` is a bool indicating whether the
1704 1706 returned text should be checked for hash integrity.
1705 1707
1706 1708 Note: If the ``raw`` argument is set, it has precedence over the
1707 1709 operation and will only update the value of ``validatehash``.
1708 1710 """
1709 1711 # fast path: no flag processors will run
1710 1712 if flags == 0:
1711 1713 return text, True
1712 1714 if operation not in ('read', 'write'):
1713 1715 raise error.ProgrammingError(_("invalid '%s' operation") %
1714 1716 operation)
1715 1717 # Check all flags are known.
1716 1718 if flags & ~REVIDX_KNOWN_FLAGS:
1717 1719 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1718 1720 (flags & ~REVIDX_KNOWN_FLAGS))
1719 1721 validatehash = True
1720 1722 # Depending on the operation (read or write), the order might be
1721 1723 # reversed due to non-commutative transforms.
1722 1724 orderedflags = REVIDX_FLAGS_ORDER
1723 1725 if operation == 'write':
1724 1726 orderedflags = reversed(orderedflags)
1725 1727
1726 1728 for flag in orderedflags:
1727 1729 # If a flagprocessor has been registered for a known flag, apply the
1728 1730 # related operation transform and update result tuple.
1729 1731 if flag & flags:
1730 1732 vhash = True
1731 1733
1732 1734 if flag not in self._flagprocessors:
1733 1735 message = _("missing processor for flag '%#x'") % (flag)
1734 1736 raise error.RevlogError(message)
1735 1737
1736 1738 processor = self._flagprocessors[flag]
1737 1739 if processor is not None:
1738 1740 readtransform, writetransform, rawtransform = processor
1739 1741
1740 1742 if raw:
1741 1743 vhash = rawtransform(self, text)
1742 1744 elif operation == 'read':
1743 1745 text, vhash = readtransform(self, text)
1744 1746 else: # write operation
1745 1747 text, vhash = writetransform(self, text)
1746 1748 validatehash = validatehash and vhash
1747 1749
1748 1750 return text, validatehash
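# Minimal sketch of the ordering contract above, with two hypothetical
# non-commutative transforms: writing applies them in reversed
# REVIDX_FLAGS_ORDER so that reading can peel them off in forward order,
# like nested envelopes.
def _strip(marker):
    def fn(t):
        assert t.endswith(marker), 'transforms must unnest in order'
        return t[:-1]
    return fn

ORDER = ['outer', 'inner']
writers = {'outer': lambda t: t + b'O', 'inner': lambda t: t + b'I'}
readers = {'outer': _strip(b'O'), 'inner': _strip(b'I')}

text = b'data'
for flag in reversed(ORDER):          # write path
    text = writers[flag](text)
for flag in ORDER:                    # read path
    text = readers[flag](text)
assert text == b'data'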
1749 1751
1750 1752 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1751 1753 """Check node hash integrity.
1752 1754
1753 1755 Available as a function so that subclasses can extend hash mismatch
1754 1756 behaviors as needed.
1755 1757 """
1756 1758 try:
1757 1759 if p1 is None and p2 is None:
1758 1760 p1, p2 = self.parents(node)
1759 1761 if node != self.hash(text, p1, p2):
1760 1762 # Clear the revision cache on hash failure. The revision cache
1761 1763 # only stores the raw revision and clearing the cache does have
1762 1764 # the side-effect that we won't have a cache hit when the raw
1763 1765 # revision data is accessed. But this case should be rare and
1764 1766 # it is extra work to teach the cache about the hash
1765 1767 # verification state.
1766 1768 if self._revisioncache and self._revisioncache[0] == node:
1767 1769 self._revisioncache = None
1768 1770
1769 1771 revornode = rev
1770 1772 if revornode is None:
1771 1773 revornode = templatefilters.short(hex(node))
1772 1774 raise error.RevlogError(_("integrity check failed on %s:%s")
1773 1775 % (self.indexfile, pycompat.bytestr(revornode)))
1774 1776 except error.RevlogError:
1775 1777 if self._censorable and storageutil.iscensoredtext(text):
1776 1778 raise error.CensoredNodeError(self.indexfile, node, text)
1777 1779 raise
1778 1780
1779 1781 def _enforceinlinesize(self, tr, fp=None):
1780 1782 """Check if the revlog is too big for inline and convert if so.
1781 1783
1782 1784 This should be called after revisions are added to the revlog. If the
1783 1785 revlog has grown too large to be an inline revlog, it will convert it
1784 1786 to use multiple index and data files.
1785 1787 """
1786 1788 tiprev = len(self) - 1
1787 1789 if (not self._inline or
1788 1790 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1789 1791 return
1790 1792
1791 1793 trinfo = tr.find(self.indexfile)
1792 1794 if trinfo is None:
1793 1795 raise error.RevlogError(_("%s not found in the transaction")
1794 1796 % self.indexfile)
1795 1797
1796 1798 trindex = trinfo[2]
1797 1799 if trindex is not None:
1798 1800 dataoff = self.start(trindex)
1799 1801 else:
1800 1802 # revlog was stripped at start of transaction, use all leftover data
1801 1803 trindex = len(self) - 1
1802 1804 dataoff = self.end(tiprev)
1803 1805
1804 1806 tr.add(self.datafile, dataoff)
1805 1807
1806 1808 if fp:
1807 1809 fp.flush()
1808 1810 fp.close()
1809 1811 # We can't use the cached file handle after close(). So prevent
1810 1812 # its usage.
1811 1813 self._writinghandles = None
1812 1814
1813 1815 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1814 1816 for r in self:
1815 1817 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1816 1818
1817 1819 with self._indexfp('w') as fp:
1818 1820 self.version &= ~FLAG_INLINE_DATA
1819 1821 self._inline = False
1820 1822 io = self._io
1821 1823 for i in self:
1822 1824 e = io.packentry(self.index[i], self.node, self.version, i)
1823 1825 fp.write(e)
1824 1826
1825 1827 # the temp file replaces the real index when we exit the context
1826 1828 # manager
1827 1829
1828 1830 tr.replace(self.indexfile, trindex * self._io.size)
1829 1831 self._chunkclear()
1830 1832
1831 1833 def _nodeduplicatecallback(self, transaction, node):
1832 1834 """called when trying to add a node already stored.
1833 1835 """
1834 1836
1835 1837 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1836 1838 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1837 1839 """add a revision to the log
1838 1840
1839 1841 text - the revision data to add
1840 1842 transaction - the transaction object used for rollback
1841 1843 link - the linkrev data to add
1842 1844 p1, p2 - the parent nodeids of the revision
1843 1845 cachedelta - an optional precomputed delta
1844 1846 node - nodeid of revision; typically node is not specified, and it is
1845 1847 computed by default as hash(text, p1, p2), however subclasses might
1846 1848 use different hashing method (and override checkhash() in such case)
1847 1849 flags - the known flags to set on the revision
1848 1850 deltacomputer - an optional deltacomputer instance shared between
1849 1851 multiple calls
1850 1852 """
1851 1853 if link == nullrev:
1852 1854 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1853 1855 % self.indexfile)
1854 1856
1855 1857 if flags:
1856 1858 node = node or self.hash(text, p1, p2)
1857 1859
1858 1860 rawtext, validatehash = self._processflags(text, flags, 'write')
1859 1861
1860 1862 # If the flag processor modifies the revision data, ignore any provided
1861 1863 # cachedelta.
1862 1864 if rawtext != text:
1863 1865 cachedelta = None
1864 1866
1865 1867 if len(rawtext) > _maxentrysize:
1866 1868 raise error.RevlogError(
1867 1869 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1868 1870 % (self.indexfile, len(rawtext)))
1869 1871
1870 1872 node = node or self.hash(rawtext, p1, p2)
1871 1873 if node in self.nodemap:
1872 1874 return node
1873 1875
1874 1876 if validatehash:
1875 1877 self.checkhash(rawtext, node, p1=p1, p2=p2)
1876 1878
1877 1879 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1878 1880 flags, cachedelta=cachedelta,
1879 1881 deltacomputer=deltacomputer)
1880 1882
1881 1883 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1882 1884 cachedelta=None, deltacomputer=None):
1883 1885 """add a raw revision with known flags, node and parents
1884 1886 useful when reusing a revision not stored in this revlog (ex: received
1885 1887 over the wire, or read from an external bundle).
1886 1888 """
1887 1889 dfh = None
1888 1890 if not self._inline:
1889 1891 dfh = self._datafp("a+")
1890 1892 ifh = self._indexfp("a+")
1891 1893 try:
1892 1894 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1893 1895 flags, cachedelta, ifh, dfh,
1894 1896 deltacomputer=deltacomputer)
1895 1897 finally:
1896 1898 if dfh:
1897 1899 dfh.close()
1898 1900 ifh.close()
1899 1901
1900 1902 def compress(self, data):
1901 1903 """Generate a possibly-compressed representation of data."""
1902 1904 if not data:
1903 1905 return '', data
1904 1906
1905 1907 compressed = self._compressor.compress(data)
1906 1908
1907 1909 if compressed:
1908 1910 # The revlog compressor added the header in the returned data.
1909 1911 return '', compressed
1910 1912
1911 1913 if data[0:1] == '\0':
1912 1914 return '', data
1913 1915 return 'u', data
1914 1916
1915 1917 def decompress(self, data):
1916 1918 """Decompress a revlog chunk.
1917 1919
1918 1920 The chunk is expected to begin with a header identifying the
1919 1921 format type so it can be routed to an appropriate decompressor.
1920 1922 """
1921 1923 if not data:
1922 1924 return data
1923 1925
1924 1926 # Revlogs are read much more frequently than they are written and many
1925 1927 # chunks only take microseconds to decompress, so performance is
1926 1928 # important here.
1927 1929 #
1928 1930 # We can make a few assumptions about revlogs:
1929 1931 #
1930 1932 # 1) the majority of chunks will be compressed (as opposed to inline
1931 1933 # raw data).
1932 1934 # 2) decompressing *any* data will likely be at least 10x slower than
1933 1935 # returning raw inline data.
1934 1936 # 3) we want to prioritize common and officially supported compression
1935 1937 # engines
1936 1938 #
1937 1939 # It follows that we want to optimize for the "decompress compressed data
1938 1940 # when encoded with common and officially supported compression engines"
1939 1941 # case over "raw data" and "data encoded by less common or non-official
1940 1942 # compression engines." That is why we have the inline lookup first
1941 1943 # followed by the compengines lookup.
1942 1944 #
1943 1945 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1944 1946 # compressed chunks. And this matters for changelog and manifest reads.
1945 1947 t = data[0:1]
1946 1948
1947 1949 if t == 'x':
1948 1950 try:
1949 1951 return _zlibdecompress(data)
1950 1952 except zlib.error as e:
1951 1953 raise error.RevlogError(_('revlog decompress error: %s') %
1952 1954 stringutil.forcebytestr(e))
1953 1955 # '\0' is more common than 'u' so it goes first.
1954 1956 elif t == '\0':
1955 1957 return data
1956 1958 elif t == 'u':
1957 1959 return util.buffer(data, 1)
1958 1960
1959 1961 try:
1960 1962 compressor = self._decompressors[t]
1961 1963 except KeyError:
1962 1964 try:
1963 1965 engine = util.compengines.forrevlogheader(t)
1964 1966 compressor = engine.revlogcompressor()
1965 1967 self._decompressors[t] = compressor
1966 1968 except KeyError:
1967 1969 raise error.RevlogError(_('unknown compression type %r') % t)
1968 1970
1969 1971 return compressor.decompress(data)
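# Round-trip sketch of the one-byte header convention shared by compress()
# and decompress(), simplified to the zlib and uncompressed cases (the
# helper names are hypothetical): zlib output already begins with 'x', so
# compressed chunks are self-marking, while uncompressed data is stored
# as-is when it starts with '\0' and prefixed with 'u' otherwise.
import zlib

def _store_sketch(data):
    comp = zlib.compress(data)
    if len(comp) < len(data):
        return comp                   # begins with b'x'
    return data if data[:1] == b'\0' else b'u' + data

def _load_sketch(chunk):
    t = chunk[:1]
    if t == b'x':
        return zlib.decompress(chunk)
    if t == b'\0':
        return chunk
    if t == b'u':
        return chunk[1:]
    raise ValueError('unknown compression header %r' % t)

assert _load_sketch(_store_sketch(b'abc')) == b'abc'
assert _load_sketch(_store_sketch(b'a' * 100)) == b'a' * 100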
1970 1972
1971 1973 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1972 1974 cachedelta, ifh, dfh, alwayscache=False,
1973 1975 deltacomputer=None):
1974 1976 """internal function to add revisions to the log
1975 1977
1976 1978 see addrevision for argument descriptions.
1977 1979
1978 1980 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1979 1981
1980 1982 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1981 1983 be used.
1982 1984
1983 1985 invariants:
1984 1986 - rawtext is optional (can be None); if not set, cachedelta must be set.
1985 1987 if both are set, they must correspond to each other.
1986 1988 """
1987 1989 if node == nullid:
1988 1990 raise error.RevlogError(_("%s: attempt to add null revision") %
1989 1991 self.indexfile)
1990 1992 if node == wdirid or node in wdirfilenodeids:
1991 1993 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1992 1994 self.indexfile)
1993 1995
1994 1996 if self._inline:
1995 1997 fh = ifh
1996 1998 else:
1997 1999 fh = dfh
1998 2000
1999 2001 btext = [rawtext]
2000 2002
2001 2003 curr = len(self)
2002 2004 prev = curr - 1
2003 2005 offset = self.end(prev)
2004 2006 p1r, p2r = self.rev(p1), self.rev(p2)
2005 2007
2006 2008 # full versions are inserted when the needed deltas
2007 2009 # become comparable to the uncompressed text
2008 2010 if rawtext is None:
2009 2011 # need rawtext size, before changed by flag processors, which is
2010 2012 # the non-raw size. use revlog explicitly to avoid filelog's extra
2011 2013 # logic that might remove metadata size.
2012 2014 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2013 2015 cachedelta[1])
2014 2016 else:
2015 2017 textlen = len(rawtext)
2016 2018
2017 2019 if deltacomputer is None:
2018 2020 deltacomputer = deltautil.deltacomputer(self)
2019 2021
2020 2022 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2021 2023
2022 2024 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2023 2025
2024 2026 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2025 2027 deltainfo.base, link, p1r, p2r, node)
2026 2028 self.index.append(e)
2027 2029 self.nodemap[node] = curr
2028 2030
2029 2031 # Reset the pure node cache start lookup offset to account for new
2030 2032 # revision.
2031 2033 if self._nodepos is not None:
2032 2034 self._nodepos = curr
2033 2035
2034 2036 entry = self._io.packentry(e, self.node, self.version, curr)
2035 2037 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2036 2038 link, offset)
2037 2039
2038 2040 rawtext = btext[0]
2039 2041
2040 2042 if alwayscache and rawtext is None:
2041 2043 rawtext = deltacomputer.buildtext(revinfo, fh)
2042 2044
2043 2045 if type(rawtext) == bytes: # only accept immutable objects
2044 2046 self._revisioncache = (node, curr, rawtext)
2045 2047 self._chainbasecache[curr] = deltainfo.chainbase
2046 2048 return node
2047 2049
2048 2050 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2049 2051 # Files opened in a+ mode have inconsistent behavior on various
2050 2052 # platforms. Windows requires that a file positioning call be made
2051 2053 # when the file handle transitions between reads and writes. See
2052 2054 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2053 2055 # platforms, Python or the platform itself can be buggy. Some versions
2054 2056 # of Solaris have been observed to not append at the end of the file
2055 2057 # if the file was seeked to before the end. See issue4943 for more.
2056 2058 #
2057 2059 # We work around this issue by inserting a seek() before writing.
2058 2060 # Note: This is likely not necessary on Python 3. However, because
2059 2061 # the file handle is reused for reads and may be seeked there, we need
2060 2062 # to be careful before changing this.
2061 2063 ifh.seek(0, os.SEEK_END)
2062 2064 if dfh:
2063 2065 dfh.seek(0, os.SEEK_END)
2064 2066
2065 2067 curr = len(self) - 1
2066 2068 if not self._inline:
2067 2069 transaction.add(self.datafile, offset)
2068 2070 transaction.add(self.indexfile, curr * len(entry))
2069 2071 if data[0]:
2070 2072 dfh.write(data[0])
2071 2073 dfh.write(data[1])
2072 2074 ifh.write(entry)
2073 2075 else:
2074 2076 offset += curr * self._io.size
2075 2077 transaction.add(self.indexfile, offset, curr)
2076 2078 ifh.write(entry)
2077 2079 ifh.write(data[0])
2078 2080 ifh.write(data[1])
2079 2081 self._enforceinlinesize(transaction, ifh)
2080 2082
2081 2083 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2082 2084 """
2083 2085 add a delta group
2084 2086
2085 2087 given a set of deltas, add them to the revision log. the
2086 2088 first delta is against its parent, which should be in our
2087 2089 log, the rest are against the previous delta.
2088 2090
2089 2091 If ``addrevisioncb`` is defined, it will be called with arguments of
2090 2092 this revlog and the node that was added.
2091 2093 """
2092 2094
2093 2095 if self._writinghandles:
2094 2096 raise error.ProgrammingError('cannot nest addgroup() calls')
2095 2097
2096 2098 nodes = []
2097 2099
2098 2100 r = len(self)
2099 2101 end = 0
2100 2102 if r:
2101 2103 end = self.end(r - 1)
2102 2104 ifh = self._indexfp("a+")
2103 2105 isize = r * self._io.size
2104 2106 if self._inline:
2105 2107 transaction.add(self.indexfile, end + isize, r)
2106 2108 dfh = None
2107 2109 else:
2108 2110 transaction.add(self.indexfile, isize, r)
2109 2111 transaction.add(self.datafile, end)
2110 2112 dfh = self._datafp("a+")
2111 2113 def flush():
2112 2114 if dfh:
2113 2115 dfh.flush()
2114 2116 ifh.flush()
2115 2117
2116 2118 self._writinghandles = (ifh, dfh)
2117 2119
2118 2120 try:
2119 2121 deltacomputer = deltautil.deltacomputer(self)
2120 2122 # loop through our set of deltas
2121 2123 for data in deltas:
2122 2124 node, p1, p2, linknode, deltabase, delta, flags = data
2123 2125 link = linkmapper(linknode)
2124 2126 flags = flags or REVIDX_DEFAULT_FLAGS
2125 2127
2126 2128 nodes.append(node)
2127 2129
2128 2130 if node in self.nodemap:
2129 2131 self._nodeduplicatecallback(transaction, node)
2130 2132 # this can happen if two branches make the same change
2131 2133 continue
2132 2134
2133 2135 for p in (p1, p2):
2134 2136 if p not in self.nodemap:
2135 2137 raise error.LookupError(p, self.indexfile,
2136 2138 _('unknown parent'))
2137 2139
2138 2140 if deltabase not in self.nodemap:
2139 2141 raise error.LookupError(deltabase, self.indexfile,
2140 2142 _('unknown delta base'))
2141 2143
2142 2144 baserev = self.rev(deltabase)
2143 2145
2144 2146 if baserev != nullrev and self.iscensored(baserev):
2145 2147 # if base is censored, delta must be full replacement in a
2146 2148 # single patch operation
2147 2149 hlen = struct.calcsize(">lll")
2148 2150 oldlen = self.rawsize(baserev)
2149 2151 newlen = len(delta) - hlen
2150 2152 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2151 2153 raise error.CensoredBaseError(self.indexfile,
2152 2154 self.node(baserev))
2153 2155
2154 2156 if not flags and self._peek_iscensored(baserev, delta, flush):
2155 2157 flags |= REVIDX_ISCENSORED
2156 2158
2157 2159 # We assume consumers of addrevisioncb will want to retrieve
2158 2160 # the added revision, which will require a call to
2159 2161 # revision(). revision() will fast path if there is a cache
2160 2162 # hit. So, we tell _addrevision() to always cache in this case.
2161 2163 # We're only using addgroup() in the context of changegroup
2162 2164 # generation so the revision data can always be handled as raw
2163 2165 # by the flagprocessor.
2164 2166 self._addrevision(node, None, transaction, link,
2165 2167 p1, p2, flags, (baserev, delta),
2166 2168 ifh, dfh,
2167 2169 alwayscache=bool(addrevisioncb),
2168 2170 deltacomputer=deltacomputer)
2169 2171
2170 2172 if addrevisioncb:
2171 2173 addrevisioncb(self, node)
2172 2174
2173 2175 if not dfh and not self._inline:
2174 2176 # addrevision switched from inline to conventional
2175 2177 # reopen the index
2176 2178 ifh.close()
2177 2179 dfh = self._datafp("a+")
2178 2180 ifh = self._indexfp("a+")
2179 2181 self._writinghandles = (ifh, dfh)
2180 2182 finally:
2181 2183 self._writinghandles = None
2182 2184
2183 2185 if dfh:
2184 2186 dfh.close()
2185 2187 ifh.close()
2186 2188
2187 2189 return nodes
2188 2190
2189 2191 def iscensored(self, rev):
2190 2192 """Check if a file revision is censored."""
2191 2193 if not self._censorable:
2192 2194 return False
2193 2195
2194 2196 return self.flags(rev) & REVIDX_ISCENSORED
2195 2197
2196 2198 def _peek_iscensored(self, baserev, delta, flush):
2197 2199 """Quickly check if a delta produces a censored revision."""
2198 2200 if not self._censorable:
2199 2201 return False
2200 2202
2201 2203 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2202 2204
2203 2205 def getstrippoint(self, minlink):
2204 2206 """find the minimum rev that must be stripped to strip the linkrev
2205 2207
2206 2208 Returns a tuple containing the minimum rev and a set of all revs that
2207 2209 have linkrevs that will be broken by this strip.
2208 2210 """
2209 2211 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2210 2212 self.headrevs(),
2211 2213 self.linkrev, self.parentrevs)
2212 2214
2213 2215 def strip(self, minlink, transaction):
2214 2216 """truncate the revlog on the first revision with a linkrev >= minlink
2215 2217
2216 2218 This function is called when we're stripping revision minlink and
2217 2219 its descendants from the repository.
2218 2220
2219 2221 We have to remove all revisions with linkrev >= minlink, because
2220 2222 the equivalent changelog revisions will be renumbered after the
2221 2223 strip.
2222 2224
2223 2225 So we truncate the revlog on the first of these revisions, and
2224 2226 trust that the caller has saved the revisions that shouldn't be
2225 2227 removed and that it'll re-add them after this truncation.
2226 2228 """
2227 2229 if len(self) == 0:
2228 2230 return
2229 2231
2230 2232 rev, _ = self.getstrippoint(minlink)
2231 2233 if rev == len(self):
2232 2234 return
2233 2235
2234 2236 # first truncate the files on disk
2235 2237 end = self.start(rev)
2236 2238 if not self._inline:
2237 2239 transaction.add(self.datafile, end)
2238 2240 end = rev * self._io.size
2239 2241 else:
2240 2242 end += rev * self._io.size
2241 2243
2242 2244 transaction.add(self.indexfile, end)
2243 2245
2244 2246 # then reset internal state in memory to forget those revisions
2245 2247 self._revisioncache = None
2246 2248 self._chaininfocache = {}
2247 2249 self._chunkclear()
2248 2250 for x in pycompat.xrange(rev, len(self)):
2249 2251 del self.nodemap[self.node(x)]
2250 2252
2251 2253 del self.index[rev:-1]
2252 2254 self._nodepos = None
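# Sketch of the truncation offsets computed above (the helper and the
# 64-byte revlogv1 entry size are illustrative): stripping from ``rev`` on
# truncates the data file at the first byte of rev's data and the index
# after rev entries; inline revlogs keep both in one file, so the two
# components add up.
def _strippoints_sketch(rev, datastart, entrysize=64, inline=False):
    if inline:
        return {'index': datastart + rev * entrysize}
    return {'data': datastart, 'index': rev * entrysize}

# _strippoints_sketch(3, datastart=1000) -> {'data': 1000, 'index': 192}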
2253 2255
2254 2256 def checksize(self):
2255 2257 expected = 0
2256 2258 if len(self):
2257 2259 expected = max(0, self.end(len(self) - 1))
2258 2260
2259 2261 try:
2260 2262 with self._datafp() as f:
2261 2263 f.seek(0, 2)
2262 2264 actual = f.tell()
2263 2265 dd = actual - expected
2264 2266 except IOError as inst:
2265 2267 if inst.errno != errno.ENOENT:
2266 2268 raise
2267 2269 dd = 0
2268 2270
2269 2271 try:
2270 2272 f = self.opener(self.indexfile)
2271 2273 f.seek(0, 2)
2272 2274 actual = f.tell()
2273 2275 f.close()
2274 2276 s = self._io.size
2275 2277 i = max(0, actual // s)
2276 2278 di = actual - (i * s)
2277 2279 if self._inline:
2278 2280 databytes = 0
2279 2281 for r in self:
2280 2282 databytes += max(0, self.length(r))
2281 2283 dd = 0
2282 2284 di = actual - len(self) * s - databytes
2283 2285 except IOError as inst:
2284 2286 if inst.errno != errno.ENOENT:
2285 2287 raise
2286 2288 di = 0
2287 2289
2288 2290 return (dd, di)
2289 2291
2290 2292 def files(self):
2291 2293 res = [self.indexfile]
2292 2294 if not self._inline:
2293 2295 res.append(self.datafile)
2294 2296 return res
2295 2297
2296 2298 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2297 2299 assumehaveparentrevisions=False,
2298 2300 deltamode=repository.CG_DELTAMODE_STD):
2299 2301 if nodesorder not in ('nodes', 'storage', 'linear', None):
2300 2302 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2301 2303 nodesorder)
2302 2304
2303 2305 if nodesorder is None and not self._generaldelta:
2304 2306 nodesorder = 'storage'
2305 2307
2306 2308 if (not self._storedeltachains and
2307 2309 deltamode != repository.CG_DELTAMODE_PREV):
2308 2310 deltamode = repository.CG_DELTAMODE_FULL
2309 2311
2310 2312 return storageutil.emitrevisions(
2311 2313 self, nodes, nodesorder, revlogrevisiondelta,
2312 2314 deltaparentfn=self.deltaparent,
2313 2315 candeltafn=self.candelta,
2314 2316 rawsizefn=self.rawsize,
2315 2317 revdifffn=self.revdiff,
2316 2318 flagsfn=self.flags,
2317 2319 deltamode=deltamode,
2318 2320 revisiondata=revisiondata,
2319 2321 assumehaveparentrevisions=assumehaveparentrevisions)
2320 2322
2321 2323 DELTAREUSEALWAYS = 'always'
2322 2324 DELTAREUSESAMEREVS = 'samerevs'
2323 2325 DELTAREUSENEVER = 'never'
2324 2326
2325 2327 DELTAREUSEFULLADD = 'fulladd'
2326 2328
2327 2329 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2328 2330
2329 2331 def clone(self, tr, destrevlog, addrevisioncb=None,
2330 2332 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2331 2333 """Copy this revlog to another, possibly with format changes.
2332 2334
2333 2335 The destination revlog will contain the same revisions and nodes.
2334 2336 However, it may not be bit-for-bit identical due to e.g. delta encoding
2335 2337 differences.
2336 2338
2337 2339 The ``deltareuse`` argument controls how deltas from the existing revlog
2338 2340 are preserved in the destination revlog. The argument can have the
2339 2341 following values:
2340 2342
2341 2343 DELTAREUSEALWAYS
2342 2344 Deltas will always be reused (if possible), even if the destination
2343 2345 revlog would not select the same revisions for the delta. This is the
2344 2346 fastest mode of operation.
2345 2347 DELTAREUSESAMEREVS
2346 2348 Deltas will be reused if the destination revlog would pick the same
2347 2349 revisions for the delta. This mode strikes a balance between speed
2348 2350 and optimization.
2349 2351 DELTAREUSENEVER
2350 2352 Deltas will never be reused. This is the slowest mode of execution.
2351 2353 This mode can be used to recompute deltas (e.g. if the diff/delta
2352 2354 algorithm changes).
2353 2355
2354 2356 Delta computation can be slow, so the choice of delta reuse policy can
2355 2357 significantly affect run time.
2356 2358
2357 2359 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2358 2360 two extremes. Deltas will be reused if they are appropriate. But if the
2359 2361 delta could choose a better revision, it will do so. This means if you
2360 2362 are converting a non-generaldelta revlog to a generaldelta revlog,
2361 2363 deltas will be recomputed if the delta's parent isn't a parent of the
2362 2364 revision.
2363 2365
2364 2366 In addition to the delta policy, the ``forcedeltabothparents``
2365 2367 argument controls whether deltas are forcibly computed against both
2366 2368 parents for merges. When unset, the destination revlog's existing setting is kept.
2367 2369 """
2368 2370 if deltareuse not in self.DELTAREUSEALL:
2369 2371 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2370 2372
2371 2373 if len(destrevlog):
2372 2374 raise ValueError(_('destination revlog is not empty'))
2373 2375
2374 2376 if getattr(self, 'filteredrevs', None):
2375 2377 raise ValueError(_('source revlog has filtered revisions'))
2376 2378 if getattr(destrevlog, 'filteredrevs', None):
2377 2379 raise ValueError(_('destination revlog has filtered revisions'))
2378 2380
2379 2381 # lazydeltabase controls whether to reuse a cached delta, if possible.
2380 2382 oldlazydeltabase = destrevlog._lazydeltabase
2381 2383 oldamd = destrevlog._deltabothparents
2382 2384
2383 2385 try:
2384 2386 if deltareuse == self.DELTAREUSEALWAYS:
2385 2387 destrevlog._lazydeltabase = True
2386 2388 elif deltareuse == self.DELTAREUSESAMEREVS:
2387 2389 destrevlog._lazydeltabase = False
2388 2390
2389 2391 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2390 2392
2391 2393 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2392 2394 self.DELTAREUSESAMEREVS)
2393 2395
2394 2396 deltacomputer = deltautil.deltacomputer(destrevlog)
2395 2397 index = self.index
2396 2398 for rev in self:
2397 2399 entry = index[rev]
2398 2400
2399 2401 # Some classes override linkrev to take filtered revs into
2400 2402 # account. Use raw entry from index.
2401 2403 flags = entry[0] & 0xffff
2402 2404 linkrev = entry[4]
2403 2405 p1 = index[entry[5]][7]
2404 2406 p2 = index[entry[6]][7]
2405 2407 node = entry[7]
2406 2408
2407 2409 # (Possibly) reuse the delta from the revlog if allowed and
2408 2410 # the revlog chunk is a delta.
2409 2411 cachedelta = None
2410 2412 rawtext = None
2411 2413 if populatecachedelta:
2412 2414 dp = self.deltaparent(rev)
2413 2415 if dp != nullrev:
2414 2416 cachedelta = (dp, bytes(self._chunk(rev)))
2415 2417
2416 2418 if not cachedelta:
2417 2419 rawtext = self.revision(rev, raw=True)
2418 2420
2419 2421
2420 2422 if deltareuse == self.DELTAREUSEFULLADD:
2421 2423 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2422 2424 cachedelta=cachedelta,
2423 2425 node=node, flags=flags,
2424 2426 deltacomputer=deltacomputer)
2425 2427 else:
2426 2428 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2427 2429 checkambig=False)
2428 2430 dfh = None
2429 2431 if not destrevlog._inline:
2430 2432 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2431 2433 try:
2432 2434 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2433 2435 p2, flags, cachedelta, ifh, dfh,
2434 2436 deltacomputer=deltacomputer)
2435 2437 finally:
2436 2438 if dfh:
2437 2439 dfh.close()
2438 2440 ifh.close()
2439 2441
2440 2442 if addrevisioncb:
2441 2443 addrevisioncb(self, rev, node)
2442 2444 finally:
2443 2445 destrevlog._lazydeltabase = oldlazydeltabase
2444 2446 destrevlog._deltabothparents = oldamd
2445 2447
2446 2448 def censorrevision(self, tr, censornode, tombstone=b''):
2447 2449 if (self.version & 0xFFFF) == REVLOGV0:
2448 2450 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2449 2451 self.version)
2450 2452
2451 2453 censorrev = self.rev(censornode)
2452 2454 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2453 2455
2454 2456 if len(tombstone) > self.rawsize(censorrev):
2455 2457 raise error.Abort(_('censor tombstone must be no longer than '
2456 2458 'censored data'))
2457 2459
2458 2460 # Rewriting the revlog in place is hard. Our strategy for censoring is
2459 2461 # to create a new revlog, copy all revisions to it, then replace the
2460 2462 # revlogs on transaction close.
2461 2463
2462 2464 newindexfile = self.indexfile + b'.tmpcensored'
2463 2465 newdatafile = self.datafile + b'.tmpcensored'
2464 2466
2465 2467 # This is a bit dangerous. We could easily have a mismatch of state.
2466 2468 newrl = revlog(self.opener, newindexfile, newdatafile,
2467 2469 censorable=True)
2468 2470 newrl.version = self.version
2469 2471 newrl._generaldelta = self._generaldelta
2470 2472 newrl._io = self._io
2471 2473
2472 2474 for rev in self.revs():
2473 2475 node = self.node(rev)
2474 2476 p1, p2 = self.parents(node)
2475 2477
2476 2478 if rev == censorrev:
2477 2479 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2478 2480 p1, p2, censornode, REVIDX_ISCENSORED)
2479 2481
2480 2482 if newrl.deltaparent(rev) != nullrev:
2481 2483 raise error.Abort(_('censored revision stored as delta; '
2482 2484 'cannot censor'),
2483 2485 hint=_('censoring of revlogs is not '
2484 2486 'fully implemented; please report '
2485 2487 'this bug'))
2486 2488 continue
2487 2489
2488 2490 if self.iscensored(rev):
2489 2491 if self.deltaparent(rev) != nullrev:
2490 2492 raise error.Abort(_('cannot censor due to censored '
2491 2493 'revision having delta stored'))
2492 2494 rawtext = self._chunk(rev)
2493 2495 else:
2494 2496 rawtext = self.revision(rev, raw=True)
2495 2497
2496 2498 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2497 2499 self.flags(rev))
2498 2500
2499 2501 tr.addbackup(self.indexfile, location='store')
2500 2502 if not self._inline:
2501 2503 tr.addbackup(self.datafile, location='store')
2502 2504
2503 2505 self.opener.rename(newrl.indexfile, self.indexfile)
2504 2506 if not self._inline:
2505 2507 self.opener.rename(newrl.datafile, self.datafile)
2506 2508
2507 2509 self.clearcaches()
2508 2510 self._loadindex()
2509 2511
2510 2512 def verifyintegrity(self, state):
2511 2513 """Verifies the integrity of the revlog.
2512 2514
2513 2515 Yields ``revlogproblem`` instances describing problems that are
2514 2516 found.
2515 2517 """
2516 2518 dd, di = self.checksize()
2517 2519 if dd:
2518 2520 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2519 2521 if di:
2520 2522 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2521 2523
2522 2524 version = self.version & 0xFFFF
2523 2525
2524 2526 # The verifier tells us what version revlog we should be.
2525 2527 if version != state['expectedversion']:
2526 2528 yield revlogproblem(
2527 2529 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2528 2530 (self.indexfile, version, state['expectedversion']))
2529 2531
2530 2532 state['skipread'] = set()
2531 2533
2532 2534 for rev in self:
2533 2535 node = self.node(rev)
2534 2536
2535 2537 # Verify contents. 4 cases to care about:
2536 2538 #
2537 2539 # common: the most common case
2538 2540 # rename: with a rename
2539 2541 # meta: file content starts with b'\1\n', the metadata
2540 2542 # header defined in filelog.py, but without a rename
2541 2543 # ext: content stored externally
2542 2544 #
2543 2545 # More formally, their differences are shown below:
2544 2546 #
2545 2547 # | common | rename | meta | ext
2546 2548 # -------------------------------------------------------
2547 2549 # flags() | 0 | 0 | 0 | not 0
2548 2550 # renamed() | False | True | False | ?
2549 2551 # rawtext[0:2]=='\1\n'| False | True | True | ?
2550 2552 #
2551 2553 # "rawtext" means the raw text stored in revlog data, which
2552 2554 # could be retrieved by "revision(rev, raw=True)". "text"
2553 2555 # mentioned below is "revision(rev, raw=False)".
2554 2556 #
2555 2557 # There are 3 different lengths stored physically:
2556 2558 # 1. L1: rawsize, stored in revlog index
2557 2559 # 2. L2: len(rawtext), stored in revlog data
2558 2560 # 3. L3: len(text), stored in revlog data if flags==0, or
2559 2561 # possibly somewhere else if flags!=0
2560 2562 #
2561 2563 # L1 should be equal to L2. L3 could be different from them.
2562 2564 # "text" may or may not affect commit hash depending on flag
2563 2565 # processors (see revlog.addflagprocessor).
2564 2566 #
2565 2567 # | common | rename | meta | ext
2566 2568 # -------------------------------------------------
2567 2569 # rawsize() | L1 | L1 | L1 | L1
2568 2570 # size() | L1 | L2-LM | L1(*) | L1 (?)
2569 2571 # len(rawtext) | L2 | L2 | L2 | L2
2570 2572 # len(text) | L2 | L2 | L2 | L3
2571 2573 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2572 2574 #
2573 2575 # LM: length of metadata, depending on rawtext
2574 2576 # (*): not ideal, see comment in filelog.size
2575 2577 # (?): could be "- len(meta)" if the resolved content has
2576 2578 # rename metadata
2577 2579 #
2578 2580 # Checks needed to be done:
2579 2581 # 1. length check: L1 == L2, in all cases.
2580 2582 # 2. hash check: depending on flag processor, we may need to
2581 2583 # use either "text" (external), or "rawtext" (in revlog).
2582 2584
2583 2585 try:
2584 2586 skipflags = state.get('skipflags', 0)
2585 2587 if skipflags:
2586 2588 skipflags &= self.flags(rev)
2587 2589
2588 2590 if skipflags:
2589 2591 state['skipread'].add(node)
2590 2592 else:
2591 2593 # Side-effect: read content and verify hash.
2592 2594 self.revision(node)
2593 2595
2594 2596 l1 = self.rawsize(rev)
2595 2597 l2 = len(self.revision(node, raw=True))
2596 2598
2597 2599 if l1 != l2:
2598 2600 yield revlogproblem(
2599 2601 error=_('unpacked size is %d, %d expected') % (l2, l1),
2600 2602 node=node)
2601 2603
2602 2604 except error.CensoredNodeError:
2603 2605 if state['erroroncensored']:
2604 2606 yield revlogproblem(error=_('censored file data'),
2605 2607 node=node)
2606 2608 state['skipread'].add(node)
2607 2609 except Exception as e:
2608 2610 yield revlogproblem(
2609 2611 error=_('unpacking %s: %s') % (short(node),
2610 2612 stringutil.forcebytestr(e)),
2611 2613 node=node)
2612 2614 state['skipread'].add(node)
2613 2615
2614 2616 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2615 2617 revisionscount=False, trackedsize=False,
2616 2618 storedsize=False):
2617 2619 d = {}
2618 2620
2619 2621 if exclusivefiles:
2620 2622 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2621 2623 if not self._inline:
2622 2624 d['exclusivefiles'].append((self.opener, self.datafile))
2623 2625
2624 2626 if sharedfiles:
2625 2627 d['sharedfiles'] = []
2626 2628
2627 2629 if revisionscount:
2628 2630 d['revisionscount'] = len(self)
2629 2631
2630 2632 if trackedsize:
2631 2633 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2632 2634
2633 2635 if storedsize:
2634 2636 d['storedsize'] = sum(self.opener.stat(path).st_size
2635 2637 for path in self.files())
2636 2638
2637 2639 return d