revlog: rename 'self.checkinlinesize' into '_enforceinlinesize'...
Boris Feld
r35992:9ba1d0c7 default
@@ -1,565 +1,565 b''
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullid,
15 15 )
16 16 from .thirdparty import (
17 17 attr,
18 18 )
19 19
20 20 from . import (
21 21 encoding,
22 22 error,
23 23 revlog,
24 24 util,
25 25 )
26 26
27 27 _defaultextra = {'branch': 'default'}
28 28
29 29 def _string_escape(text):
30 30 """
31 31 >>> from .pycompat import bytechr as chr
32 32 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
33 33 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)sab%(cr)scd%(bs)s%(nl)s" % d
34 34 >>> s
35 35 'ab\\ncd\\\\\\\\n\\x00ab\\rcd\\\\\\n'
36 36 >>> res = _string_escape(s)
37 37 >>> s == util.unescapestr(res)
38 38 True
39 39 """
40 40 # subset of the string_escape codec
41 41 text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
42 42 return text.replace('\0', '\\0')
43 43
44 44 def decodeextra(text):
45 45 """
46 46 >>> from .pycompat import bytechr as chr
47 47 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
48 48 ... ).items())
49 49 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
50 50 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
51 51 ... b'baz': chr(92) + chr(0) + b'2'})
52 52 ... ).items())
53 53 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
54 54 """
55 55 extra = _defaultextra.copy()
56 56 for l in text.split('\0'):
57 57 if l:
58 58 if '\\0' in l:
59 59 # fix up \0 without getting into trouble with \\0
60 60 l = l.replace('\\\\', '\\\\\n')
61 61 l = l.replace('\\0', '\0')
62 62 l = l.replace('\n', '')
63 63 k, v = util.unescapestr(l).split(':', 1)
64 64 extra[k] = v
65 65 return extra
66 66
67 67 def encodeextra(d):
68 68 # keys must be sorted to produce a deterministic changelog entry
69 69 items = [_string_escape('%s:%s' % (k, d[k])) for k in sorted(d)]
70 70 return "\0".join(items)
71 71
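
A minimal standalone sketch (hypothetical values) of the entry format these two helpers produce: each key:value pair is escaped, then the pairs are joined with NUL bytes.

    def encode(d):
        def esc(s):
            s = s.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
            return s.replace('\0', '\\0')
        # keys sorted, pairs NUL-separated, exactly like encodeextra above
        return '\0'.join(esc('%s:%s' % (k, d[k])) for k in sorted(d))

    extra = {'branch': 'default', 'close': '1', 'note': 'line1\nline2'}
    assert encode(extra) == 'branch:default\x00close:1\x00note:line1\\nline2'
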
72 72 def stripdesc(desc):
73 73 """strip trailing whitespace and leading and trailing empty lines"""
74 74 return '\n'.join([l.rstrip() for l in desc.splitlines()]).strip('\n')
75 75
76 76 class appender(object):
77 77 '''the changelog index must be updated last on disk, so we use this class
78 78 to delay writes to it'''
79 79 def __init__(self, vfs, name, mode, buf):
80 80 self.data = buf
81 81 fp = vfs(name, mode)
82 82 self.fp = fp
83 83 self.offset = fp.tell()
84 84 self.size = vfs.fstat(fp).st_size
85 85 self._end = self.size
86 86
87 87 def end(self):
88 88 return self._end
89 89 def tell(self):
90 90 return self.offset
91 91 def flush(self):
92 92 pass
93 93
94 94 @property
95 95 def closed(self):
96 96 return self.fp.closed
97 97
98 98 def close(self):
99 99 self.fp.close()
100 100
101 101 def seek(self, offset, whence=0):
102 102 '''virtual file offset spans real file and data'''
103 103 if whence == 0:
104 104 self.offset = offset
105 105 elif whence == 1:
106 106 self.offset += offset
107 107 elif whence == 2:
108 108 self.offset = self.end() + offset
109 109 if self.offset < self.size:
110 110 self.fp.seek(self.offset)
111 111
112 112 def read(self, count=-1):
113 113 '''only trick here is reads that span real file and data'''
114 114 ret = ""
115 115 if self.offset < self.size:
116 116 s = self.fp.read(count)
117 117 ret = s
118 118 self.offset += len(s)
119 119 if count > 0:
120 120 count -= len(s)
121 121 if count != 0:
122 122 doff = self.offset - self.size
123 123 self.data.insert(0, "".join(self.data))
124 124 del self.data[1:]
125 125 s = self.data[0][doff:doff + count]
126 126 self.offset += len(s)
127 127 ret += s
128 128 return ret
129 129
130 130 def write(self, s):
131 131 self.data.append(bytes(s))
132 132 self.offset += len(s)
133 133 self._end += len(s)
134 134
135 135 def __enter__(self):
136 136 self.fp.__enter__()
137 137 return self
138 138
139 139 def __exit__(self, *args):
140 140 return self.fp.__exit__(*args)
141 141
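
A runnable sketch of the appender above (Python 2, matching the str-based code; the stand-in vfs and the 'idx' file name are hypothetical): reads below the real file size fall through to disk, while everything written lands only in the in-memory buffer.

    import os

    class fakevfs(object):
        def __call__(self, name, mode='r'):
            return open(name, mode)
        def fstat(self, fp):
            return os.fstat(fp.fileno())

    with open('idx', 'w') as f:
        f.write('on-disk')

    buf = []
    a = appender(fakevfs(), 'idx', 'r+', buf)
    a.seek(0, 2)                  # virtual end == real end, for now
    a.write('buffered')           # lands in buf, never on disk
    a.seek(0)
    # an explicit count is passed: with the default count=-1 the slice
    # data[0][doff:doff + count] above would drop the last buffered byte
    assert a.read(15) == 'on-diskbuffered'
    assert open('idx').read() == 'on-disk'   # disk is untouched
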
142 142 def _divertopener(opener, target):
143 143 """build an opener that writes in 'target.a' instead of 'target'"""
144 144 def _divert(name, mode='r', checkambig=False):
145 145 if name != target:
146 146 return opener(name, mode)
147 147 return opener(name + ".a", mode)
148 148 return _divert
149 149
150 150 def _delayopener(opener, target, buf):
151 151 """build an opener that stores chunks in 'buf' instead of 'target'"""
152 152 def _delay(name, mode='r', checkambig=False):
153 153 if name != target:
154 154 return opener(name, mode)
155 155 return appender(opener, name, mode, buf)
156 156 return _delay
157 157
158 158 @attr.s
159 159 class _changelogrevision(object):
160 160 # Extensions might modify _defaultextra, so let the constructor below pass
161 161 # it in
162 162 extra = attr.ib()
163 163 manifest = attr.ib(default=nullid)
164 164 user = attr.ib(default='')
165 165 date = attr.ib(default=(0, 0))
166 166 files = attr.ib(default=attr.Factory(list))
167 167 description = attr.ib(default='')
168 168
169 169 class changelogrevision(object):
170 170 """Holds results of a parsed changelog revision.
171 171
172 172 Changelog revisions consist of multiple pieces of data, including
173 173 the manifest node, user, and date. This object exposes a view into
174 174 the parsed object.
175 175 """
176 176
177 177 __slots__ = (
178 178 u'_offsets',
179 179 u'_text',
180 180 )
181 181
182 182 def __new__(cls, text):
183 183 if not text:
184 184 return _changelogrevision(extra=_defaultextra)
185 185
186 186 self = super(changelogrevision, cls).__new__(cls)
187 187 # We could return here and implement the following as an __init__.
188 188 # But doing it here is equivalent and saves an extra function call.
189 189
190 190 # format used:
191 191 # nodeid\n : manifest node in ascii
192 192 # user\n : user, no \n or \r allowed
193 193 # time tz extra\n : date (time is int or float, timezone is int)
194 194 # : extra is metadata, encoded and separated by '\0'
195 195 # : older versions ignore it
196 196 # files\n\n : files modified by the cset, no \n or \r allowed
197 197 # (.*) : comment (free text, ideally utf-8)
198 198 #
199 199 # changelog v0 doesn't use extra
200 200
201 201 nl1 = text.index('\n')
202 202 nl2 = text.index('\n', nl1 + 1)
203 203 nl3 = text.index('\n', nl2 + 1)
204 204
205 205 # The list of files may be empty. Which means nl3 is the first of the
206 206 # double newline that precedes the description.
207 207 if text[nl3 + 1:nl3 + 2] == '\n':
208 208 doublenl = nl3
209 209 else:
210 210 doublenl = text.index('\n\n', nl3 + 1)
211 211
212 212 self._offsets = (nl1, nl2, nl3, doublenl)
213 213 self._text = text
214 214
215 215 return self
216 216
217 217 @property
218 218 def manifest(self):
219 219 return bin(self._text[0:self._offsets[0]])
220 220
221 221 @property
222 222 def user(self):
223 223 off = self._offsets
224 224 return encoding.tolocal(self._text[off[0] + 1:off[1]])
225 225
226 226 @property
227 227 def _rawdate(self):
228 228 off = self._offsets
229 229 dateextra = self._text[off[1] + 1:off[2]]
230 230 return dateextra.split(' ', 2)[0:2]
231 231
232 232 @property
233 233 def _rawextra(self):
234 234 off = self._offsets
235 235 dateextra = self._text[off[1] + 1:off[2]]
236 236 fields = dateextra.split(' ', 2)
237 237 if len(fields) != 3:
238 238 return None
239 239
240 240 return fields[2]
241 241
242 242 @property
243 243 def date(self):
244 244 raw = self._rawdate
245 245 time = float(raw[0])
246 246 # Various tools did silly things with the timezone.
247 247 try:
248 248 timezone = int(raw[1])
249 249 except ValueError:
250 250 timezone = 0
251 251
252 252 return time, timezone
253 253
254 254 @property
255 255 def extra(self):
256 256 raw = self._rawextra
257 257 if raw is None:
258 258 return _defaultextra
259 259
260 260 return decodeextra(raw)
261 261
262 262 @property
263 263 def files(self):
264 264 off = self._offsets
265 265 if off[2] == off[3]:
266 266 return []
267 267
268 268 return self._text[off[2] + 1:off[3]].split('\n')
269 269
270 270 @property
271 271 def description(self):
272 272 return encoding.tolocal(self._text[self._offsets[3] + 2:])
273 273
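
A worked example of the entry layout documented in __new__ above, scanned with the same four newline offsets (all values hypothetical):

    text = ('0123456789abcdef0123456789abcdef01234567\n'
            'Alice <alice@example.com>\n'
            '1515000000 0 branch:stable\n'
            'a.txt\n'
            'b/c.txt\n'
            '\n'
            'my commit message')
    nl1 = text.index('\n')
    nl2 = text.index('\n', nl1 + 1)
    nl3 = text.index('\n', nl2 + 1)
    if text[nl3 + 1:nl3 + 2] == '\n':
        doublenl = nl3                       # empty file list
    else:
        doublenl = text.index('\n\n', nl3 + 1)
    assert text[:nl1] == '0123456789abcdef0123456789abcdef01234567'
    assert text[nl1 + 1:nl2] == 'Alice <alice@example.com>'
    assert text[nl2 + 1:nl3] == '1515000000 0 branch:stable'
    assert text[nl3 + 1:doublenl].split('\n') == ['a.txt', 'b/c.txt']
    assert text[doublenl + 2:] == 'my commit message'
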
274 274 class changelog(revlog.revlog):
275 275 def __init__(self, opener, trypending=False):
276 276 """Load a changelog revlog using an opener.
277 277
278 278 If ``trypending`` is true, we attempt to load the index from a
279 279 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
280 280 The ``00changelog.i.a`` file contains index (and possibly inline
281 281 revision) data for a transaction that hasn't been finalized yet.
282 282 It exists in a separate file to facilitate readers (such as
283 283 hooks processes) accessing data before a transaction is finalized.
284 284 """
285 285 if trypending and opener.exists('00changelog.i.a'):
286 286 indexfile = '00changelog.i.a'
287 287 else:
288 288 indexfile = '00changelog.i'
289 289
290 290 datafile = '00changelog.d'
291 291 revlog.revlog.__init__(self, opener, indexfile, datafile=datafile,
292 292 checkambig=True, mmaplargeindex=True)
293 293
294 294 if self._initempty:
295 295 # changelogs don't benefit from generaldelta
296 296 self.version &= ~revlog.FLAG_GENERALDELTA
297 297 self._generaldelta = False
298 298
299 299 # Delta chains for changelogs tend to be very small because entries
300 300 # tend to be small and don't delta well with each other. So disable delta
301 301 # chains.
302 302 self.storedeltachains = False
303 303
304 304 self._realopener = opener
305 305 self._delayed = False
306 306 self._delaybuf = None
307 307 self._divert = False
308 308 self.filteredrevs = frozenset()
309 309
310 310 def tiprev(self):
311 311 for i in xrange(len(self) - 1, -2, -1):
312 312 if i not in self.filteredrevs:
313 313 return i
314 314
315 315 def tip(self):
316 316 """filtered version of revlog.tip"""
317 317 return self.node(self.tiprev())
318 318
319 319 def __contains__(self, rev):
320 320 """filtered version of revlog.__contains__"""
321 321 return (0 <= rev < len(self)
322 322 and rev not in self.filteredrevs)
323 323
324 324 def __iter__(self):
325 325 """filtered version of revlog.__iter__"""
326 326 if len(self.filteredrevs) == 0:
327 327 return revlog.revlog.__iter__(self)
328 328
329 329 def filterediter():
330 330 for i in xrange(len(self)):
331 331 if i not in self.filteredrevs:
332 332 yield i
333 333
334 334 return filterediter()
335 335
336 336 def revs(self, start=0, stop=None):
337 337 """filtered version of revlog.revs"""
338 338 for i in super(changelog, self).revs(start, stop):
339 339 if i not in self.filteredrevs:
340 340 yield i
341 341
342 342 @util.propertycache
343 343 def nodemap(self):
344 344 # XXX need filtering too
345 345 self.rev(self.node(0))
346 346 return self._nodecache
347 347
348 348 def reachableroots(self, minroot, heads, roots, includepath=False):
349 349 return self.index.reachableroots2(minroot, heads, roots, includepath)
350 350
351 351 def headrevs(self):
352 352 if self.filteredrevs:
353 353 try:
354 354 return self.index.headrevsfiltered(self.filteredrevs)
355 355 # AttributeError covers non-c-extension environments and
356 356 # old c extensions without filter handling.
357 357 except AttributeError:
358 358 return self._headrevs()
359 359
360 360 return super(changelog, self).headrevs()
361 361
362 362 def strip(self, *args, **kwargs):
363 363 # XXX make something better than assert
364 364 # We can't expect proper strip behavior if we are filtered.
365 365 assert not self.filteredrevs
366 366 super(changelog, self).strip(*args, **kwargs)
367 367
368 368 def rev(self, node):
369 369 """filtered version of revlog.rev"""
370 370 r = super(changelog, self).rev(node)
371 371 if r in self.filteredrevs:
372 372 raise error.FilteredLookupError(hex(node), self.indexfile,
373 373 _('filtered node'))
374 374 return r
375 375
376 376 def node(self, rev):
377 377 """filtered version of revlog.node"""
378 378 if rev in self.filteredrevs:
379 379 raise error.FilteredIndexError(rev)
380 380 return super(changelog, self).node(rev)
381 381
382 382 def linkrev(self, rev):
383 383 """filtered version of revlog.linkrev"""
384 384 if rev in self.filteredrevs:
385 385 raise error.FilteredIndexError(rev)
386 386 return super(changelog, self).linkrev(rev)
387 387
388 388 def parentrevs(self, rev):
389 389 """filtered version of revlog.parentrevs"""
390 390 if rev in self.filteredrevs:
391 391 raise error.FilteredIndexError(rev)
392 392 return super(changelog, self).parentrevs(rev)
393 393
394 394 def flags(self, rev):
395 395 """filtered version of revlog.flags"""
396 396 if rev in self.filteredrevs:
397 397 raise error.FilteredIndexError(rev)
398 398 return super(changelog, self).flags(rev)
399 399
400 400 def delayupdate(self, tr):
401 401 "delay visibility of index updates to other readers"
402 402
403 403 if not self._delayed:
404 404 if len(self) == 0:
405 405 self._divert = True
406 406 if self._realopener.exists(self.indexfile + '.a'):
407 407 self._realopener.unlink(self.indexfile + '.a')
408 408 self.opener = _divertopener(self._realopener, self.indexfile)
409 409 else:
410 410 self._delaybuf = []
411 411 self.opener = _delayopener(self._realopener, self.indexfile,
412 412 self._delaybuf)
413 413 self._delayed = True
414 414 tr.addpending('cl-%i' % id(self), self._writepending)
415 415 tr.addfinalize('cl-%i' % id(self), self._finalize)
416 416
417 417 def _finalize(self, tr):
418 418 "finalize index updates"
419 419 self._delayed = False
420 420 self.opener = self._realopener
421 421 # move redirected index data back into place
422 422 if self._divert:
423 423 assert not self._delaybuf
424 424 tmpname = self.indexfile + ".a"
425 425 nfile = self.opener.open(tmpname)
426 426 nfile.close()
427 427 self.opener.rename(tmpname, self.indexfile, checkambig=True)
428 428 elif self._delaybuf:
429 429 fp = self.opener(self.indexfile, 'a', checkambig=True)
430 430 fp.write("".join(self._delaybuf))
431 431 fp.close()
432 432 self._delaybuf = None
433 433 self._divert = False
434 434 # split when we're done
435 self.checkinlinesize(tr)
435 self._enforceinlinesize(tr)
436 436
437 437 def _writepending(self, tr):
438 438 "create a file containing the unfinalized state for pretxnchangegroup"
439 439 if self._delaybuf:
440 440 # make a temporary copy of the index
441 441 fp1 = self._realopener(self.indexfile)
442 442 pendingfilename = self.indexfile + ".a"
443 443 # register as a temp file to ensure cleanup on failure
444 444 tr.registertmp(pendingfilename)
445 445 # write existing data
446 446 fp2 = self._realopener(pendingfilename, "w")
447 447 fp2.write(fp1.read())
448 448 # add pending data
449 449 fp2.write("".join(self._delaybuf))
450 450 fp2.close()
451 451 # switch modes so finalize can simply rename
452 452 self._delaybuf = None
453 453 self._divert = True
454 454 self.opener = _divertopener(self._realopener, self.indexfile)
455 455
456 456 if self._divert:
457 457 return True
458 458
459 459 return False
460 460
461 def checkinlinesize(self, tr, fp=None):
461 def _enforceinlinesize(self, tr, fp=None):
462 462 if not self._delayed:
463 revlog.revlog.checkinlinesize(self, tr, fp)
463 revlog.revlog._enforceinlinesize(self, tr, fp)
464 464
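
Putting the pieces together, the delayed-write lifecycle looks roughly like this (a sketch of the call order, not API documentation; 'tr' is the transaction the changelog registers itself with):

    # cl.delayupdate(tr)    -> writes diverted: to '00changelog.i.a' for an
    #                          empty repo, or into _delaybuf otherwise
    # ...revisions added...
    # cl._writepending(tr)  -> copies index + buffer to '00changelog.i.a'
    #                          so pretxnchangegroup hooks can read it
    # cl._finalize(tr)      -> renames '00changelog.i.a' into place (or
    #                          appends the buffer), then calls
    #                          _enforceinlinesize(tr) to split the revlog
    #                          out of inline mode if it grew too large
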
465 465 def read(self, node):
466 466 """Obtain data from a parsed changelog revision.
467 467
468 468 Returns a 6-tuple of:
469 469
470 470 - manifest node in binary
471 471 - author/user as a localstr
472 472 - date as a 2-tuple of (time, timezone)
473 473 - list of files
474 474 - commit message as a localstr
475 475 - dict of extra metadata
476 476
477 477 Unless you need to access all fields, consider calling
478 478 ``changelogrevision`` instead, as it is faster for partial object
479 479 access.
480 480 """
481 481 c = changelogrevision(self.revision(node))
482 482 return (
483 483 c.manifest,
484 484 c.user,
485 485 c.date,
486 486 c.files,
487 487 c.description,
488 488 c.extra
489 489 )
490 490
491 491 def changelogrevision(self, nodeorrev):
492 492 """Obtain a ``changelogrevision`` for a node or revision."""
493 493 return changelogrevision(self.revision(nodeorrev))
494 494
495 495 def readfiles(self, node):
496 496 """
497 497 short version of read that only returns the files modified by the cset
498 498 """
499 499 text = self.revision(node)
500 500 if not text:
501 501 return []
502 502 last = text.index("\n\n")
503 503 l = text[:last].split('\n')
504 504 return l[3:]
505 505
506 506 def add(self, manifest, files, desc, transaction, p1, p2,
507 507 user, date=None, extra=None):
508 508 # Convert to UTF-8 encoded bytestrings as the very first
509 509 # thing: calling any method on a localstr object will turn it
510 510 # into a str object and the cached UTF-8 string is thus lost.
511 511 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
512 512
513 513 user = user.strip()
514 514 # An empty username or a username with a "\n" will make the
515 515 # revision text contain two "\n\n" sequences -> corrupt
516 516 # repository since read cannot unpack the revision.
517 517 if not user:
518 518 raise error.RevlogError(_("empty username"))
519 519 if "\n" in user:
520 520 raise error.RevlogError(_("username %s contains a newline")
521 521 % repr(user))
522 522
523 523 desc = stripdesc(desc)
524 524
525 525 if date:
526 526 parseddate = "%d %d" % util.parsedate(date)
527 527 else:
528 528 parseddate = "%d %d" % util.makedate()
529 529 if extra:
530 530 branch = extra.get("branch")
531 531 if branch in ("default", ""):
532 532 del extra["branch"]
533 533 elif branch in (".", "null", "tip"):
534 534 raise error.RevlogError(_('the name \'%s\' is reserved')
535 535 % branch)
536 536 if extra:
537 537 extra = encodeextra(extra)
538 538 parseddate = "%s %s" % (parseddate, extra)
539 539 l = [hex(manifest), user, parseddate] + sorted(files) + ["", desc]
540 540 text = "\n".join(l)
541 541 return self.addrevision(text, transaction, len(self), p1, p2)
542 542
543 543 def branchinfo(self, rev):
544 544 """return the branch name and open/close state of a revision
545 545
546 546 This function exists because creating a changectx object
547 547 just to access this is costly."""
548 548 extra = self.read(rev)[5]
549 549 return encoding.tolocal(extra.get("branch")), 'close' in extra
550 550
551 551 def _addrevision(self, node, rawtext, transaction, *args, **kwargs):
552 552 # overlay over the standard revlog._addrevision to track the new
553 553 # revision on the transaction.
554 554 rev = len(self)
555 555 node = super(changelog, self)._addrevision(node, rawtext, transaction,
556 556 *args, **kwargs)
557 557 revs = transaction.changes.get('revs')
558 558 if revs is not None:
559 559 if revs:
560 560 assert revs[-1] + 1 == rev
561 561 revs = xrange(revs[0], rev + 1)
562 562 else:
563 563 revs = xrange(rev, rev + 1)
564 564 transaction.changes['revs'] = revs
565 565 return node
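
How _addrevision above maintains transaction.changes['revs'] (hypothetical values; py2 xrange, as in the code): consecutive additions extend one contiguous range.

    revs = xrange(3, 6)       # revs 3..5 were added earlier in this txn
    rev = 6                   # the revision being added now
    assert revs[-1] + 1 == rev
    revs = xrange(revs[0], rev + 1)
    assert list(revs) == [3, 4, 5, 6]
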
@@ -1,2501 +1,2501 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import hashlib
21 21 import heapq
22 22 import os
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullid,
31 31 nullrev,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .thirdparty import (
38 38 attr,
39 39 )
40 40 from . import (
41 41 ancestor,
42 42 error,
43 43 mdiff,
44 44 policy,
45 45 pycompat,
46 46 templatefilters,
47 47 util,
48 48 )
49 49
50 50 parsers = policy.importmod(r'parsers')
51 51
52 52 # Aliased for performance.
53 53 _zlibdecompress = zlib.decompress
54 54
55 55 # revlog header flags
56 56 REVLOGV0 = 0
57 57 REVLOGV1 = 1
58 58 # Dummy value until file format is finalized.
59 59 # Reminder: change the bounds check in revlog.__init__ when this is changed.
60 60 REVLOGV2 = 0xDEAD
61 61 FLAG_INLINE_DATA = (1 << 16)
62 62 FLAG_GENERALDELTA = (1 << 17)
63 63 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
64 64 REVLOG_DEFAULT_FORMAT = REVLOGV1
65 65 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
66 66 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
67 67 REVLOGV2_FLAGS = REVLOGV1_FLAGS
68 68
69 69 # revlog index flags
70 70 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
71 71 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
72 72 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
73 73 REVIDX_DEFAULT_FLAGS = 0
74 74 # stable order in which flags need to be processed and their processors applied
75 75 REVIDX_FLAGS_ORDER = [
76 76 REVIDX_ISCENSORED,
77 77 REVIDX_ELLIPSIS,
78 78 REVIDX_EXTSTORED,
79 79 ]
80 80 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
81 81
82 82 # max size of revlog with inline data
83 83 _maxinline = 131072
84 84 _chunksize = 1048576
85 85
86 86 RevlogError = error.RevlogError
87 87 LookupError = error.LookupError
88 88 CensoredNodeError = error.CensoredNodeError
89 89 ProgrammingError = error.ProgrammingError
90 90
91 91 # Store flag processors (cf. 'addflagprocessor()' to register)
92 92 _flagprocessors = {
93 93 REVIDX_ISCENSORED: None,
94 94 }
95 95
96 96 def addflagprocessor(flag, processor):
97 97 """Register a flag processor on a revision data flag.
98 98
99 99 Invariant:
100 100 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
101 101 - Only one flag processor can be registered on a specific flag.
102 102 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
103 103 following signatures:
104 104 - (read) f(self, rawtext) -> text, bool
105 105 - (write) f(self, text) -> rawtext, bool
106 106 - (raw) f(self, rawtext) -> bool
107 107 "text" is presented to the user. "rawtext" is stored in revlog data, not
108 108 directly visible to the user.
109 109 The boolean returned by these transforms is used to determine whether
110 110 the returned text can be used for hash integrity checking. For example,
111 111 if "write" returns False, then "text" is used to generate hash. If
112 112 "write" returns True, that basically means "rawtext" returned by "write"
113 113 should be used to generate hash. Usually, "write" and "read" return
114 114 different booleans, and "raw" returns the same boolean as "write".
115 115
116 116 Note: The 'raw' transform is used for changegroup generation and in some
117 117 debug commands. In this case the transform only indicates whether the
118 118 contents can be used for hash integrity checks.
119 119 """
120 120 if not flag & REVIDX_KNOWN_FLAGS:
121 121 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
122 122 raise ProgrammingError(msg)
123 123 if flag not in REVIDX_FLAGS_ORDER:
124 124 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
125 125 raise ProgrammingError(msg)
126 126 if flag in _flagprocessors:
127 127 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
128 128 raise error.Abort(msg)
129 129 _flagprocessors[flag] = processor
130 130
131 131 def getoffset(q):
132 132 return int(q >> 16)
133 133
134 134 def gettype(q):
135 135 return int(q & 0xFFFF)
136 136
137 137 def offset_type(offset, type):
138 138 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
139 139 raise ValueError('unknown revlog index flags')
140 140 return int(int(offset) << 16 | type)
141 141
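
The first index field packs the data offset and the 16 flag bits into one integer, so both round-trip through these helpers:

    packed = offset_type(1048576, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1048576
    assert gettype(packed) == REVIDX_ISCENSORED    # 1 << 15
    # offset_type refuses flags outside REVIDX_KNOWN_FLAGS:
    # offset_type(0, 1 << 12) raises ValueError
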
142 142 _nullhash = hashlib.sha1(nullid)
143 143
144 144 def hash(text, p1, p2):
145 145 """generate a hash from the given text and its parent hashes
146 146
147 147 This hash combines both the current file contents and its history
148 148 in a manner that makes it easy to distinguish nodes with the same
149 149 content in the revision graph.
150 150 """
151 151 # As of now, if one of the parent nodes is null, p2 is null
152 152 if p2 == nullid:
153 153 # deep copy of a hash is faster than creating one
154 154 s = _nullhash.copy()
155 155 s.update(p1)
156 156 else:
157 157 # none of the parent nodes are nullid
158 158 if p1 < p2:
159 159 a = p1
160 160 b = p2
161 161 else:
162 162 a = p2
163 163 b = p1
164 164 s = hashlib.sha1(a)
165 165 s.update(b)
166 166 s.update(text)
167 167 return s.digest()
168 168
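
A worked check of the hashing scheme, using the module's hash() and nullid (hypothetical node values): the digest is sha1 of the two parent nodes, smaller one first, followed by the text, and the p2 == nullid fast path agrees with that rule since nullid sorts first.

    import hashlib
    p1, p2, text = b'\x11' * 20, nullid, b'file content'
    expected = hashlib.sha1(min(p1, p2) + max(p1, p2) + text).digest()
    assert hash(text, p1, p2) == expected
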
169 169 def _trimchunk(revlog, revs, startidx, endidx=None):
170 170 """returns revs[startidx:endidx] without empty trailing revs
171 171 """
172 172 length = revlog.length
173 173
174 174 if endidx is None:
175 175 endidx = len(revs)
176 176
177 177 # Trim empty revs at the end, but never the very first revision of a chain
178 178 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
179 179 endidx -= 1
180 180
181 181 return revs[startidx:endidx]
182 182
183 183 def _slicechunk(revlog, revs):
184 184 """slice revs to reduce the amount of unrelated data to be read from disk.
185 185
186 186 ``revs`` is sliced into groups that should each be read in one go.
187 187 Assume that revs are sorted.
188 188 """
189 189 start = revlog.start
190 190 length = revlog.length
191 191
192 192 if len(revs) <= 1:
193 193 yield revs
194 194 return
195 195
196 196 startbyte = start(revs[0])
197 197 endbyte = start(revs[-1]) + length(revs[-1])
198 198 readdata = deltachainspan = endbyte - startbyte
199 199
200 200 chainpayload = sum(length(r) for r in revs)
201 201
202 202 if deltachainspan:
203 203 density = chainpayload / float(deltachainspan)
204 204 else:
205 205 density = 1.0
206 206
207 207 # Store the gaps in a heap to have them sorted by decreasing size
208 208 gapsheap = []
209 209 heapq.heapify(gapsheap)
210 210 prevend = None
211 211 for i, rev in enumerate(revs):
212 212 revstart = start(rev)
213 213 revlen = length(rev)
214 214
215 215 # Skip empty revisions to form larger holes
216 216 if revlen == 0:
217 217 continue
218 218
219 219 if prevend is not None:
220 220 gapsize = revstart - prevend
221 221 # only consider holes that are large enough
222 222 if gapsize > revlog._srmingapsize:
223 223 heapq.heappush(gapsheap, (-gapsize, i))
224 224
225 225 prevend = revstart + revlen
226 226
227 227 # Collect the indices of the largest holes until the density is acceptable
228 228 indicesheap = []
229 229 heapq.heapify(indicesheap)
230 230 while gapsheap and density < revlog._srdensitythreshold:
231 231 oppgapsize, gapidx = heapq.heappop(gapsheap)
232 232
233 233 heapq.heappush(indicesheap, gapidx)
234 234
235 235 # the gap sizes are stored as negatives to be sorted decreasingly
236 236 # by the heap
237 237 readdata -= (-oppgapsize)
238 238 if readdata > 0:
239 239 density = chainpayload / float(readdata)
240 240 else:
241 241 density = 1.0
242 242
243 243 # Cut the revs at collected indices
244 244 previdx = 0
245 245 while indicesheap:
246 246 idx = heapq.heappop(indicesheap)
247 247
248 248 chunk = _trimchunk(revlog, revs, previdx, idx)
249 249 if chunk:
250 250 yield chunk
251 251
252 252 previdx = idx
253 253
254 254 chunk = _trimchunk(revlog, revs, previdx)
255 255 if chunk:
256 256 yield chunk
257 257
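
A toy illustration of the slicing (hypothetical stand-in revlog where each revision is a (start, length) chunk on disk, with one large hole):

    class fakerevlog(object):
        _srdensitythreshold = 0.5
        _srmingapsize = 100
        # rev -> (start offset, stored length); big gap before rev 3
        chunks = [(0, 10), (10, 10), (20, 10), (100000, 10), (100010, 10)]
        def start(self, rev):
            return self.chunks[rev][0]
        def length(self, rev):
            return self.chunks[rev][1]

    # one span would read ~100020 bytes for 50 bytes of payload
    # (density ~0.0005), so the chunk is cut at the gap:
    assert list(_slicechunk(fakerevlog(), [0, 1, 2, 3, 4])) == [[0, 1, 2], [3, 4]]
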
258 258 @attr.s(slots=True, frozen=True)
259 259 class _deltainfo(object):
260 260 distance = attr.ib()
261 261 deltalen = attr.ib()
262 262 data = attr.ib()
263 263 base = attr.ib()
264 264 chainbase = attr.ib()
265 265 chainlen = attr.ib()
266 266 compresseddeltalen = attr.ib()
267 267
268 268 class _deltacomputer(object):
269 269 def __init__(self, revlog):
270 270 self.revlog = revlog
271 271
272 272 def _getcandidaterevs(self, p1, p2, cachedelta):
273 273 """
274 274 Provides candidate revisions that are worth diffing against,
275 275 grouped by preference level.
276 276 """
277 277 revlog = self.revlog
278 278 curr = len(revlog)
279 279 prev = curr - 1
280 280 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
281 281
282 282 # should we try to build a delta?
283 283 if prev != nullrev and revlog.storedeltachains:
284 284 tested = set()
285 285 # This condition is true most of the time when processing
286 286 # changegroup data into a generaldelta repo. The only time it
287 287 # isn't true is if this is the first revision in a delta chain
288 288 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
289 289 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
290 290 # Assume what we received from the server is a good choice
291 291 # build delta will reuse the cache
292 292 yield (cachedelta[0],)
293 293 tested.add(cachedelta[0])
294 294
295 295 if revlog._generaldelta:
296 296 # exclude already lazy tested base if any
297 297 parents = [p for p in (p1r, p2r)
298 298 if p != nullrev and p not in tested]
299 299 if parents and not revlog._aggressivemergedeltas:
300 300 # Pick whichever parent is closer to us (to minimize the
301 301 # chance of having to build a fulltext).
302 302 parents = [max(parents)]
303 303 tested.update(parents)
304 304 yield parents
305 305
306 306 if prev not in tested:
307 307 # other approaches failed; try against prev to hopefully save us a
308 308 # fulltext.
309 309 yield (prev,)
310 310
311 311 def buildtext(self, revinfo, fh):
312 312 """Builds a fulltext version of a revision
313 313
314 314 revinfo: _revisioninfo instance that contains all needed info
315 315 fh: file handle to either the .i or the .d revlog file,
316 316 depending on whether it is inlined or not
317 317 """
318 318 btext = revinfo.btext
319 319 if btext[0] is not None:
320 320 return btext[0]
321 321
322 322 revlog = self.revlog
323 323 cachedelta = revinfo.cachedelta
324 324 flags = revinfo.flags
325 325 node = revinfo.node
326 326
327 327 baserev = cachedelta[0]
328 328 delta = cachedelta[1]
329 329 # special case deltas which replace entire base; no need to decode
330 330 # base revision. this neatly avoids censored bases, which throw when
331 331 # they're decoded.
332 332 hlen = struct.calcsize(">lll")
333 333 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
334 334 len(delta) - hlen):
335 335 btext[0] = delta[hlen:]
336 336 else:
337 337 basetext = revlog.revision(baserev, _df=fh, raw=True)
338 338 btext[0] = mdiff.patch(basetext, delta)
339 339
340 340 try:
341 341 res = revlog._processflags(btext[0], flags, 'read', raw=True)
342 342 btext[0], validatehash = res
343 343 if validatehash:
344 344 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
345 345 if flags & REVIDX_ISCENSORED:
346 346 raise RevlogError(_('node %s is not censored') % node)
347 347 except CensoredNodeError:
348 348 # must pass the censored index flag to add censored revisions
349 349 if not flags & REVIDX_ISCENSORED:
350 350 raise
351 351 return btext[0]
352 352
353 353 def _builddeltadiff(self, base, revinfo, fh):
354 354 revlog = self.revlog
355 355 t = self.buildtext(revinfo, fh)
356 356 if revlog.iscensored(base):
357 357 # deltas based on a censored revision must replace the
358 358 # full content in one patch, so delta works everywhere
359 359 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
360 360 delta = header + t
361 361 else:
362 362 ptext = revlog.revision(base, _df=fh, raw=True)
363 363 delta = mdiff.textdiff(ptext, t)
364 364
365 365 return delta
366 366
367 367 def _builddeltainfo(self, revinfo, base, fh):
368 368 # can we use the cached delta?
369 369 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
370 370 delta = revinfo.cachedelta[1]
371 371 else:
372 372 delta = self._builddeltadiff(base, revinfo, fh)
373 373 revlog = self.revlog
374 374 header, data = revlog.compress(delta)
375 375 deltalen = len(header) + len(data)
376 376 chainbase = revlog.chainbase(base)
377 377 offset = revlog.end(len(revlog) - 1)
378 378 dist = deltalen + offset - revlog.start(chainbase)
379 379 if revlog._generaldelta:
380 380 deltabase = base
381 381 else:
382 382 deltabase = chainbase
383 383 chainlen, compresseddeltalen = revlog._chaininfo(base)
384 384 chainlen += 1
385 385 compresseddeltalen += deltalen
386 386 return _deltainfo(dist, deltalen, (header, data), deltabase,
387 387 chainbase, chainlen, compresseddeltalen)
388 388
389 389 def finddeltainfo(self, revinfo, fh):
390 390 """Find an acceptable delta against a candidate revision
391 391
392 392 revinfo: information about the revision (instance of _revisioninfo)
393 393 fh: file handle to either the .i or the .d revlog file,
394 394 depending on whether it is inlined or not
395 395
396 396 Returns the first acceptable candidate revision, as ordered by
397 397 _getcandidaterevs
398 398 """
399 399 cachedelta = revinfo.cachedelta
400 400 p1 = revinfo.p1
401 401 p2 = revinfo.p2
402 402 revlog = self.revlog
403 403
404 404 deltainfo = None
405 405 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
406 406 nominateddeltas = []
407 407 for candidaterev in candidaterevs:
408 408 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
409 409 if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
410 410 nominateddeltas.append(candidatedelta)
411 411 if nominateddeltas:
412 412 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
413 413 break
414 414
415 415 return deltainfo
416 416
417 417 @attr.s(slots=True, frozen=True)
418 418 class _revisioninfo(object):
419 419 """Information about a revision that allows building its fulltext
420 420 node: expected hash of the revision
421 421 p1, p2: parent revs of the revision
422 422 btext: built text cache consisting of a one-element list
423 423 cachedelta: (baserev, uncompressed_delta) or None
424 424 flags: flags associated to the revision storage
425 425
426 426 One of btext[0] or cachedelta must be set.
427 427 """
428 428 node = attr.ib()
429 429 p1 = attr.ib()
430 430 p2 = attr.ib()
431 431 btext = attr.ib()
432 432 textlen = attr.ib()
433 433 cachedelta = attr.ib()
434 434 flags = attr.ib()
435 435
436 436 # index v0:
437 437 # 4 bytes: offset
438 438 # 4 bytes: compressed length
439 439 # 4 bytes: base rev
440 440 # 4 bytes: link rev
441 441 # 20 bytes: parent 1 nodeid
442 442 # 20 bytes: parent 2 nodeid
443 443 # 20 bytes: nodeid
444 444 indexformatv0 = struct.Struct(">4l20s20s20s")
445 445 indexformatv0_pack = indexformatv0.pack
446 446 indexformatv0_unpack = indexformatv0.unpack
447 447
448 448 class revlogoldio(object):
449 449 def __init__(self):
450 450 self.size = indexformatv0.size
451 451
452 452 def parseindex(self, data, inline):
453 453 s = self.size
454 454 index = []
455 455 nodemap = {nullid: nullrev}
456 456 n = off = 0
457 457 l = len(data)
458 458 while off + s <= l:
459 459 cur = data[off:off + s]
460 460 off += s
461 461 e = indexformatv0_unpack(cur)
462 462 # transform to revlogv1 format
463 463 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
464 464 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
465 465 index.append(e2)
466 466 nodemap[e[6]] = n
467 467 n += 1
468 468
469 469 # add the magic null revision at -1
470 470 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
471 471
472 472 return index, nodemap, None
473 473
474 474 def packentry(self, entry, node, version, rev):
475 475 if gettype(entry[0]):
476 476 raise RevlogError(_('index entry flags need revlog version 1'))
477 477 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
478 478 node(entry[5]), node(entry[6]), entry[7])
479 479 return indexformatv0_pack(*e2)
480 480
481 481 # index ng:
482 482 # 6 bytes: offset
483 483 # 2 bytes: flags
484 484 # 4 bytes: compressed length
485 485 # 4 bytes: uncompressed length
486 486 # 4 bytes: base rev
487 487 # 4 bytes: link rev
488 488 # 4 bytes: parent 1 rev
489 489 # 4 bytes: parent 2 rev
490 490 # 32 bytes: nodeid
491 491 indexformatng = struct.Struct(">Qiiiiii20s12x")
492 492 indexformatng_pack = indexformatng.pack
493 493 versionformat = struct.Struct(">I")
494 494 versionformat_pack = versionformat.pack
495 495 versionformat_unpack = versionformat.unpack
496 496
497 497 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
498 498 # signed integer)
499 499 _maxentrysize = 0x7fffffff
500 500
501 501 class revlogio(object):
502 502 def __init__(self):
503 503 self.size = indexformatng.size
504 504
505 505 def parseindex(self, data, inline):
506 506 # call the C implementation to parse the index data
507 507 index, cache = parsers.parse_index2(data, inline)
508 508 return index, getattr(index, 'nodemap', None), cache
509 509
510 510 def packentry(self, entry, node, version, rev):
511 511 p = indexformatng_pack(*entry)
512 512 if rev == 0:
513 513 p = versionformat_pack(version) + p[4:]
514 514 return p
515 515
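
Packing one v1 ("ng") entry per the layout documented above (hypothetical field values; the 20-byte nodeid is zero-padded to 32 bytes on disk, and rev 0's first 4 bytes are later overwritten with the version header, as packentry shows):

    entry = (offset_type(0, 0),   # offset << 16 | flags
             30,                  # compressed length
             42,                  # uncompressed length
             0,                   # base rev
             0,                   # link rev
             -1, -1,              # parent revs (nullrev)
             '\x99' * 20)         # nodeid, padded by the trailing 12x
    packed = indexformatng_pack(*entry)
    assert len(packed) == indexformatng.size == 64
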
516 516 class revlog(object):
517 517 """
518 518 the underlying revision storage object
519 519
520 520 A revlog consists of two parts, an index and the revision data.
521 521
522 522 The index is a file with a fixed record size containing
523 523 information on each revision, including its nodeid (hash), the
524 524 nodeids of its parents, the position and offset of its data within
525 525 the data file, and the revision it's based on. Finally, each entry
526 526 contains a linkrev entry that can serve as a pointer to external
527 527 data.
528 528
529 529 The revision data itself is a linear collection of data chunks.
530 530 Each chunk represents a revision and is usually represented as a
531 531 delta against the previous chunk. To bound lookup time, runs of
532 532 deltas are limited to about 2 times the length of the original
533 533 version data. This makes retrieval of a version proportional to
534 534 its size, or O(1) relative to the number of revisions.
535 535
536 536 Both pieces of the revlog are written to in an append-only
537 537 fashion, which means we never need to rewrite a file to insert or
538 538 remove data, and can use some simple techniques to avoid the need
539 539 for locking while reading.
540 540
541 541 If checkambig, indexfile is opened with checkambig=True at
542 542 writing, to avoid file stat ambiguity.
543 543
544 544 If mmaplargeindex is True, and an mmapindexthreshold is set, the
545 545 index will be mmapped rather than read if it is larger than the
546 546 configured threshold.
547 547 """
548 548 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
549 549 mmaplargeindex=False):
550 550 """
551 551 create a revlog object
552 552
553 553 opener is a function that abstracts the file opening operation
554 554 and can be used to implement COW semantics or the like.
555 555 """
556 556 self.indexfile = indexfile
557 557 self.datafile = datafile or (indexfile[:-2] + ".d")
558 558 self.opener = opener
559 559 # When True, indexfile is opened with checkambig=True at writing, to
560 560 # avoid file stat ambiguity.
561 561 self._checkambig = checkambig
562 562 # 3-tuple of (node, rev, text) for a raw revision.
563 563 self._cache = None
564 564 # Maps rev to chain base rev.
565 565 self._chainbasecache = util.lrucachedict(100)
566 566 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
567 567 self._chunkcache = (0, '')
568 568 # How much data to read and cache into the raw revlog data cache.
569 569 self._chunkcachesize = 65536
570 570 self._maxchainlen = None
571 571 self._aggressivemergedeltas = False
572 572 self.index = []
573 573 # Mapping of partial identifiers to full nodes.
574 574 self._pcache = {}
575 575 # Mapping of revision integer to full node.
576 576 self._nodecache = {nullid: nullrev}
577 577 self._nodepos = None
578 578 self._compengine = 'zlib'
579 579 self._maxdeltachainspan = -1
580 580 self._withsparseread = False
581 581 self._srdensitythreshold = 0.25
582 582 self._srmingapsize = 262144
583 583
584 584 mmapindexthreshold = None
585 585 v = REVLOG_DEFAULT_VERSION
586 586 opts = getattr(opener, 'options', None)
587 587 if opts is not None:
588 588 if 'revlogv2' in opts:
589 589 # version 2 revlogs always use generaldelta.
590 590 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
591 591 elif 'revlogv1' in opts:
592 592 if 'generaldelta' in opts:
593 593 v |= FLAG_GENERALDELTA
594 594 else:
595 595 v = 0
596 596 if 'chunkcachesize' in opts:
597 597 self._chunkcachesize = opts['chunkcachesize']
598 598 if 'maxchainlen' in opts:
599 599 self._maxchainlen = opts['maxchainlen']
600 600 if 'aggressivemergedeltas' in opts:
601 601 self._aggressivemergedeltas = opts['aggressivemergedeltas']
602 602 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
603 603 if 'compengine' in opts:
604 604 self._compengine = opts['compengine']
605 605 if 'maxdeltachainspan' in opts:
606 606 self._maxdeltachainspan = opts['maxdeltachainspan']
607 607 if mmaplargeindex and 'mmapindexthreshold' in opts:
608 608 mmapindexthreshold = opts['mmapindexthreshold']
609 609 self._withsparseread = bool(opts.get('with-sparse-read', False))
610 610 if 'sparse-read-density-threshold' in opts:
611 611 self._srdensitythreshold = opts['sparse-read-density-threshold']
612 612 if 'sparse-read-min-gap-size' in opts:
613 613 self._srmingapsize = opts['sparse-read-min-gap-size']
614 614
615 615 if self._chunkcachesize <= 0:
616 616 raise RevlogError(_('revlog chunk cache size %r is not greater '
617 617 'than 0') % self._chunkcachesize)
618 618 elif self._chunkcachesize & (self._chunkcachesize - 1):
619 619 raise RevlogError(_('revlog chunk cache size %r is not a power '
620 620 'of 2') % self._chunkcachesize)
621 621
622 622 indexdata = ''
623 623 self._initempty = True
624 624 try:
625 625 with self._indexfp() as f:
626 626 if (mmapindexthreshold is not None and
627 627 self.opener.fstat(f).st_size >= mmapindexthreshold):
628 628 indexdata = util.buffer(util.mmapread(f))
629 629 else:
630 630 indexdata = f.read()
631 631 if len(indexdata) > 0:
632 632 v = versionformat_unpack(indexdata[:4])[0]
633 633 self._initempty = False
634 634 except IOError as inst:
635 635 if inst.errno != errno.ENOENT:
636 636 raise
637 637
638 638 self.version = v
639 639 self._inline = v & FLAG_INLINE_DATA
640 640 self._generaldelta = v & FLAG_GENERALDELTA
641 641 flags = v & ~0xFFFF
642 642 fmt = v & 0xFFFF
643 643 if fmt == REVLOGV0:
644 644 if flags:
645 645 raise RevlogError(_('unknown flags (%#04x) in version %d '
646 646 'revlog %s') %
647 647 (flags >> 16, fmt, self.indexfile))
648 648 elif fmt == REVLOGV1:
649 649 if flags & ~REVLOGV1_FLAGS:
650 650 raise RevlogError(_('unknown flags (%#04x) in version %d '
651 651 'revlog %s') %
652 652 (flags >> 16, fmt, self.indexfile))
653 653 elif fmt == REVLOGV2:
654 654 if flags & ~REVLOGV2_FLAGS:
655 655 raise RevlogError(_('unknown flags (%#04x) in version %d '
656 656 'revlog %s') %
657 657 (flags >> 16, fmt, self.indexfile))
658 658 else:
659 659 raise RevlogError(_('unknown version (%d) in revlog %s') %
660 660 (fmt, self.indexfile))
661 661
662 662 self.storedeltachains = True
663 663
664 664 self._io = revlogio()
665 665 if self.version == REVLOGV0:
666 666 self._io = revlogoldio()
667 667 try:
668 668 d = self._io.parseindex(indexdata, self._inline)
669 669 except (ValueError, IndexError):
670 670 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
671 671 self.index, nodemap, self._chunkcache = d
672 672 if nodemap is not None:
673 673 self.nodemap = self._nodecache = nodemap
674 674 if not self._chunkcache:
675 675 self._chunkclear()
676 676 # revnum -> (chain-length, sum-delta-length)
677 677 self._chaininfocache = {}
678 678 # revlog header -> revlog compressor
679 679 self._decompressors = {}
680 680
681 681 @util.propertycache
682 682 def _compressor(self):
683 683 return util.compengines[self._compengine].revlogcompressor()
684 684
685 685 def _indexfp(self, mode='r'):
686 686 """file object for the revlog's index file"""
687 687 args = {r'mode': mode}
688 688 if mode != 'r':
689 689 args[r'checkambig'] = self._checkambig
690 690 if mode == 'w':
691 691 args[r'atomictemp'] = True
692 692 return self.opener(self.indexfile, **args)
693 693
694 694 def _datafp(self, mode='r'):
695 695 """file object for the revlog's data file"""
696 696 return self.opener(self.datafile, mode=mode)
697 697
698 698 @contextlib.contextmanager
699 699 def _datareadfp(self, existingfp=None):
700 700 """file object suitable to read data"""
701 701 if existingfp is not None:
702 702 yield existingfp
703 703 else:
704 704 if self._inline:
705 705 func = self._indexfp
706 706 else:
707 707 func = self._datafp
708 708 with func() as fp:
709 709 yield fp
710 710
711 711 def tip(self):
712 712 return self.node(len(self.index) - 2)
713 713 def __contains__(self, rev):
714 714 return 0 <= rev < len(self)
715 715 def __len__(self):
716 716 return len(self.index) - 1
717 717 def __iter__(self):
718 718 return iter(xrange(len(self)))
719 719 def revs(self, start=0, stop=None):
720 720 """iterate over all revs in this revlog (from start to stop)"""
721 721 step = 1
722 722 if stop is not None:
723 723 if start > stop:
724 724 step = -1
725 725 stop += step
726 726 else:
727 727 stop = len(self)
728 728 return xrange(start, stop, step)
729 729
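
A standalone copy of the revs() logic above, showing its one subtlety: unlike xrange, stop is inclusive, and start > stop iterates backwards.

    def revs(n, start=0, stop=None):
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = n
        return xrange(start, stop, step)

    assert list(revs(5, 1, 3)) == [1, 2, 3]
    assert list(revs(5, 3, 1)) == [3, 2, 1]
    assert list(revs(5)) == [0, 1, 2, 3, 4]
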
730 730 @util.propertycache
731 731 def nodemap(self):
732 732 self.rev(self.node(0))
733 733 return self._nodecache
734 734
735 735 def hasnode(self, node):
736 736 try:
737 737 self.rev(node)
738 738 return True
739 739 except KeyError:
740 740 return False
741 741
742 742 def clearcaches(self):
743 743 self._cache = None
744 744 self._chainbasecache.clear()
745 745 self._chunkcache = (0, '')
746 746 self._pcache = {}
747 747
748 748 try:
749 749 self._nodecache.clearcaches()
750 750 except AttributeError:
751 751 self._nodecache = {nullid: nullrev}
752 752 self._nodepos = None
753 753
754 754 def rev(self, node):
755 755 try:
756 756 return self._nodecache[node]
757 757 except TypeError:
758 758 raise
759 759 except RevlogError:
760 760 # parsers.c radix tree lookup failed
761 761 if node == wdirid:
762 762 raise error.WdirUnsupported
763 763 raise LookupError(node, self.indexfile, _('no node'))
764 764 except KeyError:
765 765 # pure python cache lookup failed
766 766 n = self._nodecache
767 767 i = self.index
768 768 p = self._nodepos
769 769 if p is None:
770 770 p = len(i) - 2
771 771 for r in xrange(p, -1, -1):
772 772 v = i[r][7]
773 773 n[v] = r
774 774 if v == node:
775 775 self._nodepos = r - 1
776 776 return r
777 777 if node == wdirid:
778 778 raise error.WdirUnsupported
779 779 raise LookupError(node, self.indexfile, _('no node'))
780 780
781 781 # Accessors for index entries.
782 782
783 783 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
784 784 # are flags.
785 785 def start(self, rev):
786 786 return int(self.index[rev][0] >> 16)
787 787
788 788 def flags(self, rev):
789 789 return self.index[rev][0] & 0xFFFF
790 790
791 791 def length(self, rev):
792 792 return self.index[rev][1]
793 793
794 794 def rawsize(self, rev):
795 795 """return the length of the uncompressed text for a given revision"""
796 796 l = self.index[rev][2]
797 797 if l >= 0:
798 798 return l
799 799
800 800 t = self.revision(rev, raw=True)
801 801 return len(t)
802 802
803 803 def size(self, rev):
804 804 """length of non-raw text (processed by a "read" flag processor)"""
805 805 # fast path: if no "read" flag processor could change the content,
806 806 # size is rawsize. note: ELLIPSIS is known to not change the content.
807 807 flags = self.flags(rev)
808 808 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
809 809 return self.rawsize(rev)
810 810
811 811 return len(self.revision(rev, raw=False))
812 812
813 813 def chainbase(self, rev):
814 814 base = self._chainbasecache.get(rev)
815 815 if base is not None:
816 816 return base
817 817
818 818 index = self.index
819 819 base = index[rev][3]
820 820 while base != rev:
821 821 rev = base
822 822 base = index[rev][3]
823 823
824 824 self._chainbasecache[rev] = base
825 825 return base
826 826
827 827 def linkrev(self, rev):
828 828 return self.index[rev][4]
829 829
830 830 def parentrevs(self, rev):
831 831 try:
832 832 entry = self.index[rev]
833 833 except IndexError:
834 834 if rev == wdirrev:
835 835 raise error.WdirUnsupported
836 836 raise
837 837
838 838 return entry[5], entry[6]
839 839
840 840 def node(self, rev):
841 841 try:
842 842 return self.index[rev][7]
843 843 except IndexError:
844 844 if rev == wdirrev:
845 845 raise error.WdirUnsupported
846 846 raise
847 847
848 848 # Derived from index values.
849 849
850 850 def end(self, rev):
851 851 return self.start(rev) + self.length(rev)
852 852
853 853 def parents(self, node):
854 854 i = self.index
855 855 d = i[self.rev(node)]
856 856 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
857 857
858 858 def chainlen(self, rev):
859 859 return self._chaininfo(rev)[0]
860 860
861 861 def _chaininfo(self, rev):
862 862 chaininfocache = self._chaininfocache
863 863 if rev in chaininfocache:
864 864 return chaininfocache[rev]
865 865 index = self.index
866 866 generaldelta = self._generaldelta
867 867 iterrev = rev
868 868 e = index[iterrev]
869 869 clen = 0
870 870 compresseddeltalen = 0
871 871 while iterrev != e[3]:
872 872 clen += 1
873 873 compresseddeltalen += e[1]
874 874 if generaldelta:
875 875 iterrev = e[3]
876 876 else:
877 877 iterrev -= 1
878 878 if iterrev in chaininfocache:
879 879 t = chaininfocache[iterrev]
880 880 clen += t[0]
881 881 compresseddeltalen += t[1]
882 882 break
883 883 e = index[iterrev]
884 884 else:
885 885 # Add text length of base since decompressing that also takes
886 886 # work. For cache hits the length is already included.
887 887 compresseddeltalen += e[1]
888 888 r = (clen, compresseddeltalen)
889 889 chaininfocache[rev] = r
890 890 return r
891 891
892 892 def _deltachain(self, rev, stoprev=None):
893 893 """Obtain the delta chain for a revision.
894 894
895 895 ``stoprev`` specifies a revision to stop at. If not specified, we
896 896 stop at the base of the chain.
897 897
898 898 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
899 899 revs in ascending order and ``stopped`` is a bool indicating whether
900 900 ``stoprev`` was hit.
901 901 """
902 902 # Try C implementation.
903 903 try:
904 904 return self.index.deltachain(rev, stoprev, self._generaldelta)
905 905 except AttributeError:
906 906 pass
907 907
908 908 chain = []
909 909
910 910 # Alias to prevent attribute lookup in tight loop.
911 911 index = self.index
912 912 generaldelta = self._generaldelta
913 913
914 914 iterrev = rev
915 915 e = index[iterrev]
916 916 while iterrev != e[3] and iterrev != stoprev:
917 917 chain.append(iterrev)
918 918 if generaldelta:
919 919 iterrev = e[3]
920 920 else:
921 921 iterrev -= 1
922 922 e = index[iterrev]
923 923
924 924 if iterrev == stoprev:
925 925 stopped = True
926 926 else:
927 927 chain.append(iterrev)
928 928 stopped = False
929 929
930 930 chain.reverse()
931 931 return chain, stopped
932 932
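
The pure-Python fallback above amounts to following the base-rev field (entry[3]) until a revision is its own base, i.e. a fulltext. A compact sketch with a hypothetical generaldelta index:

    basefield = {0: 0, 1: 0, 2: 1, 3: 1, 4: 3}   # rev -> its delta base
    def chain(rev):
        out = []
        while basefield[rev] != rev:
            out.append(rev)
            rev = basefield[rev]
        out.append(rev)         # the fulltext at the bottom of the chain
        return out[::-1]        # ascending order, like _deltachain
    assert chain(4) == [0, 1, 3, 4]
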
933 933 def ancestors(self, revs, stoprev=0, inclusive=False):
934 934 """Generate the ancestors of 'revs' in reverse topological order.
935 935 Does not generate revs lower than stoprev.
936 936
937 937 See the documentation for ancestor.lazyancestors for more details."""
938 938
939 939 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
940 940 inclusive=inclusive)
941 941
942 942 def descendants(self, revs):
943 943 """Generate the descendants of 'revs' in revision order.
944 944
945 945 Yield a sequence of revision numbers starting with a child of
946 946 some rev in revs, i.e., each revision is *not* considered a
947 947 descendant of itself. Results are ordered by revision number (a
948 948 topological sort)."""
949 949 first = min(revs)
950 950 if first == nullrev:
951 951 for i in self:
952 952 yield i
953 953 return
954 954
955 955 seen = set(revs)
956 956 for i in self.revs(start=first + 1):
957 957 for x in self.parentrevs(i):
958 958 if x != nullrev and x in seen:
959 959 seen.add(i)
960 960 yield i
961 961 break
962 962
963 963 def findcommonmissing(self, common=None, heads=None):
964 964 """Return a tuple of the ancestors of common and the ancestors of heads
965 965 that are not ancestors of common. In revset terminology, we return the
966 966 tuple:
967 967
968 968 ::common, (::heads) - (::common)
969 969
970 970 The list is sorted by revision number, meaning it is
971 971 topologically sorted.
972 972
973 973 'heads' and 'common' are both lists of node IDs. If heads is
974 974 not supplied, uses all of the revlog's heads. If common is not
975 975 supplied, uses nullid."""
976 976 if common is None:
977 977 common = [nullid]
978 978 if heads is None:
979 979 heads = self.heads()
980 980
981 981 common = [self.rev(n) for n in common]
982 982 heads = [self.rev(n) for n in heads]
983 983
984 984 # we want the ancestors, but inclusive
985 985 class lazyset(object):
986 986 def __init__(self, lazyvalues):
987 987 self.addedvalues = set()
988 988 self.lazyvalues = lazyvalues
989 989
990 990 def __contains__(self, value):
991 991 return value in self.addedvalues or value in self.lazyvalues
992 992
993 993 def __iter__(self):
994 994 added = self.addedvalues
995 995 for r in added:
996 996 yield r
997 997 for r in self.lazyvalues:
998 998 if r not in added:
999 999 yield r
1000 1000
1001 1001 def add(self, value):
1002 1002 self.addedvalues.add(value)
1003 1003
1004 1004 def update(self, values):
1005 1005 self.addedvalues.update(values)
1006 1006
1007 1007 has = lazyset(self.ancestors(common))
1008 1008 has.add(nullrev)
1009 1009 has.update(common)
1010 1010
1011 1011 # take all ancestors from heads that aren't in has
1012 1012 missing = set()
1013 1013 visit = collections.deque(r for r in heads if r not in has)
1014 1014 while visit:
1015 1015 r = visit.popleft()
1016 1016 if r in missing:
1017 1017 continue
1018 1018 else:
1019 1019 missing.add(r)
1020 1020 for p in self.parentrevs(r):
1021 1021 if p not in has:
1022 1022 visit.append(p)
1023 1023 missing = list(missing)
1024 1024 missing.sort()
1025 1025 return has, [self.node(miss) for miss in missing]
1026 1026
1027 1027 def incrementalmissingrevs(self, common=None):
1028 1028 """Return an object that can be used to incrementally compute the
1029 1029 revision numbers of the ancestors of arbitrary sets that are not
1030 1030 ancestors of common. This is an ancestor.incrementalmissingancestors
1031 1031 object.
1032 1032
1033 1033 'common' is a list of revision numbers. If common is not supplied, uses
1034 1034 nullrev.
1035 1035 """
1036 1036 if common is None:
1037 1037 common = [nullrev]
1038 1038
1039 1039 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1040 1040
1041 1041 def findmissingrevs(self, common=None, heads=None):
1042 1042 """Return the revision numbers of the ancestors of heads that
1043 1043 are not ancestors of common.
1044 1044
1045 1045 More specifically, return a list of revision numbers corresponding to
1046 1046 nodes N such that every N satisfies the following constraints:
1047 1047
1048 1048 1. N is an ancestor of some node in 'heads'
1049 1049 2. N is not an ancestor of any node in 'common'
1050 1050
1051 1051 The list is sorted by revision number, meaning it is
1052 1052 topologically sorted.
1053 1053
1054 1054 'heads' and 'common' are both lists of revision numbers. If heads is
1055 1055 not supplied, uses all of the revlog's heads. If common is not
1056 1056 supplied, uses nullid."""
1057 1057 if common is None:
1058 1058 common = [nullrev]
1059 1059 if heads is None:
1060 1060 heads = self.headrevs()
1061 1061
1062 1062 inc = self.incrementalmissingrevs(common=common)
1063 1063 return inc.missingancestors(heads)
1064 1064
1065 1065 def findmissing(self, common=None, heads=None):
1066 1066 """Return the ancestors of heads that are not ancestors of common.
1067 1067
1068 1068 More specifically, return a list of nodes N such that every N
1069 1069 satisfies the following constraints:
1070 1070
1071 1071 1. N is an ancestor of some node in 'heads'
1072 1072 2. N is not an ancestor of any node in 'common'
1073 1073
1074 1074 The list is sorted by revision number, meaning it is
1075 1075 topologically sorted.
1076 1076
1077 1077 'heads' and 'common' are both lists of node IDs. If heads is
1078 1078 not supplied, uses all of the revlog's heads. If common is not
1079 1079 supplied, uses nullid."""
1080 1080 if common is None:
1081 1081 common = [nullid]
1082 1082 if heads is None:
1083 1083 heads = self.heads()
1084 1084
1085 1085 common = [self.rev(n) for n in common]
1086 1086 heads = [self.rev(n) for n in heads]
1087 1087
1088 1088 inc = self.incrementalmissingrevs(common=common)
1089 1089 return [self.node(r) for r in inc.missingancestors(heads)]
1090 1090
1091 1091 def nodesbetween(self, roots=None, heads=None):
1092 1092 """Return a topological path from 'roots' to 'heads'.
1093 1093
1094 1094 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1095 1095 topologically sorted list of all nodes N that satisfy both of
1096 1096 these constraints:
1097 1097
1098 1098 1. N is a descendant of some node in 'roots'
1099 1099 2. N is an ancestor of some node in 'heads'
1100 1100
1101 1101 Every node is considered to be both a descendant and an ancestor
1102 1102 of itself, so every reachable node in 'roots' and 'heads' will be
1103 1103 included in 'nodes'.
1104 1104
1105 1105 'outroots' is the list of reachable nodes in 'roots', i.e., the
1106 1106 subset of 'roots' that is returned in 'nodes'. Likewise,
1107 1107 'outheads' is the subset of 'heads' that is also in 'nodes'.
1108 1108
1109 1109 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1110 1110 unspecified, uses nullid as the only root. If 'heads' is
1111 1111 unspecified, uses all of the revlog's heads."""
1112 1112 nonodes = ([], [], [])
1113 1113 if roots is not None:
1114 1114 roots = list(roots)
1115 1115 if not roots:
1116 1116 return nonodes
1117 1117 lowestrev = min([self.rev(n) for n in roots])
1118 1118 else:
1119 1119 roots = [nullid] # Everybody's a descendant of nullid
1120 1120 lowestrev = nullrev
1121 1121 if (lowestrev == nullrev) and (heads is None):
1122 1122 # We want _all_ the nodes!
1123 1123 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1124 1124 if heads is None:
1125 1125 # All nodes are ancestors, so the latest ancestor is the last
1126 1126 # node.
1127 1127 highestrev = len(self) - 1
1128 1128 # Set ancestors to None to signal that every node is an ancestor.
1129 1129 ancestors = None
1130 1130 # Set heads to an empty dictionary for later discovery of heads
1131 1131 heads = {}
1132 1132 else:
1133 1133 heads = list(heads)
1134 1134 if not heads:
1135 1135 return nonodes
1136 1136 ancestors = set()
1137 1137 # Turn heads into a dictionary so we can remove 'fake' heads.
1138 1138 # Also, later we will be using it to filter out the heads we can't
1139 1139 # find from roots.
1140 1140 heads = dict.fromkeys(heads, False)
1141 1141 # Start at the top and keep marking parents until we're done.
1142 1142 nodestotag = set(heads)
1143 1143 # Remember where the top was so we can use it as a limit later.
1144 1144 highestrev = max([self.rev(n) for n in nodestotag])
1145 1145 while nodestotag:
1146 1146 # grab a node to tag
1147 1147 n = nodestotag.pop()
1148 1148 # Never tag nullid
1149 1149 if n == nullid:
1150 1150 continue
1151 1151 # A node's revision number represents its place in a
1152 1152 # topologically sorted list of nodes.
1153 1153 r = self.rev(n)
1154 1154 if r >= lowestrev:
1155 1155 if n not in ancestors:
1156 1156 # If we are possibly a descendant of one of the roots
1157 1157 # and we haven't already been marked as an ancestor
1158 1158 ancestors.add(n) # Mark as ancestor
1159 1159 # Add non-nullid parents to list of nodes to tag.
1160 1160 nodestotag.update([p for p in self.parents(n) if
1161 1161 p != nullid])
1162 1162 elif n in heads: # We've seen it before, is it a fake head?
1163 1163 # It is: real heads should not be the ancestors of
1164 1164 # any other heads.
1165 1165 heads.pop(n)
1166 1166 if not ancestors:
1167 1167 return nonodes
1168 1168 # Now that we have our set of ancestors, we want to remove any
1169 1169 # roots that are not ancestors.
1170 1170
1171 1171 # If one of the roots was nullid, everything is included anyway.
1172 1172 if lowestrev > nullrev:
1173 1173 # But, since we weren't, let's recompute the lowest rev to not
1174 1174 # include roots that aren't ancestors.
1175 1175
1176 1176 # Filter out roots that aren't ancestors of heads
1177 1177 roots = [root for root in roots if root in ancestors]
1178 1178 # Recompute the lowest revision
1179 1179 if roots:
1180 1180 lowestrev = min([self.rev(root) for root in roots])
1181 1181 else:
1182 1182 # No more roots? Return empty list
1183 1183 return nonodes
1184 1184 else:
1185 1185 # We are descending from nullid, and don't need to care about
1186 1186 # any other roots.
1187 1187 lowestrev = nullrev
1188 1188 roots = [nullid]
1189 1189 # Transform our roots list into a set.
1190 1190 descendants = set(roots)
1191 1191 # Also, keep the original roots so we can filter out roots that aren't
1192 1192 # 'real' roots (i.e. are descended from other roots).
1193 1193 roots = descendants.copy()
1194 1194 # Our topologically sorted list of output nodes.
1195 1195 orderedout = []
1196 1196 # Don't start at nullid since we don't want nullid in our output list,
1197 1197 # and if nullid shows up in descendants, empty parents will look like
1198 1198 # they're descendants.
1199 1199 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1200 1200 n = self.node(r)
1201 1201 isdescendant = False
1202 1202 if lowestrev == nullrev: # Everybody is a descendant of nullid
1203 1203 isdescendant = True
1204 1204 elif n in descendants:
1205 1205 # n is already a descendant
1206 1206 isdescendant = True
1207 1207 # This check only needs to be done here because all the roots
1208 1208 # will start being marked as descendants before the loop.
1209 1209 if n in roots:
1210 1210 # If n was a root, check if it's a 'real' root.
1211 1211 p = tuple(self.parents(n))
1212 1212 # If any of its parents are descendants, it's not a root.
1213 1213 if (p[0] in descendants) or (p[1] in descendants):
1214 1214 roots.remove(n)
1215 1215 else:
1216 1216 p = tuple(self.parents(n))
1217 1217 # A node is a descendant if either of its parents is a
1218 1218 # descendant. (We seeded the descendants set with the roots
1219 1219 # up there, remember?)
1220 1220 if (p[0] in descendants) or (p[1] in descendants):
1221 1221 descendants.add(n)
1222 1222 isdescendant = True
1223 1223 if isdescendant and ((ancestors is None) or (n in ancestors)):
1224 1224 # Only include nodes that are both descendants and ancestors.
1225 1225 orderedout.append(n)
1226 1226 if (ancestors is not None) and (n in heads):
1227 1227 # We're trying to figure out which heads are reachable
1228 1228 # from roots.
1229 1229 # Mark this head as having been reached
1230 1230 heads[n] = True
1231 1231 elif ancestors is None:
1232 1232 # Otherwise, we're trying to discover the heads.
1233 1233 # Assume this is a head because if it isn't, the next step
1234 1234 # will eventually remove it.
1235 1235 heads[n] = True
1236 1236 # But, obviously its parents aren't.
1237 1237 for p in self.parents(n):
1238 1238 heads.pop(p, None)
1239 1239 heads = [head for head, flag in heads.iteritems() if flag]
1240 1240 roots = list(roots)
1241 1241 assert orderedout
1242 1242 assert roots
1243 1243 assert heads
1244 1244 return (orderedout, roots, heads)
1245 1245
1246 1246 def headrevs(self):
1247 1247 try:
1248 1248 return self.index.headrevs()
1249 1249 except AttributeError:
1250 1250 return self._headrevs()
1251 1251
1252 1252 def computephases(self, roots):
1253 1253 return self.index.computephasesmapsets(roots)
1254 1254
1255 1255 def _headrevs(self):
1256 1256 count = len(self)
1257 1257 if not count:
1258 1258 return [nullrev]
1259 1259 # we won't iter over filtered rev so nobody is a head at start
1260 1260 ishead = [0] * (count + 1)
1261 1261 index = self.index
1262 1262 for r in self:
1263 1263 ishead[r] = 1 # I may be a head
1264 1264 e = index[r]
1265 1265 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1266 1266 return [r for r, val in enumerate(ishead) if val]
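# Editor's note (illustrative): a rev is a head exactly when no other rev
# names it as a parent. The extra slot in 'ishead' absorbs writes for
# nullrev (-1), which indexes the last element. For a linear revlog
# 0 <- 1 <- 2, processing r=1 clears ishead[0] and r=2 clears ishead[1],
# leaving [2] as the only head.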
1267 1267
1268 1268 def heads(self, start=None, stop=None):
1269 1269 """return the list of all nodes that have no children
1270 1270
1271 1271 if start is specified, only heads that are descendants of
1272 1272 start will be returned
1273 1273 if stop is specified, it will consider all the revs from stop
1274 1274 as if they had no children
1275 1275 """
1276 1276 if start is None and stop is None:
1277 1277 if not len(self):
1278 1278 return [nullid]
1279 1279 return [self.node(r) for r in self.headrevs()]
1280 1280
1281 1281 if start is None:
1282 1282 start = nullid
1283 1283 if stop is None:
1284 1284 stop = []
1285 1285 stoprevs = set([self.rev(n) for n in stop])
1286 1286 startrev = self.rev(start)
1287 1287 reachable = {startrev}
1288 1288 heads = {startrev}
1289 1289
1290 1290 parentrevs = self.parentrevs
1291 1291 for r in self.revs(start=startrev + 1):
1292 1292 for p in parentrevs(r):
1293 1293 if p in reachable:
1294 1294 if r not in stoprevs:
1295 1295 reachable.add(r)
1296 1296 heads.add(r)
1297 1297 if p in heads and p not in stoprevs:
1298 1298 heads.remove(p)
1299 1299
1300 1300 return [self.node(r) for r in heads]
1301 1301
1302 1302 def children(self, node):
1303 1303 """find the children of a given node"""
1304 1304 c = []
1305 1305 p = self.rev(node)
1306 1306 for r in self.revs(start=p + 1):
1307 1307 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1308 1308 if prevs:
1309 1309 for pr in prevs:
1310 1310 if pr == p:
1311 1311 c.append(self.node(r))
1312 1312 elif p == nullrev:
1313 1313 c.append(self.node(r))
1314 1314 return c
1315 1315
1316 1316 def descendant(self, start, end):
1317 1317 if start == nullrev:
1318 1318 return True
1319 1319 for i in self.descendants([start]):
1320 1320 if i == end:
1321 1321 return True
1322 1322 elif i > end:
1323 1323 break
1324 1324 return False
1325 1325
1326 1326 def commonancestorsheads(self, a, b):
1327 1327 """calculate all the heads of the common ancestors of nodes a and b"""
1328 1328 a, b = self.rev(a), self.rev(b)
1329 1329 try:
1330 1330 ancs = self.index.commonancestorsheads(a, b)
1331 1331 except (AttributeError, OverflowError): # C implementation failed
1332 1332 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1333 1333 return pycompat.maplist(self.node, ancs)
1334 1334
1335 1335 def isancestor(self, a, b):
1336 1336 """return True if node a is an ancestor of node b
1337 1337
1338 1338 The implementation of this is trivial but the use of
1339 1339 commonancestorsheads is not."""
1340 1340 return a in self.commonancestorsheads(a, b)
1341 1341
1342 1342 def ancestor(self, a, b):
1343 1343 """calculate the "best" common ancestor of nodes a and b"""
1344 1344
1345 1345 a, b = self.rev(a), self.rev(b)
1346 1346 try:
1347 1347 ancs = self.index.ancestors(a, b)
1348 1348 except (AttributeError, OverflowError):
1349 1349 ancs = ancestor.ancestors(self.parentrevs, a, b)
1350 1350 if ancs:
1351 1351 # choose a consistent winner when there's a tie
1352 1352 return min(map(self.node, ancs))
1353 1353 return nullid
1354 1354
1355 1355 def _match(self, id):
1356 1356 if isinstance(id, int):
1357 1357 # rev
1358 1358 return self.node(id)
1359 1359 if len(id) == 20:
1360 1360 # possibly a binary node
1361 1361 # odds of a binary node being all hex in ASCII are 1 in 10**25
1362 1362 try:
1363 1363 node = id
1364 1364 self.rev(node) # quick search the index
1365 1365 return node
1366 1366 except LookupError:
1367 1367 pass # may be partial hex id
1368 1368 try:
1369 1369 # str(rev)
1370 1370 rev = int(id)
1371 1371 if str(rev) != id:
1372 1372 raise ValueError
1373 1373 if rev < 0:
1374 1374 rev = len(self) + rev
1375 1375 if rev < 0 or rev >= len(self):
1376 1376 raise ValueError
1377 1377 return self.node(rev)
1378 1378 except (ValueError, OverflowError):
1379 1379 pass
1380 1380 if len(id) == 40:
1381 1381 try:
1382 1382 # a full hex nodeid?
1383 1383 node = bin(id)
1384 1384 self.rev(node)
1385 1385 return node
1386 1386 except (TypeError, LookupError):
1387 1387 pass
1388 1388
1389 1389 def _partialmatch(self, id):
1390 1390 maybewdir = wdirhex.startswith(id)
1391 1391 try:
1392 1392 partial = self.index.partialmatch(id)
1393 1393 if partial and self.hasnode(partial):
1394 1394 if maybewdir:
1395 1395 # single 'ff...' match in radix tree, ambiguous with wdir
1396 1396 raise RevlogError
1397 1397 return partial
1398 1398 if maybewdir:
1399 1399 # no 'ff...' match in radix tree, wdir identified
1400 1400 raise error.WdirUnsupported
1401 1401 return None
1402 1402 except RevlogError:
1403 1403 # parsers.c radix tree lookup gave multiple matches
1404 1404 # fast path: for unfiltered changelog, radix tree is accurate
1405 1405 if not getattr(self, 'filteredrevs', None):
1406 1406 raise LookupError(id, self.indexfile,
1407 1407 _('ambiguous identifier'))
1408 1408 # fall through to slow path that filters hidden revisions
1409 1409 except (AttributeError, ValueError):
1410 1410 # we are pure python, or key was too short to search radix tree
1411 1411 pass
1412 1412
1413 1413 if id in self._pcache:
1414 1414 return self._pcache[id]
1415 1415
1416 1416 if len(id) < 40:
1417 1417 try:
1418 1418 # hex(node)[:...]
1419 1419 l = len(id) // 2 # grab an even number of digits
1420 1420 prefix = bin(id[:l * 2])
1421 1421 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1422 1422 nl = [n for n in nl if hex(n).startswith(id) and
1423 1423 self.hasnode(n)]
1424 1424 if len(nl) > 0:
1425 1425 if len(nl) == 1 and not maybewdir:
1426 1426 self._pcache[id] = nl[0]
1427 1427 return nl[0]
1428 1428 raise LookupError(id, self.indexfile,
1429 1429 _('ambiguous identifier'))
1430 1430 if maybewdir:
1431 1431 raise error.WdirUnsupported
1432 1432 return None
1433 1433 except (TypeError, binascii.Error):
1434 1434 pass
1435 1435
1436 1436 def lookup(self, id):
1437 1437 """locate a node based on:
1438 1438 - revision number or str(revision number)
1439 1439 - nodeid or subset of hex nodeid
1440 1440 """
1441 1441 n = self._match(id)
1442 1442 if n is not None:
1443 1443 return n
1444 1444 n = self._partialmatch(id)
1445 1445 if n:
1446 1446 return n
1447 1447
1448 1448 raise LookupError(id, self.indexfile, _('no match found'))
1449 1449
1450 1450 def shortest(self, hexnode, minlength=1):
1451 1451 """Find the shortest unambiguous prefix that matches hexnode."""
1452 1452 def isvalid(test):
1453 1453 try:
1454 1454 if self._partialmatch(test) is None:
1455 1455 return False
1456 1456
1457 1457 try:
1458 1458 i = int(test)
1459 1459 # the prefix cannot be mistaken for a rev number if it
1460 1460 # starts with a zero (no str(rev) does) or if the int is
1461 1461 # larger than the length of the revlog
1462 1462 if test[0] == '0' or i > len(self):
1463 1463 return True
1464 1464 return False
1465 1465 except ValueError:
1466 1466 return True
1467 1467 except error.RevlogError:
1468 1468 return False
1469 1469 except error.WdirUnsupported:
1470 1470 # single 'ff...' match
1471 1471 return True
1472 1472
1473 1473 shortest = hexnode
1474 1474 startlength = max(6, minlength)
1475 1475 length = startlength
1476 1476 while True:
1477 1477 test = hexnode[:length]
1478 1478 if isvalid(test):
1479 1479 shortest = test
1480 1480 if length == minlength or length > startlength:
1481 1481 return shortest
1482 1482 length -= 1
1483 1483 else:
1484 1484 length += 1
1485 1485 if len(shortest) <= length:
1486 1486 return shortest
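# Editor's trace (illustrative): with minlength=1 and a node whose prefix
# becomes unique at 4 hex digits, the loop starts at length 6 (valid,
# shrink), passes 5 and 4 (valid, shrink), fails at 3 (invalid, grow back
# to 4), and returns the 4-digit prefix because len(shortest) <= length.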
1487 1487
1488 1488 def cmp(self, node, text):
1489 1489 """compare text with a given file revision
1490 1490
1491 1491 returns True if text is different from what is stored.
1492 1492 """
1493 1493 p1, p2 = self.parents(node)
1494 1494 return hash(text, p1, p2) != node
1495 1495
1496 1496 def _cachesegment(self, offset, data):
1497 1497 """Add a segment to the revlog cache.
1498 1498
1499 1499 Accepts an absolute offset and the data that is at that location.
1500 1500 """
1501 1501 o, d = self._chunkcache
1502 1502 # try to add to existing cache
1503 1503 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1504 1504 self._chunkcache = o, d + data
1505 1505 else:
1506 1506 self._chunkcache = offset, data
1507 1507
1508 1508 def _readsegment(self, offset, length, df=None):
1509 1509 """Load a segment of raw data from the revlog.
1510 1510
1511 1511 Accepts an absolute offset, length to read, and an optional existing
1512 1512 file handle to read from.
1513 1513
1514 1514 If an existing file handle is passed, it will be seeked and the
1515 1515 original seek position will NOT be restored.
1516 1516
1517 1517 Returns a str or buffer of raw byte data.
1518 1518 """
1519 1519 # Cache data both forward and backward around the requested
1520 1520 # data, in a fixed size window. This helps speed up operations
1521 1521 # involving reading the revlog backwards.
1522 1522 cachesize = self._chunkcachesize
1523 1523 realoffset = offset & ~(cachesize - 1)
1524 1524 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1525 1525 - realoffset)
1526 1526 with self._datareadfp(df) as df:
1527 1527 df.seek(realoffset)
1528 1528 d = df.read(reallength)
1529 1529 self._cachesegment(realoffset, d)
1530 1530 if offset != realoffset or reallength != length:
1531 1531 return util.buffer(d, offset - realoffset, length)
1532 1532 return d
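# Editor's worked example (illustrative; assumes the chunk cache size is a
# power of two, which the bitmask trick requires): with cachesize = 65536,
# offset = 70000 and length = 100,
#     realoffset = 70000 & ~65535                      = 65536
#     reallength = ((70100 + 65536) & ~65535) - 65536  = 65536
# so bytes [65536, 131072) are read and cached, and a buffer starting at
# 70000 - 65536 = 4464 within that window, of length 100, is returned.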
1533 1533
1534 1534 def _getsegment(self, offset, length, df=None):
1535 1535 """Obtain a segment of raw data from the revlog.
1536 1536
1537 1537 Accepts an absolute offset, length of bytes to obtain, and an
1538 1538 optional file handle to the already-opened revlog. If the file
1539 1539 handle is used, its original seek position will not be preserved.
1540 1540
1541 1541 Requests for data may be returned from a cache.
1542 1542
1543 1543 Returns a str or a buffer instance of raw byte data.
1544 1544 """
1545 1545 o, d = self._chunkcache
1546 1546 l = len(d)
1547 1547
1548 1548 # is it in the cache?
1549 1549 cachestart = offset - o
1550 1550 cacheend = cachestart + length
1551 1551 if cachestart >= 0 and cacheend <= l:
1552 1552 if cachestart == 0 and cacheend == l:
1553 1553 return d # avoid a copy
1554 1554 return util.buffer(d, cachestart, cacheend - cachestart)
1555 1555
1556 1556 return self._readsegment(offset, length, df=df)
1557 1557
1558 1558 def _getsegmentforrevs(self, startrev, endrev, df=None):
1559 1559 """Obtain a segment of raw data corresponding to a range of revisions.
1560 1560
1561 1561 Accepts the start and end revisions and an optional already-open
1562 1562 file handle to be used for reading. If the file handle is used, its
1563 1563 seek position will not be preserved.
1564 1564
1565 1565 Requests for data may be satisfied by a cache.
1566 1566
1567 1567 Returns a 2-tuple of (offset, data) for the requested range of
1568 1568 revisions. Offset is the integer offset from the beginning of the
1569 1569 revlog and data is a str or buffer of the raw byte data.
1570 1570
1571 1571 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1572 1572 to determine where each revision's data begins and ends.
1573 1573 """
1574 1574 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1575 1575 # (functions are expensive).
1576 1576 index = self.index
1577 1577 istart = index[startrev]
1578 1578 start = int(istart[0] >> 16)
1579 1579 if startrev == endrev:
1580 1580 end = start + istart[1]
1581 1581 else:
1582 1582 iend = index[endrev]
1583 1583 end = int(iend[0] >> 16) + iend[1]
1584 1584
1585 1585 if self._inline:
1586 1586 start += (startrev + 1) * self._io.size
1587 1587 end += (endrev + 1) * self._io.size
1588 1588 length = end - start
1589 1589
1590 1590 return start, self._getsegment(start, length, df=df)
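# Editor's note (illustrative): for inline revlogs the index entries and
# data chunks are interleaved in a single file, so each data offset must be
# shifted past the (rev + 1) index entries that precede it; with the
# 64-byte v1 index entry size, rev 2's data starts 3 * 64 bytes beyond its
# logical offset.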
1591 1591
1592 1592 def _chunk(self, rev, df=None):
1593 1593 """Obtain a single decompressed chunk for a revision.
1594 1594
1595 1595 Accepts an integer revision and an optional already-open file handle
1596 1596 to be used for reading. If used, the seek position of the file will not
1597 1597 be preserved.
1598 1598
1599 1599 Returns a str holding uncompressed data for the requested revision.
1600 1600 """
1601 1601 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1602 1602
1603 1603 def _chunks(self, revs, df=None):
1604 1604 """Obtain decompressed chunks for the specified revisions.
1605 1605
1606 1606 Accepts an iterable of numeric revisions that are assumed to be in
1607 1607 ascending order. Also accepts an optional already-open file handle
1608 1608 to be used for reading. If used, the seek position of the file will
1609 1609 not be preserved.
1610 1610
1611 1611 This function is similar to calling ``self._chunk()`` multiple times,
1612 1612 but is faster.
1613 1613
1614 1614 Returns a list with decompressed data for each requested revision.
1615 1615 """
1616 1616 if not revs:
1617 1617 return []
1618 1618 start = self.start
1619 1619 length = self.length
1620 1620 inline = self._inline
1621 1621 iosize = self._io.size
1622 1622 buffer = util.buffer
1623 1623
1624 1624 l = []
1625 1625 ladd = l.append
1626 1626
1627 1627 if not self._withsparseread:
1628 1628 slicedchunks = (revs,)
1629 1629 else:
1630 1630 slicedchunks = _slicechunk(self, revs)
1631 1631
1632 1632 for revschunk in slicedchunks:
1633 1633 firstrev = revschunk[0]
1634 1634 # Skip trailing revisions with empty diff
1635 1635 for lastrev in revschunk[::-1]:
1636 1636 if length(lastrev) != 0:
1637 1637 break
1638 1638
1639 1639 try:
1640 1640 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1641 1641 except OverflowError:
1642 1642 # issue4215 - we can't cache a run of chunks greater than
1643 1643 # 2G on Windows
1644 1644 return [self._chunk(rev, df=df) for rev in revschunk]
1645 1645
1646 1646 decomp = self.decompress
1647 1647 for rev in revschunk:
1648 1648 chunkstart = start(rev)
1649 1649 if inline:
1650 1650 chunkstart += (rev + 1) * iosize
1651 1651 chunklength = length(rev)
1652 1652 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1653 1653
1654 1654 return l
1655 1655
1656 1656 def _chunkclear(self):
1657 1657 """Clear the raw chunk cache."""
1658 1658 self._chunkcache = (0, '')
1659 1659
1660 1660 def deltaparent(self, rev):
1661 1661 """return deltaparent of the given revision"""
1662 1662 base = self.index[rev][3]
1663 1663 if base == rev:
1664 1664 return nullrev
1665 1665 elif self._generaldelta:
1666 1666 return base
1667 1667 else:
1668 1668 return rev - 1
1669 1669
1670 1670 def revdiff(self, rev1, rev2):
1671 1671 """return or calculate a delta between two revisions
1672 1672
1673 1673 The delta calculated is in binary form and is intended to be written to
1674 1674 revlog data directly. So this function needs raw revision data.
1675 1675 """
1676 1676 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1677 1677 return bytes(self._chunk(rev2))
1678 1678
1679 1679 return mdiff.textdiff(self.revision(rev1, raw=True),
1680 1680 self.revision(rev2, raw=True))
1681 1681
1682 1682 def revision(self, nodeorrev, _df=None, raw=False):
1683 1683 """return an uncompressed revision of a given node or revision
1684 1684 number.
1685 1685
1686 1686 _df - an existing file handle to read from. (internal-only)
1687 1687 raw - an optional argument specifying if the revision data is to be
1688 1688 treated as raw data when applying flag transforms. 'raw' should be set
1689 1689 to True when generating changegroups or in debug commands.
1690 1690 """
1691 1691 if isinstance(nodeorrev, int):
1692 1692 rev = nodeorrev
1693 1693 node = self.node(rev)
1694 1694 else:
1695 1695 node = nodeorrev
1696 1696 rev = None
1697 1697
1698 1698 cachedrev = None
1699 1699 flags = None
1700 1700 rawtext = None
1701 1701 if node == nullid:
1702 1702 return ""
1703 1703 if self._cache:
1704 1704 if self._cache[0] == node:
1705 1705 # _cache only stores rawtext
1706 1706 if raw:
1707 1707 return self._cache[2]
1708 1708 # duplicated, but good for perf
1709 1709 if rev is None:
1710 1710 rev = self.rev(node)
1711 1711 if flags is None:
1712 1712 flags = self.flags(rev)
1713 1713 # no extra flags set, no flag processor runs, text = rawtext
1714 1714 if flags == REVIDX_DEFAULT_FLAGS:
1715 1715 return self._cache[2]
1716 1716 # rawtext is reusable. need to run flag processor
1717 1717 rawtext = self._cache[2]
1718 1718
1719 1719 cachedrev = self._cache[1]
1720 1720
1721 1721 # look up what we need to read
1722 1722 if rawtext is None:
1723 1723 if rev is None:
1724 1724 rev = self.rev(node)
1725 1725
1726 1726 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1727 1727 if stopped:
1728 1728 rawtext = self._cache[2]
1729 1729
1730 1730 # drop cache to save memory
1731 1731 self._cache = None
1732 1732
1733 1733 bins = self._chunks(chain, df=_df)
1734 1734 if rawtext is None:
1735 1735 rawtext = bytes(bins[0])
1736 1736 bins = bins[1:]
1737 1737
1738 1738 rawtext = mdiff.patches(rawtext, bins)
1739 1739 self._cache = (node, rev, rawtext)
1740 1740
1741 1741 if flags is None:
1742 1742 if rev is None:
1743 1743 rev = self.rev(node)
1744 1744 flags = self.flags(rev)
1745 1745
1746 1746 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1747 1747 if validatehash:
1748 1748 self.checkhash(text, node, rev=rev)
1749 1749
1750 1750 return text
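# Editor's sketch (illustrative): conceptually the reconstruction above
# computes
#
#     rawtext = mdiff.patches(chunk(chain[0]),
#                             [chunk(r) for r in chain[1:]])
#
# where 'chain' is the delta chain ending at the requested rev and
# chunk(chain[0]) is a stored full text; a usable cached rawtext replaces
# the leading full-text read.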
1751 1751
1752 1752 def hash(self, text, p1, p2):
1753 1753 """Compute a node hash.
1754 1754
1755 1755 Available as a function so that subclasses can replace the hash
1756 1756 as needed.
1757 1757 """
1758 1758 return hash(text, p1, p2)
1759 1759
1760 1760 def _processflags(self, text, flags, operation, raw=False):
1761 1761 """Inspect revision data flags and apply transforms defined by
1762 1762 registered flag processors.
1763 1763
1764 1764 ``text`` - the revision data to process
1765 1765 ``flags`` - the revision flags
1766 1766 ``operation`` - the operation being performed (read or write)
1767 1767 ``raw`` - an optional argument describing if the raw transform should be
1768 1768 applied.
1769 1769
1770 1770 This method processes the flags in the order (or reverse order if
1771 1771 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1772 1772 flag processors registered for present flags. The order of flags defined
1773 1773 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1774 1774
1775 1775 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1776 1776 processed text and ``validatehash`` is a bool indicating whether the
1777 1777 returned text should be checked for hash integrity.
1778 1778
1779 1779 Note: If the ``raw`` argument is set, it has precedence over the
1780 1780 operation and will only update the value of ``validatehash``.
1781 1781 """
1782 1782 # fast path: no flag processors will run
1783 1783 if flags == 0:
1784 1784 return text, True
1785 1785 if operation not in ('read', 'write'):
1786 1786 raise ProgrammingError(_("invalid '%s' operation") % operation)
1787 1787 # Check all flags are known.
1788 1788 if flags & ~REVIDX_KNOWN_FLAGS:
1789 1789 raise RevlogError(_("incompatible revision flag '%#x'") %
1790 1790 (flags & ~REVIDX_KNOWN_FLAGS))
1791 1791 validatehash = True
1792 1792 # Depending on the operation (read or write), the order might be
1793 1793 # reversed due to non-commutative transforms.
1794 1794 orderedflags = REVIDX_FLAGS_ORDER
1795 1795 if operation == 'write':
1796 1796 orderedflags = reversed(orderedflags)
1797 1797
1798 1798 for flag in orderedflags:
1799 1799 # If a flagprocessor has been registered for a known flag, apply the
1800 1800 # related operation transform and update result tuple.
1801 1801 if flag & flags:
1802 1802 vhash = True
1803 1803
1804 1804 if flag not in _flagprocessors:
1805 1805 message = _("missing processor for flag '%#x'") % (flag)
1806 1806 raise RevlogError(message)
1807 1807
1808 1808 processor = _flagprocessors[flag]
1809 1809 if processor is not None:
1810 1810 readtransform, writetransform, rawtransform = processor
1811 1811
1812 1812 if raw:
1813 1813 vhash = rawtransform(self, text)
1814 1814 elif operation == 'read':
1815 1815 text, vhash = readtransform(self, text)
1816 1816 else: # write operation
1817 1817 text, vhash = writetransform(self, text)
1818 1818 validatehash = validatehash and vhash
1819 1819
1820 1820 return text, validatehash
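# Editor's sketch (hypothetical names; the exact registration helper is an
# assumption): a flag processor is a (read, write, raw) triple where
# read/write return (newtext, validatehash) and raw returns a bool.
#
#     def _noopread(rl, text):
#         return text, True      # hypothetical read transform
#     def _noopwrite(rl, text):
#         return text, True      # hypothetical write transform
#     def _noopraw(rl, text):
#         return True            # hash the stored bytes as-is
#     # registering for some flag bit SOMEFLAG (hypothetical):
#     # addflagprocessor(SOMEFLAG, (_noopread, _noopwrite, _noopraw))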
1821 1821
1822 1822 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1823 1823 """Check node hash integrity.
1824 1824
1825 1825 Available as a function so that subclasses can extend hash mismatch
1826 1826 behaviors as needed.
1827 1827 """
1828 1828 if p1 is None and p2 is None:
1829 1829 p1, p2 = self.parents(node)
1830 1830 if node != self.hash(text, p1, p2):
1831 1831 revornode = rev
1832 1832 if revornode is None:
1833 1833 revornode = templatefilters.short(hex(node))
1834 1834 raise RevlogError(_("integrity check failed on %s:%s")
1835 1835 % (self.indexfile, pycompat.bytestr(revornode)))
1836 1836
1837 def checkinlinesize(self, tr, fp=None):
1837 def _enforceinlinesize(self, tr, fp=None):
1838 1838 """Check if the revlog is too big for inline and convert if so.
1839 1839
1840 1840 This should be called after revisions are added to the revlog. If the
1841 1841 revlog has grown too large to be an inline revlog, it will convert it
1842 1842 to use multiple index and data files.
1843 1843 """
1844 1844 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1845 1845 return
1846 1846
1847 1847 trinfo = tr.find(self.indexfile)
1848 1848 if trinfo is None:
1849 1849 raise RevlogError(_("%s not found in the transaction")
1850 1850 % self.indexfile)
1851 1851
1852 1852 trindex = trinfo[2]
1853 1853 if trindex is not None:
1854 1854 dataoff = self.start(trindex)
1855 1855 else:
1856 1856 # revlog was stripped at start of transaction, use all leftover data
1857 1857 trindex = len(self) - 1
1858 1858 dataoff = self.end(-2)
1859 1859
1860 1860 tr.add(self.datafile, dataoff)
1861 1861
1862 1862 if fp:
1863 1863 fp.flush()
1864 1864 fp.close()
1865 1865
1866 1866 with self._datafp('w') as df:
1867 1867 for r in self:
1868 1868 df.write(self._getsegmentforrevs(r, r)[1])
1869 1869
1870 1870 with self._indexfp('w') as fp:
1871 1871 self.version &= ~FLAG_INLINE_DATA
1872 1872 self._inline = False
1873 1873 io = self._io
1874 1874 for i in self:
1875 1875 e = io.packentry(self.index[i], self.node, self.version, i)
1876 1876 fp.write(e)
1877 1877
1878 1878 # the temp file replaces the real index when we exit the context
1879 1879 # manager
1880 1880
1881 1881 tr.replace(self.indexfile, trindex * self._io.size)
1882 1882 self._chunkclear()
1883 1883
1884 1884 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1885 1885 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1886 1886 """add a revision to the log
1887 1887
1888 1888 text - the revision data to add
1889 1889 transaction - the transaction object used for rollback
1890 1890 link - the linkrev data to add
1891 1891 p1, p2 - the parent nodeids of the revision
1892 1892 cachedelta - an optional precomputed delta
1893 1893 node - nodeid of revision; typically node is not specified, and it is
1894 1894 computed by default as hash(text, p1, p2); however, subclasses might
1895 1895 use a different hashing method (and override checkhash() in that case)
1896 1896 flags - the known flags to set on the revision
1897 1897 deltacomputer - an optional _deltacomputer instance shared between
1898 1898 multiple calls
1899 1899 """
1900 1900 if link == nullrev:
1901 1901 raise RevlogError(_("attempted to add linkrev -1 to %s")
1902 1902 % self.indexfile)
1903 1903
1904 1904 if flags:
1905 1905 node = node or self.hash(text, p1, p2)
1906 1906
1907 1907 rawtext, validatehash = self._processflags(text, flags, 'write')
1908 1908
1909 1909 # If the flag processor modifies the revision data, ignore any provided
1910 1910 # cachedelta.
1911 1911 if rawtext != text:
1912 1912 cachedelta = None
1913 1913
1914 1914 if len(rawtext) > _maxentrysize:
1915 1915 raise RevlogError(
1916 1916 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1917 1917 % (self.indexfile, len(rawtext)))
1918 1918
1919 1919 node = node or self.hash(rawtext, p1, p2)
1920 1920 if node in self.nodemap:
1921 1921 return node
1922 1922
1923 1923 if validatehash:
1924 1924 self.checkhash(rawtext, node, p1=p1, p2=p2)
1925 1925
1926 1926 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1927 1927 flags, cachedelta=cachedelta,
1928 1928 deltacomputer=deltacomputer)
1929 1929
1930 1930 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1931 1931 cachedelta=None, deltacomputer=None):
1932 1932 """add a raw revision with known flags, node and parents
1933 1933 useful when reusing a revision not stored in this revlog (e.g. received
1934 1934 over the wire, or read from an external bundle).
1935 1935 """
1936 1936 dfh = None
1937 1937 if not self._inline:
1938 1938 dfh = self._datafp("a+")
1939 1939 ifh = self._indexfp("a+")
1940 1940 try:
1941 1941 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1942 1942 flags, cachedelta, ifh, dfh,
1943 1943 deltacomputer=deltacomputer)
1944 1944 finally:
1945 1945 if dfh:
1946 1946 dfh.close()
1947 1947 ifh.close()
1948 1948
1949 1949 def compress(self, data):
1950 1950 """Generate a possibly-compressed representation of data."""
1951 1951 if not data:
1952 1952 return '', data
1953 1953
1954 1954 compressed = self._compressor.compress(data)
1955 1955
1956 1956 if compressed:
1957 1957 # The revlog compressor added the header in the returned data.
1958 1958 return '', compressed
1959 1959
1960 1960 if data[0:1] == '\0':
1961 1961 return '', data
1962 1962 return 'u', data
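# Editor's note (illustrative): the one-byte header routes decompression.
# zlib output begins with 'x', so compressed chunks need no extra marker;
# 'u' flags data stored uncompressed; and data that already starts with
# '\0' is returned bare, since decompress() treats a leading NUL as raw.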
1963 1963
1964 1964 def decompress(self, data):
1965 1965 """Decompress a revlog chunk.
1966 1966
1967 1967 The chunk is expected to begin with a header identifying the
1968 1968 format type so it can be routed to an appropriate decompressor.
1969 1969 """
1970 1970 if not data:
1971 1971 return data
1972 1972
1973 1973 # Revlogs are read much more frequently than they are written and many
1974 1974 # chunks only take microseconds to decompress, so performance is
1975 1975 # important here.
1976 1976 #
1977 1977 # We can make a few assumptions about revlogs:
1978 1978 #
1979 1979 # 1) the majority of chunks will be compressed (as opposed to inline
1980 1980 # raw data).
1981 1981 # 2) decompressing *any* data will likely be at least 10x slower than
1982 1982 # returning raw inline data.
1983 1983 # 3) we want to prioritize common and officially supported compression
1984 1984 # engines
1985 1985 #
1986 1986 # It follows that we want to optimize for "decompress compressed data
1987 1987 # when encoded with common and officially supported compression engines"
1988 1988 # case over "raw data" and "data encoded by less common or non-official
1989 1989 # compression engines." That is why we have the inline lookup first
1990 1990 # followed by the compengines lookup.
1991 1991 #
1992 1992 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1993 1993 # compressed chunks. And this matters for changelog and manifest reads.
1994 1994 t = data[0:1]
1995 1995
1996 1996 if t == 'x':
1997 1997 try:
1998 1998 return _zlibdecompress(data)
1999 1999 except zlib.error as e:
2000 2000 raise RevlogError(_('revlog decompress error: %s') % str(e))
2001 2001 # '\0' is more common than 'u' so it goes first.
2002 2002 elif t == '\0':
2003 2003 return data
2004 2004 elif t == 'u':
2005 2005 return util.buffer(data, 1)
2006 2006
2007 2007 try:
2008 2008 compressor = self._decompressors[t]
2009 2009 except KeyError:
2010 2010 try:
2011 2011 engine = util.compengines.forrevlogheader(t)
2012 2012 compressor = engine.revlogcompressor()
2013 2013 self._decompressors[t] = compressor
2014 2014 except KeyError:
2015 2015 raise RevlogError(_('unknown compression type %r') % t)
2016 2016
2017 2017 return compressor.decompress(data)
2018 2018
2019 2019 def _isgooddeltainfo(self, d, textlen):
2020 2020 """Returns True if the given delta is good. Good means that it is within
2021 2021 the disk span, disk size, and chain length bounds that we know to be
2022 2022 performant."""
2023 2023 if d is None:
2024 2024 return False
2025 2025
2026 2026 # - 'd.distance' is the distance from the base revision -- bounding it
2027 2027 # limits the amount of I/O we need to do.
2028 2028 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2029 2029 # need to apply -- bounding it limits the amount of CPU we consume.
2030 2030
2031 2031 defaultmax = textlen * 4
2032 2032 maxdist = self._maxdeltachainspan
2033 2033 if not maxdist:
2034 2034 maxdist = d.distance # ensure the conditional passes
2035 2035 maxdist = max(maxdist, defaultmax)
2036 2036 if (d.distance > maxdist or d.deltalen > textlen or
2037 2037 d.compresseddeltalen > textlen * 2 or
2038 2038 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2039 2039 return False
2040 2040
2041 2041 return True
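# Editor's worked example (illustrative): with textlen = 1000, a delta is
# rejected when deltalen > 1000 or compresseddeltalen > 2000. When
# _maxdeltachainspan is unset, maxdist is seeded with the delta's own
# distance so the span check can never fire; otherwise the distance must
# stay within max(_maxdeltachainspan, 4 * textlen).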
2042 2042
2043 2043 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2044 2044 cachedelta, ifh, dfh, alwayscache=False,
2045 2045 deltacomputer=None):
2046 2046 """internal function to add revisions to the log
2047 2047
2048 2048 see addrevision for argument descriptions.
2049 2049
2050 2050 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2051 2051
2052 2052 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2053 2053 be used.
2054 2054
2055 2055 invariants:
2056 2056 - rawtext is optional (can be None); if not set, cachedelta must be set.
2057 2057 if both are set, they must correspond to each other.
2058 2058 """
2059 2059 if node == nullid:
2060 2060 raise RevlogError(_("%s: attempt to add null revision") %
2061 2061 (self.indexfile))
2062 2062 if node == wdirid:
2063 2063 raise RevlogError(_("%s: attempt to add wdir revision") %
2064 2064 (self.indexfile))
2065 2065
2066 2066 if self._inline:
2067 2067 fh = ifh
2068 2068 else:
2069 2069 fh = dfh
2070 2070
2071 2071 btext = [rawtext]
2072 2072
2073 2073 curr = len(self)
2074 2074 prev = curr - 1
2075 2075 offset = self.end(prev)
2076 2076 p1r, p2r = self.rev(p1), self.rev(p2)
2077 2077
2078 2078 # full versions are inserted when the needed deltas
2079 2079 # become comparable to the uncompressed text
2080 2080 if rawtext is None:
2081 2081 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
2082 2082 cachedelta[1])
2083 2083 else:
2084 2084 textlen = len(rawtext)
2085 2085
2086 2086 if deltacomputer is None:
2087 2087 deltacomputer = _deltacomputer(self)
2088 2088
2089 2089 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2090 2090 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2091 2091
2092 2092 if deltainfo is not None:
2093 2093 base = deltainfo.base
2094 2094 chainbase = deltainfo.chainbase
2095 2095 data = deltainfo.data
2096 2096 l = deltainfo.deltalen
2097 2097 else:
2098 2098 rawtext = deltacomputer.buildtext(revinfo, fh)
2099 2099 data = self.compress(rawtext)
2100 2100 l = len(data[1]) + len(data[0])
2101 2101 base = chainbase = curr
2102 2102
2103 2103 e = (offset_type(offset, flags), l, textlen,
2104 2104 base, link, p1r, p2r, node)
2105 2105 self.index.insert(-1, e)
2106 2106 self.nodemap[node] = curr
2107 2107
2108 2108 entry = self._io.packentry(e, self.node, self.version, curr)
2109 2109 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2110 2110
2111 2111 if alwayscache and rawtext is None:
2112 2112 rawtext = deltacomputer.buildtext(revinfo, fh)
2113 2113
2114 2114 if type(rawtext) == bytes: # only accept immutable objects
2115 2115 self._cache = (node, curr, rawtext)
2116 2116 self._chainbasecache[curr] = chainbase
2117 2117 return node
2118 2118
2119 2119 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2120 2120 # Files opened in a+ mode have inconsistent behavior on various
2121 2121 # platforms. Windows requires that a file positioning call be made
2122 2122 # when the file handle transitions between reads and writes. See
2123 2123 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2124 2124 # platforms, Python or the platform itself can be buggy. Some versions
2125 2125 # of Solaris have been observed to not append at the end of the file
2126 2126 # if the file was seeked to before the end. See issue4943 for more.
2127 2127 #
2128 2128 # We work around this issue by inserting a seek() before writing.
2129 2129 # Note: This is likely not necessary on Python 3.
2130 2130 ifh.seek(0, os.SEEK_END)
2131 2131 if dfh:
2132 2132 dfh.seek(0, os.SEEK_END)
2133 2133
2134 2134 curr = len(self) - 1
2135 2135 if not self._inline:
2136 2136 transaction.add(self.datafile, offset)
2137 2137 transaction.add(self.indexfile, curr * len(entry))
2138 2138 if data[0]:
2139 2139 dfh.write(data[0])
2140 2140 dfh.write(data[1])
2141 2141 ifh.write(entry)
2142 2142 else:
2143 2143 offset += curr * self._io.size
2144 2144 transaction.add(self.indexfile, offset, curr)
2145 2145 ifh.write(entry)
2146 2146 ifh.write(data[0])
2147 2147 ifh.write(data[1])
2148 self.checkinlinesize(transaction, ifh)
2148 self._enforceinlinesize(transaction, ifh)
2149 2149
2150 2150 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2151 2151 """
2152 2152 add a delta group
2153 2153
2154 2154 Given a set of deltas, add them to the revision log. The
2155 2155 first delta is against its parent, which should be in our
2156 2156 log; the rest are against the previous delta.
2157 2157
2158 2158 If ``addrevisioncb`` is defined, it will be called with arguments of
2159 2159 this revlog and the node that was added.
2160 2160 """
2161 2161
2162 2162 nodes = []
2163 2163
2164 2164 r = len(self)
2165 2165 end = 0
2166 2166 if r:
2167 2167 end = self.end(r - 1)
2168 2168 ifh = self._indexfp("a+")
2169 2169 isize = r * self._io.size
2170 2170 if self._inline:
2171 2171 transaction.add(self.indexfile, end + isize, r)
2172 2172 dfh = None
2173 2173 else:
2174 2174 transaction.add(self.indexfile, isize, r)
2175 2175 transaction.add(self.datafile, end)
2176 2176 dfh = self._datafp("a+")
2177 2177 def flush():
2178 2178 if dfh:
2179 2179 dfh.flush()
2180 2180 ifh.flush()
2181 2181 try:
2182 2182 deltacomputer = _deltacomputer(self)
2183 2183 # loop through our set of deltas
2184 2184 for data in deltas:
2185 2185 node, p1, p2, linknode, deltabase, delta, flags = data
2186 2186 link = linkmapper(linknode)
2187 2187 flags = flags or REVIDX_DEFAULT_FLAGS
2188 2188
2189 2189 nodes.append(node)
2190 2190
2191 2191 if node in self.nodemap:
2192 2192 # this can happen if two branches make the same change
2193 2193 continue
2194 2194
2195 2195 for p in (p1, p2):
2196 2196 if p not in self.nodemap:
2197 2197 raise LookupError(p, self.indexfile,
2198 2198 _('unknown parent'))
2199 2199
2200 2200 if deltabase not in self.nodemap:
2201 2201 raise LookupError(deltabase, self.indexfile,
2202 2202 _('unknown delta base'))
2203 2203
2204 2204 baserev = self.rev(deltabase)
2205 2205
2206 2206 if baserev != nullrev and self.iscensored(baserev):
2207 2207 # if base is censored, delta must be full replacement in a
2208 2208 # single patch operation
2209 2209 hlen = struct.calcsize(">lll")
2210 2210 oldlen = self.rawsize(baserev)
2211 2211 newlen = len(delta) - hlen
2212 2212 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2213 2213 raise error.CensoredBaseError(self.indexfile,
2214 2214 self.node(baserev))
2215 2215
2216 2216 if not flags and self._peek_iscensored(baserev, delta, flush):
2217 2217 flags |= REVIDX_ISCENSORED
2218 2218
2219 2219 # We assume consumers of addrevisioncb will want to retrieve
2220 2220 # the added revision, which will require a call to
2221 2221 # revision(). revision() will fast path if there is a cache
2222 2222 # hit. So, we tell _addrevision() to always cache in this case.
2223 2223 # We're only using addgroup() in the context of changegroup
2224 2224 # generation so the revision data can always be handled as raw
2225 2225 # by the flagprocessor.
2226 2226 self._addrevision(node, None, transaction, link,
2227 2227 p1, p2, flags, (baserev, delta),
2228 2228 ifh, dfh,
2229 2229 alwayscache=bool(addrevisioncb),
2230 2230 deltacomputer=deltacomputer)
2231 2231
2232 2232 if addrevisioncb:
2233 2233 addrevisioncb(self, node)
2234 2234
2235 2235 if not dfh and not self._inline:
2236 2236 # addrevision switched from inline to conventional
2237 2237 # reopen the index
2238 2238 ifh.close()
2239 2239 dfh = self._datafp("a+")
2240 2240 ifh = self._indexfp("a+")
2241 2241 finally:
2242 2242 if dfh:
2243 2243 dfh.close()
2244 2244 ifh.close()
2245 2245
2246 2246 return nodes
2247 2247
2248 2248 def iscensored(self, rev):
2249 2249 """Check if a file revision is censored."""
2250 2250 return False
2251 2251
2252 2252 def _peek_iscensored(self, baserev, delta, flush):
2253 2253 """Quickly check if a delta produces a censored revision."""
2254 2254 return False
2255 2255
2256 2256 def getstrippoint(self, minlink):
2257 2257 """find the minimum rev that must be stripped to strip the linkrev
2258 2258
2259 2259 Returns a tuple containing the minimum rev and a set of all revs that
2260 2260 have linkrevs that will be broken by this strip.
2261 2261 """
2262 2262 brokenrevs = set()
2263 2263 strippoint = len(self)
2264 2264
2265 2265 heads = {}
2266 2266 futurelargelinkrevs = set()
2267 2267 for head in self.headrevs():
2268 2268 headlinkrev = self.linkrev(head)
2269 2269 heads[head] = headlinkrev
2270 2270 if headlinkrev >= minlink:
2271 2271 futurelargelinkrevs.add(headlinkrev)
2272 2272
2273 2273 # This algorithm involves walking down the rev graph, starting at the
2274 2274 # heads. Since the revs are topologically sorted according to linkrev,
2275 2275 # once all head linkrevs are below the minlink, we know there are
2276 2276 # no more revs that could have a linkrev greater than minlink.
2277 2277 # So we can stop walking.
2278 2278 while futurelargelinkrevs:
2279 2279 strippoint -= 1
2280 2280 linkrev = heads.pop(strippoint)
2281 2281
2282 2282 if linkrev < minlink:
2283 2283 brokenrevs.add(strippoint)
2284 2284 else:
2285 2285 futurelargelinkrevs.remove(linkrev)
2286 2286
2287 2287 for p in self.parentrevs(strippoint):
2288 2288 if p != nullrev:
2289 2289 plinkrev = self.linkrev(p)
2290 2290 heads[p] = plinkrev
2291 2291 if plinkrev >= minlink:
2292 2292 futurelargelinkrevs.add(plinkrev)
2293 2293
2294 2294 return strippoint, brokenrevs
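# Editor's worked example (illustrative): for a linear revlog with
# linkrevs [0, 1, 2, 3, 4] and minlink = 3, the walk pops revs 4 and 3
# (linkrevs >= 3) and stops once no larger linkrevs remain, returning
# (3, set()). A rev above the strip point whose linkrev is < minlink ends
# up in brokenrevs instead, e.g. a side-branch head with a small linkrev
# sitting at a higher rev number.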
2295 2295
2296 2296 def strip(self, minlink, transaction):
2297 2297 """truncate the revlog on the first revision with a linkrev >= minlink
2298 2298
2299 2299 This function is called when we're stripping revision minlink and
2300 2300 its descendants from the repository.
2301 2301
2302 2302 We have to remove all revisions with linkrev >= minlink, because
2303 2303 the equivalent changelog revisions will be renumbered after the
2304 2304 strip.
2305 2305
2306 2306 So we truncate the revlog on the first of these revisions, and
2307 2307 trust that the caller has saved the revisions that shouldn't be
2308 2308 removed and that it'll re-add them after this truncation.
2309 2309 """
2310 2310 if len(self) == 0:
2311 2311 return
2312 2312
2313 2313 rev, _ = self.getstrippoint(minlink)
2314 2314 if rev == len(self):
2315 2315 return
2316 2316
2317 2317 # first truncate the files on disk
2318 2318 end = self.start(rev)
2319 2319 if not self._inline:
2320 2320 transaction.add(self.datafile, end)
2321 2321 end = rev * self._io.size
2322 2322 else:
2323 2323 end += rev * self._io.size
2324 2324
2325 2325 transaction.add(self.indexfile, end)
2326 2326
2327 2327 # then reset internal state in memory to forget those revisions
2328 2328 self._cache = None
2329 2329 self._chaininfocache = {}
2330 2330 self._chunkclear()
2331 2331 for x in xrange(rev, len(self)):
2332 2332 del self.nodemap[self.node(x)]
2333 2333
2334 2334 del self.index[rev:-1]
2335 2335
2336 2336 def checksize(self):
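# Editor's note (descriptive): returns (dd, di) where dd is the number of
# unexpected trailing bytes in the data file and di the number in the
# index file; both are 0 for a consistent revlog.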
2337 2337 expected = 0
2338 2338 if len(self):
2339 2339 expected = max(0, self.end(len(self) - 1))
2340 2340
2341 2341 try:
2342 2342 with self._datafp() as f:
2343 2343 f.seek(0, 2)
2344 2344 actual = f.tell()
2345 2345 dd = actual - expected
2346 2346 except IOError as inst:
2347 2347 if inst.errno != errno.ENOENT:
2348 2348 raise
2349 2349 dd = 0
2350 2350
2351 2351 try:
2352 2352 f = self.opener(self.indexfile)
2353 2353 f.seek(0, 2)
2354 2354 actual = f.tell()
2355 2355 f.close()
2356 2356 s = self._io.size
2357 2357 i = max(0, actual // s)
2358 2358 di = actual - (i * s)
2359 2359 if self._inline:
2360 2360 databytes = 0
2361 2361 for r in self:
2362 2362 databytes += max(0, self.length(r))
2363 2363 dd = 0
2364 2364 di = actual - len(self) * s - databytes
2365 2365 except IOError as inst:
2366 2366 if inst.errno != errno.ENOENT:
2367 2367 raise
2368 2368 di = 0
2369 2369
2370 2370 return (dd, di)
2371 2371
2372 2372 def files(self):
2373 2373 res = [self.indexfile]
2374 2374 if not self._inline:
2375 2375 res.append(self.datafile)
2376 2376 return res
2377 2377
2378 2378 DELTAREUSEALWAYS = 'always'
2379 2379 DELTAREUSESAMEREVS = 'samerevs'
2380 2380 DELTAREUSENEVER = 'never'
2381 2381
2382 2382 DELTAREUSEFULLADD = 'fulladd'
2383 2383
2384 2384 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2385 2385
2386 2386 def clone(self, tr, destrevlog, addrevisioncb=None,
2387 2387 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2388 2388 """Copy this revlog to another, possibly with format changes.
2389 2389
2390 2390 The destination revlog will contain the same revisions and nodes.
2391 2391 However, it may not be bit-for-bit identical due to e.g. delta encoding
2392 2392 differences.
2393 2393
2394 2394 The ``deltareuse`` argument controls how deltas from the existing revlog
2395 2395 are preserved in the destination revlog. The argument can have the
2396 2396 following values:
2397 2397
2398 2398 DELTAREUSEALWAYS
2399 2399 Deltas will always be reused (if possible), even if the destination
2400 2400 revlog would not select the same revisions for the delta. This is the
2401 2401 fastest mode of operation.
2402 2402 DELTAREUSESAMEREVS
2403 2403 Deltas will be reused if the destination revlog would pick the same
2404 2404 revisions for the delta. This mode strikes a balance between speed
2405 2405 and optimization.
2406 2406 DELTAREUSENEVER
2407 2407 Deltas will never be reused. This is the slowest mode of execution.
2408 2408 This mode can be used to recompute deltas (e.g. if the diff/delta
2409 2409 algorithm changes).
2410 2410
2411 2411 Delta computation can be slow, so the choice of delta reuse policy can
2412 2412 significantly affect run time.
2413 2413
2414 2414 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2415 2415 two extremes. Deltas will be reused if they are appropriate. But if the
2416 2416 delta could choose a better revision, it will do so. This means if you
2417 2417 are converting a non-generaldelta revlog to a generaldelta revlog,
2418 2418 deltas will be recomputed if the delta's parent isn't a parent of the
2419 2419 revision.
2420 2420
2421 2421 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2422 2422 controls whether to compute deltas against both parents for merges.
2423 2423 If None, the destination revlog's current setting is kept.
2424 2424 """
2425 2425 if deltareuse not in self.DELTAREUSEALL:
2426 2426 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2427 2427
2428 2428 if len(destrevlog):
2429 2429 raise ValueError(_('destination revlog is not empty'))
2430 2430
2431 2431 if getattr(self, 'filteredrevs', None):
2432 2432 raise ValueError(_('source revlog has filtered revisions'))
2433 2433 if getattr(destrevlog, 'filteredrevs', None):
2434 2434 raise ValueError(_('destination revlog has filtered revisions'))
2435 2435
2436 2436 # lazydeltabase controls whether to reuse a cached delta, if possible.
2437 2437 oldlazydeltabase = destrevlog._lazydeltabase
2438 2438 oldamd = destrevlog._aggressivemergedeltas
2439 2439
2440 2440 try:
2441 2441 if deltareuse == self.DELTAREUSEALWAYS:
2442 2442 destrevlog._lazydeltabase = True
2443 2443 elif deltareuse == self.DELTAREUSESAMEREVS:
2444 2444 destrevlog._lazydeltabase = False
2445 2445
2446 2446 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2447 2447
2448 2448 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2449 2449 self.DELTAREUSESAMEREVS)
2450 2450
2451 2451 deltacomputer = _deltacomputer(destrevlog)
2452 2452 index = self.index
2453 2453 for rev in self:
2454 2454 entry = index[rev]
2455 2455
2456 2456 # Some classes override linkrev to take filtered revs into
2457 2457 # account. Use raw entry from index.
2458 2458 flags = entry[0] & 0xffff
2459 2459 linkrev = entry[4]
2460 2460 p1 = index[entry[5]][7]
2461 2461 p2 = index[entry[6]][7]
2462 2462 node = entry[7]
2463 2463
2464 2464 # (Possibly) reuse the delta from the revlog if allowed and
2465 2465 # the revlog chunk is a delta.
2466 2466 cachedelta = None
2467 2467 rawtext = None
2468 2468 if populatecachedelta:
2469 2469 dp = self.deltaparent(rev)
2470 2470 if dp != nullrev:
2471 2471 cachedelta = (dp, str(self._chunk(rev)))
2472 2472
2473 2473 if not cachedelta:
2474 2474 rawtext = self.revision(rev, raw=True)
2475 2475
2476 2476
2477 2477 if deltareuse == self.DELTAREUSEFULLADD:
2478 2478 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2479 2479 cachedelta=cachedelta,
2480 2480 node=node, flags=flags,
2481 2481 deltacomputer=deltacomputer)
2482 2482 else:
2483 2483 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2484 2484 checkambig=False)
2485 2485 dfh = None
2486 2486 if not destrevlog._inline:
2487 2487 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2488 2488 try:
2489 2489 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2490 2490 p2, flags, cachedelta, ifh, dfh,
2491 2491 deltacomputer=deltacomputer)
2492 2492 finally:
2493 2493 if dfh:
2494 2494 dfh.close()
2495 2495 ifh.close()
2496 2496
2497 2497 if addrevisioncb:
2498 2498 addrevisioncb(self, rev, node)
2499 2499 finally:
2500 2500 destrevlog._lazydeltabase = oldlazydeltabase
2501 2501 destrevlog._aggressivemergedeltas = oldamd