changelog: make sure datafile is 00changelog.d (API)...
Jun Wu
r32307:3caec778 default
@@ -1,541 +1,543 @@
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 bin,
15 15 hex,
16 16 nullid,
17 17 )
18 18
19 19 from . import (
20 20 encoding,
21 21 error,
22 22 revlog,
23 23 util,
24 24 )
25 25
26 26 _defaultextra = {'branch': 'default'}
27 27
28 28 def _string_escape(text):
29 29 """
30 30 >>> d = {'nl': chr(10), 'bs': chr(92), 'cr': chr(13), 'nul': chr(0)}
31 31 >>> s = "ab%(nl)scd%(bs)s%(bs)sn%(nul)sab%(cr)scd%(bs)s%(nl)s" % d
32 32 >>> s
33 33 'ab\\ncd\\\\\\\\n\\x00ab\\rcd\\\\\\n'
34 34 >>> res = _string_escape(s)
35 35 >>> s == util.unescapestr(res)
36 36 True
37 37 """
38 38 # subset of the string_escape codec
39 39 text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
40 40 return text.replace('\0', '\\0')
41 41
42 42 def decodeextra(text):
43 43 """
44 44 >>> sorted(decodeextra(encodeextra({'foo': 'bar', 'baz': chr(0) + '2'})
45 45 ... ).iteritems())
46 46 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
47 47 >>> sorted(decodeextra(encodeextra({'foo': 'bar',
48 48 ... 'baz': chr(92) + chr(0) + '2'})
49 49 ... ).iteritems())
50 50 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
51 51 """
52 52 extra = _defaultextra.copy()
53 53 for l in text.split('\0'):
54 54 if l:
55 55 if '\\0' in l:
56 56 # fix up \0 without getting into trouble with \\0
57 57 l = l.replace('\\\\', '\\\\\n')
58 58 l = l.replace('\\0', '\0')
59 59 l = l.replace('\n', '')
60 60 k, v = util.unescapestr(l).split(':', 1)
61 61 extra[k] = v
62 62 return extra
63 63
64 64 def encodeextra(d):
65 65 # keys must be sorted to produce a deterministic changelog entry
66 66 items = [_string_escape('%s:%s' % (k, d[k])) for k in sorted(d)]
67 67 return "\0".join(items)
68 68
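
As a quick illustration of the round trip through these helpers (a minimal sketch; it assumes this module is importable as mercurial.changelog under Python 2):

    from mercurial import changelog

    extra = {'branch': 'stable', 'close': '1'}
    enc = changelog.encodeextra(extra)       # 'branch:stable\x00close:1'
    dec = changelog.decodeextra(enc)
    assert dec == {'branch': 'stable', 'close': '1'}
    # An encoding without a 'branch' key decodes with the default branch:
    assert changelog.decodeextra('foo:bar') == {'branch': 'default',
                                                'foo': 'bar'}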
69 69 def stripdesc(desc):
70 70 """strip trailing whitespace and leading and trailing empty lines"""
71 71 return '\n'.join([l.rstrip() for l in desc.splitlines()]).strip('\n')
72 72
73 73 class appender(object):
74 74 '''the changelog index must be updated last on disk, so we use this class
75 75 to delay writes to it'''
76 76 def __init__(self, vfs, name, mode, buf):
77 77 self.data = buf
78 78 fp = vfs(name, mode)
79 79 self.fp = fp
80 80 self.offset = fp.tell()
81 81 self.size = vfs.fstat(fp).st_size
82 82 self._end = self.size
83 83
84 84 def end(self):
85 85 return self._end
86 86 def tell(self):
87 87 return self.offset
88 88 def flush(self):
89 89 pass
90 90 def close(self):
91 91 self.fp.close()
92 92
93 93 def seek(self, offset, whence=0):
94 94 '''virtual file offset spans real file and data'''
95 95 if whence == 0:
96 96 self.offset = offset
97 97 elif whence == 1:
98 98 self.offset += offset
99 99 elif whence == 2:
100 100 self.offset = self.end() + offset
101 101 if self.offset < self.size:
102 102 self.fp.seek(self.offset)
103 103
104 104 def read(self, count=-1):
105 105 '''only trick here is reads that span real file and data'''
106 106 ret = ""
107 107 if self.offset < self.size:
108 108 s = self.fp.read(count)
109 109 ret = s
110 110 self.offset += len(s)
111 111 if count > 0:
112 112 count -= len(s)
113 113 if count != 0:
114 114 doff = self.offset - self.size
115 115 self.data.insert(0, "".join(self.data))
116 116 del self.data[1:]
117 117 s = self.data[0][doff:doff + count]
118 118 self.offset += len(s)
119 119 ret += s
120 120 return ret
121 121
122 122 def write(self, s):
123 123 self.data.append(bytes(s))
124 124 self.offset += len(s)
125 125 self._end += len(s)
126 126
127 127 def _divertopener(opener, target):
128 128 """build an opener that writes in 'target.a' instead of 'target'"""
129 129 def _divert(name, mode='r', checkambig=False):
130 130 if name != target:
131 131 return opener(name, mode)
132 132 return opener(name + ".a", mode)
133 133 return _divert
134 134
135 135 def _delayopener(opener, target, buf):
136 136 """build an opener that stores chunks in 'buf' instead of 'target'"""
137 137 def _delay(name, mode='r', checkambig=False):
138 138 if name != target:
139 139 return opener(name, mode)
140 140 return appender(opener, name, mode, buf)
141 141 return _delay
142 142
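
To make the redirection concrete, here is a hedged sketch of how the two wrappers behave; the vfs class below is a hypothetical stand-in for a real Mercurial vfs:

    import os

    class vfs(object):                  # hypothetical stand-in for a real vfs
        def __call__(self, name, mode='r', checkambig=False):
            return open(name, mode)
        def fstat(self, fp):
            return os.fstat(fp.fileno())

    opener = vfs()
    divert = _divertopener(opener, '00changelog.i')
    divert('00changelog.i', 'a')        # actually opens '00changelog.i.a'
    divert('00changelog.d', 'a')        # other names pass through unchanged

    buf = []
    delay = _delayopener(opener, '00changelog.i', buf)
    f = delay('00changelog.i', 'a')     # an appender: writes go to buf
    f.write('pending entry')
    assert buf == ['pending entry']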
143 143 _changelogrevision = collections.namedtuple(u'changelogrevision',
144 144 (u'manifest', u'user', u'date',
145 145 u'files', u'description',
146 146 u'extra'))
147 147
148 148 class changelogrevision(object):
149 149 """Holds results of a parsed changelog revision.
150 150
151 151 Changelog revisions consist of multiple pieces of data, including
152 152 the manifest node, user, and date. This object exposes a view into
153 153 the parsed object.
154 154 """
155 155
156 156 __slots__ = (
157 157 u'_offsets',
158 158 u'_text',
159 159 )
160 160
161 161 def __new__(cls, text):
162 162 if not text:
163 163 return _changelogrevision(
164 164 manifest=nullid,
165 165 user='',
166 166 date=(0, 0),
167 167 files=[],
168 168 description='',
169 169 extra=_defaultextra,
170 170 )
171 171
172 172 self = super(changelogrevision, cls).__new__(cls)
173 173 # We could return here and implement the following as an __init__.
174 174 # But doing it here is equivalent and saves an extra function call.
175 175
176 176 # format used:
177 177 # nodeid\n : manifest node in ascii
178 178 # user\n : user, no \n or \r allowed
179 179 # time tz extra\n : date (time is int or float, timezone is int)
180 180 # : extra is metadata, encoded and separated by '\0'
181 181 # : older versions ignore it
182 182 # files\n\n : files modified by the cset, no \n or \r allowed
183 183 # (.*) : comment (free text, ideally utf-8)
184 184 #
185 185 # changelog v0 doesn't use extra
186 186
187 187 nl1 = text.index('\n')
188 188 nl2 = text.index('\n', nl1 + 1)
189 189 nl3 = text.index('\n', nl2 + 1)
190 190
191 191 # The list of files may be empty. Which means nl3 is the first of the
192 192 # double newline that precedes the description.
193 193 if text[nl3 + 1:nl3 + 2] == '\n':
194 194 doublenl = nl3
195 195 else:
196 196 doublenl = text.index('\n\n', nl3 + 1)
197 197
198 198 self._offsets = (nl1, nl2, nl3, doublenl)
199 199 self._text = text
200 200
201 201 return self
202 202
203 203 @property
204 204 def manifest(self):
205 205 return bin(self._text[0:self._offsets[0]])
206 206
207 207 @property
208 208 def user(self):
209 209 off = self._offsets
210 210 return encoding.tolocal(self._text[off[0] + 1:off[1]])
211 211
212 212 @property
213 213 def _rawdate(self):
214 214 off = self._offsets
215 215 dateextra = self._text[off[1] + 1:off[2]]
216 216 return dateextra.split(' ', 2)[0:2]
217 217
218 218 @property
219 219 def _rawextra(self):
220 220 off = self._offsets
221 221 dateextra = self._text[off[1] + 1:off[2]]
222 222 fields = dateextra.split(' ', 2)
223 223 if len(fields) != 3:
224 224 return None
225 225
226 226 return fields[2]
227 227
228 228 @property
229 229 def date(self):
230 230 raw = self._rawdate
231 231 time = float(raw[0])
232 232 # Various tools did silly things with the timezone.
233 233 try:
234 234 timezone = int(raw[1])
235 235 except ValueError:
236 236 timezone = 0
237 237
238 238 return time, timezone
239 239
240 240 @property
241 241 def extra(self):
242 242 raw = self._rawextra
243 243 if raw is None:
244 244 return _defaultextra
245 245
246 246 return decodeextra(raw)
247 247
248 248 @property
249 249 def files(self):
250 250 off = self._offsets
251 251 if off[2] == off[3]:
252 252 return []
253 253
254 254 return self._text[off[2] + 1:off[3]].split('\n')
255 255
256 256 @property
257 257 def description(self):
258 258 return encoding.tolocal(self._text[self._offsets[3] + 2:])
259 259
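
A small worked example of the entry format documented above (a sketch; the manifest hex and field values are made up):

    text = ('0123456789abcdef0123456789abcdef01234567\n'
            'Alice <alice@example.com>\n'
            '1490000000 0 branch:stable\n'
            'a/file.txt\n'
            '\n'
            'example commit message')
    c = changelogrevision(text)
    c.user           # 'Alice <alice@example.com>'
    c.date           # (1490000000.0, 0)
    c.files          # ['a/file.txt']
    c.extra          # {'branch': 'stable'}
    c.description    # 'example commit message'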
260 260 class changelog(revlog.revlog):
261 261 def __init__(self, opener, trypending=False):
262 262 """Load a changelog revlog using an opener.
263 263
264 264 If ``trypending`` is true, we attempt to load the index from a
265 265 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
266 266 The ``00changelog.i.a`` file contains index (and possibly inline
267 267 revision) data for a transaction that hasn't been finalized yet.
268 268 It exists in a separate file to facilitate readers (such as
269 269 hooks processes) accessing data before a transaction is finalized.
270 270 """
271 271 if trypending and opener.exists('00changelog.i.a'):
272 272 indexfile = '00changelog.i.a'
273 273 else:
274 274 indexfile = '00changelog.i'
275 275
276 revlog.revlog.__init__(self, opener, indexfile, checkambig=True)
276 datafile = '00changelog.d'
277 revlog.revlog.__init__(self, opener, indexfile, datafile=datafile,
278 checkambig=True)
277 279
278 280 if self._initempty:
279 281 # changelogs don't benefit from generaldelta
280 282 self.version &= ~revlog.REVLOGGENERALDELTA
281 283 self._generaldelta = False
282 284
283 285 # Delta chains for changelogs tend to be very small because entries
284 286 # tend to be small and don't delta well with each other. So disable delta
285 287 # chains.
286 288 self.storedeltachains = False
287 289
288 290 self._realopener = opener
289 291 self._delayed = False
290 292 self._delaybuf = None
291 293 self._divert = False
292 294 self.filteredrevs = frozenset()
293 295
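
The explicit datafile argument is the point of this change. With trypending, indexfile may be '00changelog.i.a', and the old derivation in revlog (indexfile[:-2] + '.d') then pointed at the wrong file:

    '00changelog.i'[:-2] + '.d'      # '00changelog.d'   -- fine
    '00changelog.i.a'[:-2] + '.d'    # '00changelog.i.d' -- wrong file
    # Hence datafile='00changelog.d' is now passed explicitly above, so the
    # data file stays the same no matter which index file is loaded.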
294 296 def tip(self):
295 297 """filtered version of revlog.tip"""
296 298 for i in xrange(len(self) - 1, -2, -1):
297 299 if i not in self.filteredrevs:
298 300 return self.node(i)
299 301
300 302 def __contains__(self, rev):
301 303 """filtered version of revlog.__contains__"""
302 304 return (0 <= rev < len(self)
303 305 and rev not in self.filteredrevs)
304 306
305 307 def __iter__(self):
306 308 """filtered version of revlog.__iter__"""
307 309 if len(self.filteredrevs) == 0:
308 310 return revlog.revlog.__iter__(self)
309 311
310 312 def filterediter():
311 313 for i in xrange(len(self)):
312 314 if i not in self.filteredrevs:
313 315 yield i
314 316
315 317 return filterediter()
316 318
317 319 def revs(self, start=0, stop=None):
318 320 """filtered version of revlog.revs"""
319 321 for i in super(changelog, self).revs(start, stop):
320 322 if i not in self.filteredrevs:
321 323 yield i
322 324
323 325 @util.propertycache
324 326 def nodemap(self):
325 327 # XXX need filtering too
326 328 self.rev(self.node(0))
327 329 return self._nodecache
328 330
329 331 def reachableroots(self, minroot, heads, roots, includepath=False):
330 332 return self.index.reachableroots2(minroot, heads, roots, includepath)
331 333
332 334 def headrevs(self):
333 335 if self.filteredrevs:
334 336 try:
335 337 return self.index.headrevsfiltered(self.filteredrevs)
336 338 # AttributeError covers non-c-extension environments and
337 339 # old c extensions without filter handling.
338 340 except AttributeError:
339 341 return self._headrevs()
340 342
341 343 return super(changelog, self).headrevs()
342 344
343 345 def strip(self, *args, **kwargs):
344 346 # XXX make something better than assert
345 347 # We can't expect proper strip behavior if we are filtered.
346 348 assert not self.filteredrevs
347 349 super(changelog, self).strip(*args, **kwargs)
348 350
349 351 def rev(self, node):
350 352 """filtered version of revlog.rev"""
351 353 r = super(changelog, self).rev(node)
352 354 if r in self.filteredrevs:
353 355 raise error.FilteredLookupError(hex(node), self.indexfile,
354 356 _('filtered node'))
355 357 return r
356 358
357 359 def node(self, rev):
358 360 """filtered version of revlog.node"""
359 361 if rev in self.filteredrevs:
360 362 raise error.FilteredIndexError(rev)
361 363 return super(changelog, self).node(rev)
362 364
363 365 def linkrev(self, rev):
364 366 """filtered version of revlog.linkrev"""
365 367 if rev in self.filteredrevs:
366 368 raise error.FilteredIndexError(rev)
367 369 return super(changelog, self).linkrev(rev)
368 370
369 371 def parentrevs(self, rev):
370 372 """filtered version of revlog.parentrevs"""
371 373 if rev in self.filteredrevs:
372 374 raise error.FilteredIndexError(rev)
373 375 return super(changelog, self).parentrevs(rev)
374 376
375 377 def flags(self, rev):
376 378 """filtered version of revlog.flags"""
377 379 if rev in self.filteredrevs:
378 380 raise error.FilteredIndexError(rev)
379 381 return super(changelog, self).flags(rev)
380 382
381 383 def delayupdate(self, tr):
382 384 "delay visibility of index updates to other readers"
383 385
384 386 if not self._delayed:
385 387 if len(self) == 0:
386 388 self._divert = True
387 389 if self._realopener.exists(self.indexfile + '.a'):
388 390 self._realopener.unlink(self.indexfile + '.a')
389 391 self.opener = _divertopener(self._realopener, self.indexfile)
390 392 else:
391 393 self._delaybuf = []
392 394 self.opener = _delayopener(self._realopener, self.indexfile,
393 395 self._delaybuf)
394 396 self._delayed = True
395 397 tr.addpending('cl-%i' % id(self), self._writepending)
396 398 tr.addfinalize('cl-%i' % id(self), self._finalize)
397 399
398 400 def _finalize(self, tr):
399 401 "finalize index updates"
400 402 self._delayed = False
401 403 self.opener = self._realopener
402 404 # move redirected index data back into place
403 405 if self._divert:
404 406 assert not self._delaybuf
405 407 tmpname = self.indexfile + ".a"
406 408 nfile = self.opener.open(tmpname)
407 409 nfile.close()
408 410 self.opener.rename(tmpname, self.indexfile, checkambig=True)
409 411 elif self._delaybuf:
410 412 fp = self.opener(self.indexfile, 'a', checkambig=True)
411 413 fp.write("".join(self._delaybuf))
412 414 fp.close()
413 415 self._delaybuf = None
414 416 self._divert = False
415 417 # split when we're done
416 418 self.checkinlinesize(tr)
417 419
418 420 def _writepending(self, tr):
419 421 "create a file containing the unfinalized state for pretxnchangegroup"
420 422 if self._delaybuf:
421 423 # make a temporary copy of the index
422 424 fp1 = self._realopener(self.indexfile)
423 425 pendingfilename = self.indexfile + ".a"
424 426 # register as a temp file to ensure cleanup on failure
425 427 tr.registertmp(pendingfilename)
426 428 # write existing data
427 429 fp2 = self._realopener(pendingfilename, "w")
428 430 fp2.write(fp1.read())
429 431 # add pending data
430 432 fp2.write("".join(self._delaybuf))
431 433 fp2.close()
432 434 # switch modes so finalize can simply rename
433 435 self._delaybuf = None
434 436 self._divert = True
435 437 self.opener = _divertopener(self._realopener, self.indexfile)
436 438
437 439 if self._divert:
438 440 return True
439 441
440 442 return False
441 443
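
In outline, the delayed-write protocol ties into the transaction like this (a sketch; tr stands for an assumed transaction object):

    cl.delayupdate(tr)   # index writes now go to a buffer or 00changelog.i.a
    # ... new revisions are added; other readers still see the old index ...
    # tr calls cl._writepending() before pretxnchangegroup hooks run, making
    # the pending state visible as 00changelog.i.a, and cl._finalize() on
    # close, which renames or appends the pending data back into 00changelog.i.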
442 444 def checkinlinesize(self, tr, fp=None):
443 445 if not self._delayed:
444 446 revlog.revlog.checkinlinesize(self, tr, fp)
445 447
446 448 def read(self, node):
447 449 """Obtain data from a parsed changelog revision.
448 450
449 451 Returns a 6-tuple of:
450 452
451 453 - manifest node in binary
452 454 - author/user as a localstr
453 455 - date as a 2-tuple of (time, timezone)
454 456 - list of files
455 457 - commit message as a localstr
456 458 - dict of extra metadata
457 459
458 460 Unless you need to access all fields, consider calling
459 461 ``changelogrevision`` instead, as it is faster for partial object
460 462 access.
461 463 """
462 464 c = changelogrevision(self.revision(node))
463 465 return (
464 466 c.manifest,
465 467 c.user,
466 468 c.date,
467 469 c.files,
468 470 c.description,
469 471 c.extra
470 472 )
471 473
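
For example (a sketch; cl stands for an assumed changelog instance):

    manifest, user, date, files, desc, extra = cl.read(node)
    # cheaper when only one field is needed:
    user = cl.changelogrevision(node).user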
472 474 def changelogrevision(self, nodeorrev):
473 475 """Obtain a ``changelogrevision`` for a node or revision."""
474 476 return changelogrevision(self.revision(nodeorrev))
475 477
476 478 def readfiles(self, node):
477 479 """
478 480 short version of read that only returns the files modified by the cset
479 481 """
480 482 text = self.revision(node)
481 483 if not text:
482 484 return []
483 485 last = text.index("\n\n")
484 486 l = text[:last].split('\n')
485 487 return l[3:]
486 488
487 489 def add(self, manifest, files, desc, transaction, p1, p2,
488 490 user, date=None, extra=None):
489 491 # Convert to UTF-8 encoded bytestrings as the very first
490 492 # thing: calling any method on a localstr object will turn it
491 493 # into a str object and the cached UTF-8 string is thus lost.
492 494 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
493 495
494 496 user = user.strip()
495 497 # An empty username or a username with a "\n" will make the
496 498 # revision text contain two "\n\n" sequences -> corrupt
497 499 # repository since read cannot unpack the revision.
498 500 if not user:
499 501 raise error.RevlogError(_("empty username"))
500 502 if "\n" in user:
501 503 raise error.RevlogError(_("username %s contains a newline")
502 504 % repr(user))
503 505
504 506 desc = stripdesc(desc)
505 507
506 508 if date:
507 509 parseddate = "%d %d" % util.parsedate(date)
508 510 else:
509 511 parseddate = "%d %d" % util.makedate()
510 512 if extra:
511 513 branch = extra.get("branch")
512 514 if branch in ("default", ""):
513 515 del extra["branch"]
514 516 elif branch in (".", "null", "tip"):
515 517 raise error.RevlogError(_('the name \'%s\' is reserved')
516 518 % branch)
517 519 if extra:
518 520 extra = encodeextra(extra)
519 521 parseddate = "%s %s" % (parseddate, extra)
520 522 l = [hex(manifest), user, parseddate] + sorted(files) + ["", desc]
521 523 text = "\n".join(l)
522 524 return self.addrevision(text, transaction, len(self), p1, p2)
523 525
524 526 def branchinfo(self, rev):
525 527 """return the branch name and open/close state of a revision
526 528
527 529 This function exists because creating a changectx object
528 530 just to access this is costly."""
529 531 extra = self.read(rev)[5]
530 532 return encoding.tolocal(extra.get("branch")), 'close' in extra
531 533
532 534 def _addrevision(self, node, rawtext, transaction, *args, **kwargs):
533 535 # overlay over the standard revlog._addrevision to track the new
534 536 # revision on the transaction.
535 537 rev = len(self)
536 538 node = super(changelog, self)._addrevision(node, rawtext, transaction,
537 539 *args, **kwargs)
538 540 revs = transaction.changes.get('revs')
539 541 if revs is not None:
540 542 revs.add(rev)
541 543 return node
@@ -1,2150 +1,2150 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import errno
18 18 import hashlib
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullid,
28 28 nullrev,
29 29 )
30 30 from .i18n import _
31 31 from . import (
32 32 ancestor,
33 33 error,
34 34 mdiff,
35 35 parsers,
36 36 pycompat,
37 37 templatefilters,
38 38 util,
39 39 )
40 40
41 41 _pack = struct.pack
42 42 _unpack = struct.unpack
43 43 # Aliased for performance.
44 44 _zlibdecompress = zlib.decompress
45 45
46 46 # revlog header flags
47 47 REVLOGV0 = 0
48 48 REVLOGNG = 1
49 49 REVLOGNGINLINEDATA = (1 << 16)
50 50 REVLOGGENERALDELTA = (1 << 17)
51 51 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
52 52 REVLOG_DEFAULT_FORMAT = REVLOGNG
53 53 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
54 54 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
55 55
56 56 # revlog index flags
57 57 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
58 58 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
59 59 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
60 60 REVIDX_DEFAULT_FLAGS = 0
61 61 # stable order in which flags need to be processed and their processors applied
62 62 REVIDX_FLAGS_ORDER = [
63 63 REVIDX_ISCENSORED,
64 64 REVIDX_ELLIPSIS,
65 65 REVIDX_EXTSTORED,
66 66 ]
67 67 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
68 68
69 69 # max size of revlog with inline data
70 70 _maxinline = 131072
71 71 _chunksize = 1048576
72 72
73 73 RevlogError = error.RevlogError
74 74 LookupError = error.LookupError
75 75 CensoredNodeError = error.CensoredNodeError
76 76 ProgrammingError = error.ProgrammingError
77 77
78 78 # Store flag processors (cf. 'addflagprocessor()' to register)
79 79 _flagprocessors = {
80 80 REVIDX_ISCENSORED: None,
81 81 }
82 82
83 83 def addflagprocessor(flag, processor):
84 84 """Register a flag processor on a revision data flag.
85 85
86 86 Invariant:
87 87 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
88 88 - Only one flag processor can be registered on a specific flag.
89 89 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
90 90 following signatures:
91 91 - (read) f(self, rawtext) -> text, bool
92 92 - (write) f(self, text) -> rawtext, bool
93 93 - (raw) f(self, rawtext) -> bool
94 94 "text" is presented to the user. "rawtext" is stored in revlog data, not
95 95 directly visible to the user.
96 96 The boolean returned by these transforms is used to determine whether
97 97 the returned text can be used for hash integrity checking. For example,
98 98 if "write" returns False, then "text" is used to generate hash. If
99 99 "write" returns True, that basically means "rawtext" returned by "write"
100 100 should be used to generate hash. Usually, "write" and "read" return
101 101 different booleans. And "raw" returns the same boolean as "write".
102 102
103 103 Note: The 'raw' transform is used for changegroup generation and in some
104 104 debug commands. In this case the transform only indicates whether the
105 105 contents can be used for hash integrity checks.
106 106 """
107 107 if not flag & REVIDX_KNOWN_FLAGS:
108 108 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
109 109 raise ProgrammingError(msg)
110 110 if flag not in REVIDX_FLAGS_ORDER:
111 111 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
112 112 raise ProgrammingError(msg)
113 113 if flag in _flagprocessors:
114 114 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
115 115 raise error.Abort(msg)
116 116 _flagprocessors[flag] = processor
117 117
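
A hedged sketch of a registration following the 3-tuple contract above; REVIDX_EXTSTORED is a real flag, but the two storage helpers are hypothetical:

    def _readext(self, rawtext):
        return _fetchexternal(rawtext), False    # hypothetical helper
    def _writeext(self, text):
        return _storeexternal(text), False       # hypothetical helper
    def _rawext(self, rawtext):
        return False    # rawtext alone cannot be used for hash checks

    addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))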
118 118 def getoffset(q):
119 119 return int(q >> 16)
120 120
121 121 def gettype(q):
122 122 return int(q & 0xFFFF)
123 123
124 124 def offset_type(offset, type):
125 125 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
126 126 raise ValueError('unknown revlog index flags')
127 127 return int(int(offset) << 16 | type)
128 128
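
These helpers pack a 48-bit file offset and 16 bits of flags into a single index field, e.g.:

    v = offset_type(4096, REVIDX_ISCENSORED)
    getoffset(v)    # 4096
    gettype(v)      # 32768 (== REVIDX_ISCENSORED)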
129 129 _nullhash = hashlib.sha1(nullid)
130 130
131 131 def hash(text, p1, p2):
132 132 """generate a hash from the given text and its parent hashes
133 133
134 134 This hash combines both the current file contents and its history
135 135 in a manner that makes it easy to distinguish nodes with the same
136 136 content in the revision graph.
137 137 """
138 138 # As of now, if one of the parent nodes is null, p2 is null
139 139 if p2 == nullid:
140 140 # deep copy of a hash is faster than creating one
141 141 s = _nullhash.copy()
142 142 s.update(p1)
143 143 else:
144 144 # none of the parent nodes are nullid
145 145 l = [p1, p2]
146 146 l.sort()
147 147 s = hashlib.sha1(l[0])
148 148 s.update(l[1])
149 149 s.update(text)
150 150 return s.digest()
151 151
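
For instance, a root revision (both parents null) hashes as sha1(nullid + nullid + text), which is what the _nullhash fast path computes:

    node = hash('file content', nullid, nullid)
    len(node)    # 20: a binary sha1 digest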
152 152 # index v0:
153 153 # 4 bytes: offset
154 154 # 4 bytes: compressed length
155 155 # 4 bytes: base rev
156 156 # 4 bytes: link rev
157 157 # 20 bytes: parent 1 nodeid
158 158 # 20 bytes: parent 2 nodeid
159 159 # 20 bytes: nodeid
160 160 indexformatv0 = ">4l20s20s20s"
161 161
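
Each v0 entry is fixed-size: big-endian, four 4-byte integers plus three 20-byte nodeids, as a quick check confirms:

    import struct
    struct.calcsize(indexformatv0)    # 76 bytes per v0 index entry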
162 162 class revlogoldio(object):
163 163 def __init__(self):
164 164 self.size = struct.calcsize(indexformatv0)
165 165
166 166 def parseindex(self, data, inline):
167 167 s = self.size
168 168 index = []
169 169 nodemap = {nullid: nullrev}
170 170 n = off = 0
171 171 l = len(data)
172 172 while off + s <= l:
173 173 cur = data[off:off + s]
174 174 off += s
175 175 e = _unpack(indexformatv0, cur)
176 176 # transform to revlogv1 format
177 177 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
178 178 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
179 179 index.append(e2)
180 180 nodemap[e[6]] = n
181 181 n += 1
182 182
183 183 # add the magic null revision at -1
184 184 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
185 185
186 186 return index, nodemap, None
187 187
188 188 def packentry(self, entry, node, version, rev):
189 189 if gettype(entry[0]):
190 190 raise RevlogError(_("index entry flags need RevlogNG"))
191 191 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
192 192 node(entry[5]), node(entry[6]), entry[7])
193 193 return _pack(indexformatv0, *e2)
194 194
195 195 # index ng:
196 196 # 6 bytes: offset
197 197 # 2 bytes: flags
198 198 # 4 bytes: compressed length
199 199 # 4 bytes: uncompressed length
200 200 # 4 bytes: base rev
201 201 # 4 bytes: link rev
202 202 # 4 bytes: parent 1 rev
203 203 # 4 bytes: parent 2 rev
204 204 # 32 bytes: nodeid
205 205 indexformatng = ">Qiiiiii20s12x"
206 206 versionformat = ">I"
207 207
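
The RevlogNG entry is likewise fixed-size (8 + 6*4 + 20 + 12 padding bytes):

    import struct
    struct.calcsize(indexformatng)    # 64 bytes per RevlogNG index entry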
208 208 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
209 209 # signed integer)
210 210 _maxentrysize = 0x7fffffff
211 211
212 212 class revlogio(object):
213 213 def __init__(self):
214 214 self.size = struct.calcsize(indexformatng)
215 215
216 216 def parseindex(self, data, inline):
217 217 # call the C implementation to parse the index data
218 218 index, cache = parsers.parse_index2(data, inline)
219 219 return index, getattr(index, 'nodemap', None), cache
220 220
221 221 def packentry(self, entry, node, version, rev):
222 222 p = _pack(indexformatng, *entry)
223 223 if rev == 0:
224 224 p = _pack(versionformat, version) + p[4:]
225 225 return p
226 226
227 227 class revlog(object):
228 228 """
229 229 the underlying revision storage object
230 230
231 231 A revlog consists of two parts, an index and the revision data.
232 232
233 233 The index is a file with a fixed record size containing
234 234 information on each revision, including its nodeid (hash), the
235 235 nodeids of its parents, the position and offset of its data within
236 236 the data file, and the revision it's based on. Finally, each entry
237 237 contains a linkrev entry that can serve as a pointer to external
238 238 data.
239 239
240 240 The revision data itself is a linear collection of data chunks.
241 241 Each chunk represents a revision and is usually represented as a
242 242 delta against the previous chunk. To bound lookup time, runs of
243 243 deltas are limited to about 2 times the length of the original
244 244 version data. This makes retrieval of a version proportional to
245 245 its size, or O(1) relative to the number of revisions.
246 246
247 247 Both pieces of the revlog are written to in an append-only
248 248 fashion, which means we never need to rewrite a file to insert or
249 249 remove data, and can use some simple techniques to avoid the need
250 250 for locking while reading.
251 251
252 252 If checkambig, indexfile is opened with checkambig=True at
253 253 writing, to avoid file stat ambiguity.
254 254 """
255 def __init__(self, opener, indexfile, checkambig=False):
255 def __init__(self, opener, indexfile, datafile=None, checkambig=False):
256 256 """
257 257 create a revlog object
258 258
259 259 opener is a function that abstracts the file opening operation
260 260 and can be used to implement COW semantics or the like.
261 261 """
262 262 self.indexfile = indexfile
263 self.datafile = indexfile[:-2] + ".d"
263 self.datafile = datafile or (indexfile[:-2] + ".d")
264 264 self.opener = opener
265 265 # When True, indexfile is opened with checkambig=True at writing, to
266 266 # avoid file stat ambiguity.
267 267 self._checkambig = checkambig
268 268 # 3-tuple of (node, rev, text) for a raw revision.
269 269 self._cache = None
270 270 # Maps rev to chain base rev.
271 271 self._chainbasecache = util.lrucachedict(100)
272 272 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
273 273 self._chunkcache = (0, '')
274 274 # How much data to read and cache into the raw revlog data cache.
275 275 self._chunkcachesize = 65536
276 276 self._maxchainlen = None
277 277 self._aggressivemergedeltas = False
278 278 self.index = []
279 279 # Mapping of partial identifiers to full nodes.
280 280 self._pcache = {}
281 281 # Mapping of revision integer to full node.
282 282 self._nodecache = {nullid: nullrev}
283 283 self._nodepos = None
284 284 self._compengine = 'zlib'
285 285
286 286 v = REVLOG_DEFAULT_VERSION
287 287 opts = getattr(opener, 'options', None)
288 288 if opts is not None:
289 289 if 'revlogv1' in opts:
290 290 if 'generaldelta' in opts:
291 291 v |= REVLOGGENERALDELTA
292 292 else:
293 293 v = 0
294 294 if 'chunkcachesize' in opts:
295 295 self._chunkcachesize = opts['chunkcachesize']
296 296 if 'maxchainlen' in opts:
297 297 self._maxchainlen = opts['maxchainlen']
298 298 if 'aggressivemergedeltas' in opts:
299 299 self._aggressivemergedeltas = opts['aggressivemergedeltas']
300 300 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
301 301 if 'compengine' in opts:
302 302 self._compengine = opts['compengine']
303 303
304 304 if self._chunkcachesize <= 0:
305 305 raise RevlogError(_('revlog chunk cache size %r is not greater '
306 306 'than 0') % self._chunkcachesize)
307 307 elif self._chunkcachesize & (self._chunkcachesize - 1):
308 308 raise RevlogError(_('revlog chunk cache size %r is not a power '
309 309 'of 2') % self._chunkcachesize)
310 310
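
The power-of-two requirement lets the chunk cache align reads with a simple mask (see _readsegment below), e.g.:

    cachesize = 65536
    70000 & ~(cachesize - 1)    # 65536: window-aligned start offset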
311 311 indexdata = ''
312 312 self._initempty = True
313 313 try:
314 314 f = self.opener(self.indexfile)
315 315 indexdata = f.read()
316 316 f.close()
317 317 if len(indexdata) > 0:
318 318 v = struct.unpack(versionformat, indexdata[:4])[0]
319 319 self._initempty = False
320 320 except IOError as inst:
321 321 if inst.errno != errno.ENOENT:
322 322 raise
323 323
324 324 self.version = v
325 325 self._inline = v & REVLOGNGINLINEDATA
326 326 self._generaldelta = v & REVLOGGENERALDELTA
327 327 flags = v & ~0xFFFF
328 328 fmt = v & 0xFFFF
329 329 if fmt == REVLOGV0 and flags:
330 330 raise RevlogError(_("index %s unknown flags %#04x for format v0")
331 331 % (self.indexfile, flags >> 16))
332 332 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
333 333 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
334 334 % (self.indexfile, flags >> 16))
335 335 elif fmt > REVLOGNG:
336 336 raise RevlogError(_("index %s unknown format %d")
337 337 % (self.indexfile, fmt))
338 338
339 339 self.storedeltachains = True
340 340
341 341 self._io = revlogio()
342 342 if self.version == REVLOGV0:
343 343 self._io = revlogoldio()
344 344 try:
345 345 d = self._io.parseindex(indexdata, self._inline)
346 346 except (ValueError, IndexError):
347 347 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
348 348 self.index, nodemap, self._chunkcache = d
349 349 if nodemap is not None:
350 350 self.nodemap = self._nodecache = nodemap
351 351 if not self._chunkcache:
352 352 self._chunkclear()
353 353 # revnum -> (chain-length, sum-delta-length)
354 354 self._chaininfocache = {}
355 355 # revlog header -> revlog compressor
356 356 self._decompressors = {}
357 357
358 358 @util.propertycache
359 359 def _compressor(self):
360 360 return util.compengines[self._compengine].revlogcompressor()
361 361
362 362 def tip(self):
363 363 return self.node(len(self.index) - 2)
364 364 def __contains__(self, rev):
365 365 return 0 <= rev < len(self)
366 366 def __len__(self):
367 367 return len(self.index) - 1
368 368 def __iter__(self):
369 369 return iter(xrange(len(self)))
370 370 def revs(self, start=0, stop=None):
371 371 """iterate over all rev in this revlog (from start to stop)"""
372 372 step = 1
373 373 if stop is not None:
374 374 if start > stop:
375 375 step = -1
376 376 stop += step
377 377 else:
378 378 stop = len(self)
379 379 return xrange(start, stop, step)
380 380
381 381 @util.propertycache
382 382 def nodemap(self):
383 383 self.rev(self.node(0))
384 384 return self._nodecache
385 385
386 386 def hasnode(self, node):
387 387 try:
388 388 self.rev(node)
389 389 return True
390 390 except KeyError:
391 391 return False
392 392
393 393 def clearcaches(self):
394 394 self._cache = None
395 395 self._chainbasecache.clear()
396 396 self._chunkcache = (0, '')
397 397 self._pcache = {}
398 398
399 399 try:
400 400 self._nodecache.clearcaches()
401 401 except AttributeError:
402 402 self._nodecache = {nullid: nullrev}
403 403 self._nodepos = None
404 404
405 405 def rev(self, node):
406 406 try:
407 407 return self._nodecache[node]
408 408 except TypeError:
409 409 raise
410 410 except RevlogError:
411 411 # parsers.c radix tree lookup failed
412 412 raise LookupError(node, self.indexfile, _('no node'))
413 413 except KeyError:
414 414 # pure python cache lookup failed
415 415 n = self._nodecache
416 416 i = self.index
417 417 p = self._nodepos
418 418 if p is None:
419 419 p = len(i) - 2
420 420 for r in xrange(p, -1, -1):
421 421 v = i[r][7]
422 422 n[v] = r
423 423 if v == node:
424 424 self._nodepos = r - 1
425 425 return r
426 426 raise LookupError(node, self.indexfile, _('no node'))
427 427
428 428 # Accessors for index entries.
429 429
430 430 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
431 431 # are flags.
432 432 def start(self, rev):
433 433 return int(self.index[rev][0] >> 16)
434 434
435 435 def flags(self, rev):
436 436 return self.index[rev][0] & 0xFFFF
437 437
438 438 def length(self, rev):
439 439 return self.index[rev][1]
440 440
441 441 def rawsize(self, rev):
442 442 """return the length of the uncompressed text for a given revision"""
443 443 l = self.index[rev][2]
444 444 if l >= 0:
445 445 return l
446 446
447 447 t = self.revision(rev, raw=True)
448 448 return len(t)
449 449
450 450 def size(self, rev):
451 451 """length of non-raw text (processed by a "read" flag processor)"""
452 452 # fast path: if no "read" flag processor could change the content,
453 453 # size is rawsize. note: ELLIPSIS is known to not change the content.
454 454 flags = self.flags(rev)
455 455 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
456 456 return self.rawsize(rev)
457 457
458 458 return len(self.revision(rev, raw=False))
459 459
460 460 def chainbase(self, rev):
461 461 base = self._chainbasecache.get(rev)
462 462 if base is not None:
463 463 return base
464 464
465 465 index = self.index
466 466 base = index[rev][3]
467 467 while base != rev:
468 468 rev = base
469 469 base = index[rev][3]
470 470
471 471 self._chainbasecache[rev] = base
472 472 return base
473 473
474 474 def linkrev(self, rev):
475 475 return self.index[rev][4]
476 476
477 477 def parentrevs(self, rev):
478 478 return self.index[rev][5:7]
479 479
480 480 def node(self, rev):
481 481 return self.index[rev][7]
482 482
483 483 # Derived from index values.
484 484
485 485 def end(self, rev):
486 486 return self.start(rev) + self.length(rev)
487 487
488 488 def parents(self, node):
489 489 i = self.index
490 490 d = i[self.rev(node)]
491 491 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
492 492
493 493 def chainlen(self, rev):
494 494 return self._chaininfo(rev)[0]
495 495
496 496 def _chaininfo(self, rev):
497 497 chaininfocache = self._chaininfocache
498 498 if rev in chaininfocache:
499 499 return chaininfocache[rev]
500 500 index = self.index
501 501 generaldelta = self._generaldelta
502 502 iterrev = rev
503 503 e = index[iterrev]
504 504 clen = 0
505 505 compresseddeltalen = 0
506 506 while iterrev != e[3]:
507 507 clen += 1
508 508 compresseddeltalen += e[1]
509 509 if generaldelta:
510 510 iterrev = e[3]
511 511 else:
512 512 iterrev -= 1
513 513 if iterrev in chaininfocache:
514 514 t = chaininfocache[iterrev]
515 515 clen += t[0]
516 516 compresseddeltalen += t[1]
517 517 break
518 518 e = index[iterrev]
519 519 else:
520 520 # Add text length of base since decompressing that also takes
521 521 # work. For cache hits the length is already included.
522 522 compresseddeltalen += e[1]
523 523 r = (clen, compresseddeltalen)
524 524 chaininfocache[rev] = r
525 525 return r
526 526
527 527 def _deltachain(self, rev, stoprev=None):
528 528 """Obtain the delta chain for a revision.
529 529
530 530 ``stoprev`` specifies a revision to stop at. If not specified, we
531 531 stop at the base of the chain.
532 532
533 533 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
534 534 revs in ascending order and ``stopped`` is a bool indicating whether
535 535 ``stoprev`` was hit.
536 536 """
537 537 chain = []
538 538
539 539 # Alias to prevent attribute lookup in tight loop.
540 540 index = self.index
541 541 generaldelta = self._generaldelta
542 542
543 543 iterrev = rev
544 544 e = index[iterrev]
545 545 while iterrev != e[3] and iterrev != stoprev:
546 546 chain.append(iterrev)
547 547 if generaldelta:
548 548 iterrev = e[3]
549 549 else:
550 550 iterrev -= 1
551 551 e = index[iterrev]
552 552
553 553 if iterrev == stoprev:
554 554 stopped = True
555 555 else:
556 556 chain.append(iterrev)
557 557 stopped = False
558 558
559 559 chain.reverse()
560 560 return chain, stopped
561 561
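
Usage sketch (rl stands for an assumed revlog instance):

    chain, stopped = rl._deltachain(rev)
    # chain[0] is the base revision holding a full text (unless stopped);
    # later revisions in chain are deltas applied in ascending order.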
562 562 def ancestors(self, revs, stoprev=0, inclusive=False):
563 563 """Generate the ancestors of 'revs' in reverse topological order.
564 564 Does not generate revs lower than stoprev.
565 565
566 566 See the documentation for ancestor.lazyancestors for more details."""
567 567
568 568 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
569 569 inclusive=inclusive)
570 570
571 571 def descendants(self, revs):
572 572 """Generate the descendants of 'revs' in revision order.
573 573
574 574 Yield a sequence of revision numbers starting with a child of
575 575 some rev in revs, i.e., each revision is *not* considered a
576 576 descendant of itself. Results are ordered by revision number (a
577 577 topological sort)."""
578 578 first = min(revs)
579 579 if first == nullrev:
580 580 for i in self:
581 581 yield i
582 582 return
583 583
584 584 seen = set(revs)
585 585 for i in self.revs(start=first + 1):
586 586 for x in self.parentrevs(i):
587 587 if x != nullrev and x in seen:
588 588 seen.add(i)
589 589 yield i
590 590 break
591 591
592 592 def findcommonmissing(self, common=None, heads=None):
593 593 """Return a tuple of the ancestors of common and the ancestors of heads
594 594 that are not ancestors of common. In revset terminology, we return the
595 595 tuple:
596 596
597 597 ::common, (::heads) - (::common)
598 598
599 599 The list is sorted by revision number, meaning it is
600 600 topologically sorted.
601 601
602 602 'heads' and 'common' are both lists of node IDs. If heads is
603 603 not supplied, uses all of the revlog's heads. If common is not
604 604 supplied, uses nullid."""
605 605 if common is None:
606 606 common = [nullid]
607 607 if heads is None:
608 608 heads = self.heads()
609 609
610 610 common = [self.rev(n) for n in common]
611 611 heads = [self.rev(n) for n in heads]
612 612
613 613 # we want the ancestors, but inclusive
614 614 class lazyset(object):
615 615 def __init__(self, lazyvalues):
616 616 self.addedvalues = set()
617 617 self.lazyvalues = lazyvalues
618 618
619 619 def __contains__(self, value):
620 620 return value in self.addedvalues or value in self.lazyvalues
621 621
622 622 def __iter__(self):
623 623 added = self.addedvalues
624 624 for r in added:
625 625 yield r
626 626 for r in self.lazyvalues:
627 627 if r not in added:
628 628 yield r
629 629
630 630 def add(self, value):
631 631 self.addedvalues.add(value)
632 632
633 633 def update(self, values):
634 634 self.addedvalues.update(values)
635 635
636 636 has = lazyset(self.ancestors(common))
637 637 has.add(nullrev)
638 638 has.update(common)
639 639
640 640 # take all ancestors from heads that aren't in has
641 641 missing = set()
642 642 visit = collections.deque(r for r in heads if r not in has)
643 643 while visit:
644 644 r = visit.popleft()
645 645 if r in missing:
646 646 continue
647 647 else:
648 648 missing.add(r)
649 649 for p in self.parentrevs(r):
650 650 if p not in has:
651 651 visit.append(p)
652 652 missing = list(missing)
653 653 missing.sort()
654 654 return has, [self.node(miss) for miss in missing]
655 655
656 656 def incrementalmissingrevs(self, common=None):
657 657 """Return an object that can be used to incrementally compute the
658 658 revision numbers of the ancestors of arbitrary sets that are not
659 659 ancestors of common. This is an ancestor.incrementalmissingancestors
660 660 object.
661 661
662 662 'common' is a list of revision numbers. If common is not supplied, uses
663 663 nullrev.
664 664 """
665 665 if common is None:
666 666 common = [nullrev]
667 667
668 668 return ancestor.incrementalmissingancestors(self.parentrevs, common)
669 669
670 670 def findmissingrevs(self, common=None, heads=None):
671 671 """Return the revision numbers of the ancestors of heads that
672 672 are not ancestors of common.
673 673
674 674 More specifically, return a list of revision numbers corresponding to
675 675 nodes N such that every N satisfies the following constraints:
676 676
677 677 1. N is an ancestor of some node in 'heads'
678 678 2. N is not an ancestor of any node in 'common'
679 679
680 680 The list is sorted by revision number, meaning it is
681 681 topologically sorted.
682 682
683 683 'heads' and 'common' are both lists of revision numbers. If heads is
684 684 not supplied, uses all of the revlog's heads. If common is not
685 685 supplied, uses nullid."""
686 686 if common is None:
687 687 common = [nullrev]
688 688 if heads is None:
689 689 heads = self.headrevs()
690 690
691 691 inc = self.incrementalmissingrevs(common=common)
692 692 return inc.missingancestors(heads)
693 693
694 694 def findmissing(self, common=None, heads=None):
695 695 """Return the ancestors of heads that are not ancestors of common.
696 696
697 697 More specifically, return a list of nodes N such that every N
698 698 satisfies the following constraints:
699 699
700 700 1. N is an ancestor of some node in 'heads'
701 701 2. N is not an ancestor of any node in 'common'
702 702
703 703 The list is sorted by revision number, meaning it is
704 704 topologically sorted.
705 705
706 706 'heads' and 'common' are both lists of node IDs. If heads is
707 707 not supplied, uses all of the revlog's heads. If common is not
708 708 supplied, uses nullid."""
709 709 if common is None:
710 710 common = [nullid]
711 711 if heads is None:
712 712 heads = self.heads()
713 713
714 714 common = [self.rev(n) for n in common]
715 715 heads = [self.rev(n) for n in heads]
716 716
717 717 inc = self.incrementalmissingrevs(common=common)
718 718 return [self.node(r) for r in inc.missingancestors(heads)]
719 719
720 720 def nodesbetween(self, roots=None, heads=None):
721 721 """Return a topological path from 'roots' to 'heads'.
722 722
723 723 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
724 724 topologically sorted list of all nodes N that satisfy both of
725 725 these constraints:
726 726
727 727 1. N is a descendant of some node in 'roots'
728 728 2. N is an ancestor of some node in 'heads'
729 729
730 730 Every node is considered to be both a descendant and an ancestor
731 731 of itself, so every reachable node in 'roots' and 'heads' will be
732 732 included in 'nodes'.
733 733
734 734 'outroots' is the list of reachable nodes in 'roots', i.e., the
735 735 subset of 'roots' that is returned in 'nodes'. Likewise,
736 736 'outheads' is the subset of 'heads' that is also in 'nodes'.
737 737
738 738 'roots' and 'heads' are both lists of node IDs. If 'roots' is
739 739 unspecified, uses nullid as the only root. If 'heads' is
740 740 unspecified, uses list of all of the revlog's heads."""
741 741 nonodes = ([], [], [])
742 742 if roots is not None:
743 743 roots = list(roots)
744 744 if not roots:
745 745 return nonodes
746 746 lowestrev = min([self.rev(n) for n in roots])
747 747 else:
748 748 roots = [nullid] # Everybody's a descendant of nullid
749 749 lowestrev = nullrev
750 750 if (lowestrev == nullrev) and (heads is None):
751 751 # We want _all_ the nodes!
752 752 return ([self.node(r) for r in self], [nullid], list(self.heads()))
753 753 if heads is None:
754 754 # All nodes are ancestors, so the latest ancestor is the last
755 755 # node.
756 756 highestrev = len(self) - 1
757 757 # Set ancestors to None to signal that every node is an ancestor.
758 758 ancestors = None
759 759 # Set heads to an empty dictionary for later discovery of heads
760 760 heads = {}
761 761 else:
762 762 heads = list(heads)
763 763 if not heads:
764 764 return nonodes
765 765 ancestors = set()
766 766 # Turn heads into a dictionary so we can remove 'fake' heads.
767 767 # Also, later we will be using it to filter out the heads we can't
768 768 # find from roots.
769 769 heads = dict.fromkeys(heads, False)
770 770 # Start at the top and keep marking parents until we're done.
771 771 nodestotag = set(heads)
772 772 # Remember where the top was so we can use it as a limit later.
773 773 highestrev = max([self.rev(n) for n in nodestotag])
774 774 while nodestotag:
775 775 # grab a node to tag
776 776 n = nodestotag.pop()
777 777 # Never tag nullid
778 778 if n == nullid:
779 779 continue
780 780 # A node's revision number represents its place in a
781 781 # topologically sorted list of nodes.
782 782 r = self.rev(n)
783 783 if r >= lowestrev:
784 784 if n not in ancestors:
785 785 # If we are possibly a descendant of one of the roots
786 786 # and we haven't already been marked as an ancestor
787 787 ancestors.add(n) # Mark as ancestor
788 788 # Add non-nullid parents to list of nodes to tag.
789 789 nodestotag.update([p for p in self.parents(n) if
790 790 p != nullid])
791 791 elif n in heads: # We've seen it before, is it a fake head?
792 792 # So it is, real heads should not be the ancestors of
793 793 # any other heads.
794 794 heads.pop(n)
795 795 if not ancestors:
796 796 return nonodes
797 797 # Now that we have our set of ancestors, we want to remove any
798 798 # roots that are not ancestors.
799 799
800 800 # If one of the roots was nullid, everything is included anyway.
801 801 if lowestrev > nullrev:
802 802 # But, since we weren't, let's recompute the lowest rev to not
803 803 # include roots that aren't ancestors.
804 804
805 805 # Filter out roots that aren't ancestors of heads
806 806 roots = [root for root in roots if root in ancestors]
807 807 # Recompute the lowest revision
808 808 if roots:
809 809 lowestrev = min([self.rev(root) for root in roots])
810 810 else:
811 811 # No more roots? Return empty list
812 812 return nonodes
813 813 else:
814 814 # We are descending from nullid, and don't need to care about
815 815 # any other roots.
816 816 lowestrev = nullrev
817 817 roots = [nullid]
818 818 # Transform our roots list into a set.
819 819 descendants = set(roots)
820 820 # Also, keep the original roots so we can filter out roots that aren't
821 821 # 'real' roots (i.e. are descended from other roots).
822 822 roots = descendants.copy()
823 823 # Our topologically sorted list of output nodes.
824 824 orderedout = []
825 825 # Don't start at nullid since we don't want nullid in our output list,
826 826 # and if nullid shows up in descendants, empty parents will look like
827 827 # they're descendants.
828 828 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
829 829 n = self.node(r)
830 830 isdescendant = False
831 831 if lowestrev == nullrev: # Everybody is a descendant of nullid
832 832 isdescendant = True
833 833 elif n in descendants:
834 834 # n is already a descendant
835 835 isdescendant = True
836 836 # This check only needs to be done here because all the roots
837 837 # will start being marked as descendants before the loop.
838 838 if n in roots:
839 839 # If n was a root, check if it's a 'real' root.
840 840 p = tuple(self.parents(n))
841 841 # If any of its parents are descendants, it's not a root.
842 842 if (p[0] in descendants) or (p[1] in descendants):
843 843 roots.remove(n)
844 844 else:
845 845 p = tuple(self.parents(n))
846 846 # A node is a descendant if either of its parents are
847 847 # descendants. (We seeded the descendants set with the roots
848 848 # up there, remember?)
849 849 if (p[0] in descendants) or (p[1] in descendants):
850 850 descendants.add(n)
851 851 isdescendant = True
852 852 if isdescendant and ((ancestors is None) or (n in ancestors)):
853 853 # Only include nodes that are both descendants and ancestors.
854 854 orderedout.append(n)
855 855 if (ancestors is not None) and (n in heads):
856 856 # We're trying to figure out which heads are reachable
857 857 # from roots.
858 858 # Mark this head as having been reached
859 859 heads[n] = True
860 860 elif ancestors is None:
861 861 # Otherwise, we're trying to discover the heads.
862 862 # Assume this is a head because if it isn't, the next step
863 863 # will eventually remove it.
864 864 heads[n] = True
865 865 # But, obviously its parents aren't.
866 866 for p in self.parents(n):
867 867 heads.pop(p, None)
868 868 heads = [head for head, flag in heads.iteritems() if flag]
869 869 roots = list(roots)
870 870 assert orderedout
871 871 assert roots
872 872 assert heads
873 873 return (orderedout, roots, heads)
874 874
875 875 def headrevs(self):
876 876 try:
877 877 return self.index.headrevs()
878 878 except AttributeError:
879 879 return self._headrevs()
880 880
881 881 def computephases(self, roots):
882 882 return self.index.computephasesmapsets(roots)
883 883
884 884 def _headrevs(self):
885 885 count = len(self)
886 886 if not count:
887 887 return [nullrev]
888 888 # we won't iter over filtered rev so nobody is a head at start
889 889 ishead = [0] * (count + 1)
890 890 index = self.index
891 891 for r in self:
892 892 ishead[r] = 1 # I may be a head
893 893 e = index[r]
894 894 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
895 895 return [r for r, val in enumerate(ishead) if val]
896 896
897 897 def heads(self, start=None, stop=None):
898 898 """return the list of all nodes that have no children
899 899
900 900 if start is specified, only heads that are descendants of
901 901 start will be returned
902 902 if stop is specified, it will consider all the revs from stop
903 903 as if they had no children
904 904 """
905 905 if start is None and stop is None:
906 906 if not len(self):
907 907 return [nullid]
908 908 return [self.node(r) for r in self.headrevs()]
909 909
910 910 if start is None:
911 911 start = nullid
912 912 if stop is None:
913 913 stop = []
914 914 stoprevs = set([self.rev(n) for n in stop])
915 915 startrev = self.rev(start)
916 916 reachable = {startrev}
917 917 heads = {startrev}
918 918
919 919 parentrevs = self.parentrevs
920 920 for r in self.revs(start=startrev + 1):
921 921 for p in parentrevs(r):
922 922 if p in reachable:
923 923 if r not in stoprevs:
924 924 reachable.add(r)
925 925 heads.add(r)
926 926 if p in heads and p not in stoprevs:
927 927 heads.remove(p)
928 928
929 929 return [self.node(r) for r in heads]
930 930
931 931 def children(self, node):
932 932 """find the children of a given node"""
933 933 c = []
934 934 p = self.rev(node)
935 935 for r in self.revs(start=p + 1):
936 936 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
937 937 if prevs:
938 938 for pr in prevs:
939 939 if pr == p:
940 940 c.append(self.node(r))
941 941 elif p == nullrev:
942 942 c.append(self.node(r))
943 943 return c
944 944
945 945 def descendant(self, start, end):
946 946 if start == nullrev:
947 947 return True
948 948 for i in self.descendants([start]):
949 949 if i == end:
950 950 return True
951 951 elif i > end:
952 952 break
953 953 return False
954 954
955 955 def commonancestorsheads(self, a, b):
956 956 """calculate all the heads of the common ancestors of nodes a and b"""
957 957 a, b = self.rev(a), self.rev(b)
958 958 try:
959 959 ancs = self.index.commonancestorsheads(a, b)
960 960 except (AttributeError, OverflowError): # C implementation failed
961 961 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
962 962 return pycompat.maplist(self.node, ancs)
963 963
964 964 def isancestor(self, a, b):
965 965 """return True if node a is an ancestor of node b
966 966
967 967 The implementation of this is trivial but the use of
968 968 commonancestorsheads is not."""
969 969 return a in self.commonancestorsheads(a, b)
970 970
971 971 def ancestor(self, a, b):
972 972 """calculate the "best" common ancestor of nodes a and b"""
973 973
974 974 a, b = self.rev(a), self.rev(b)
975 975 try:
976 976 ancs = self.index.ancestors(a, b)
977 977 except (AttributeError, OverflowError):
978 978 ancs = ancestor.ancestors(self.parentrevs, a, b)
979 979 if ancs:
980 980 # choose a consistent winner when there's a tie
981 981 return min(map(self.node, ancs))
982 982 return nullid
983 983
984 984 def _match(self, id):
985 985 if isinstance(id, int):
986 986 # rev
987 987 return self.node(id)
988 988 if len(id) == 20:
989 989 # possibly a binary node
990 990 # odds of a binary node being all hex in ASCII are 1 in 10**25
991 991 try:
992 992 node = id
993 993 self.rev(node) # quick search the index
994 994 return node
995 995 except LookupError:
996 996 pass # may be partial hex id
997 997 try:
998 998 # str(rev)
999 999 rev = int(id)
1000 1000 if str(rev) != id:
1001 1001 raise ValueError
1002 1002 if rev < 0:
1003 1003 rev = len(self) + rev
1004 1004 if rev < 0 or rev >= len(self):
1005 1005 raise ValueError
1006 1006 return self.node(rev)
1007 1007 except (ValueError, OverflowError):
1008 1008 pass
1009 1009 if len(id) == 40:
1010 1010 try:
1011 1011 # a full hex nodeid?
1012 1012 node = bin(id)
1013 1013 self.rev(node)
1014 1014 return node
1015 1015 except (TypeError, LookupError):
1016 1016 pass
1017 1017
1018 1018 def _partialmatch(self, id):
1019 1019 try:
1020 1020 partial = self.index.partialmatch(id)
1021 1021 if partial and self.hasnode(partial):
1022 1022 return partial
1023 1023 return None
1024 1024 except RevlogError:
1025 1025 # parsers.c radix tree lookup gave multiple matches
1026 1026 # fast path: for unfiltered changelog, radix tree is accurate
1027 1027 if not getattr(self, 'filteredrevs', None):
1028 1028 raise LookupError(id, self.indexfile,
1029 1029 _('ambiguous identifier'))
1030 1030 # fall through to slow path that filters hidden revisions
1031 1031 except (AttributeError, ValueError):
1032 1032 # we are pure python, or key was too short to search radix tree
1033 1033 pass
1034 1034
1035 1035 if id in self._pcache:
1036 1036 return self._pcache[id]
1037 1037
1038 1038 if len(id) < 40:
1039 1039 try:
1040 1040 # hex(node)[:...]
1041 1041 l = len(id) // 2 # grab an even number of digits
1042 1042 prefix = bin(id[:l * 2])
1043 1043 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1044 1044 nl = [n for n in nl if hex(n).startswith(id) and
1045 1045 self.hasnode(n)]
1046 1046 if len(nl) > 0:
1047 1047 if len(nl) == 1:
1048 1048 self._pcache[id] = nl[0]
1049 1049 return nl[0]
1050 1050 raise LookupError(id, self.indexfile,
1051 1051 _('ambiguous identifier'))
1052 1052 return None
1053 1053 except TypeError:
1054 1054 pass
1055 1055
1056 1056 def lookup(self, id):
1057 1057 """locate a node based on:
1058 1058 - revision number or str(revision number)
1059 1059 - nodeid or subset of hex nodeid
1060 1060 """
1061 1061 n = self._match(id)
1062 1062 if n is not None:
1063 1063 return n
1064 1064 n = self._partialmatch(id)
1065 1065 if n:
1066 1066 return n
1067 1067
1068 1068 raise LookupError(id, self.indexfile, _('no match found'))
1069 1069
1070 1070 def cmp(self, node, text):
1071 1071 """compare text with a given file revision
1072 1072
1073 1073 returns True if text is different than what is stored.
1074 1074 """
1075 1075 p1, p2 = self.parents(node)
1076 1076 return hash(text, p1, p2) != node
1077 1077
1078 1078 def _cachesegment(self, offset, data):
1079 1079 """Add a segment to the revlog cache.
1080 1080
1081 1081 Accepts an absolute offset and the data that is at that location.
1082 1082 """
1083 1083 o, d = self._chunkcache
1084 1084 # try to add to existing cache
1085 1085 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1086 1086 self._chunkcache = o, d + data
1087 1087 else:
1088 1088 self._chunkcache = offset, data
1089 1089
1090 1090 def _readsegment(self, offset, length, df=None):
1091 1091 """Load a segment of raw data from the revlog.
1092 1092
1093 1093 Accepts an absolute offset, length to read, and an optional existing
1094 1094 file handle to read from.
1095 1095
1096 1096 If an existing file handle is passed, it will be seeked and the
1097 1097 original seek position will NOT be restored.
1098 1098
1099 1099 Returns a str or buffer of raw byte data.
1100 1100 """
1101 1101 if df is not None:
1102 1102 closehandle = False
1103 1103 else:
1104 1104 if self._inline:
1105 1105 df = self.opener(self.indexfile)
1106 1106 else:
1107 1107 df = self.opener(self.datafile)
1108 1108 closehandle = True
1109 1109
1110 1110 # Cache data both forward and backward around the requested
1111 1111 # data, in a fixed size window. This helps speed up operations
1112 1112 # involving reading the revlog backwards.
1113 1113 cachesize = self._chunkcachesize
1114 1114 realoffset = offset & ~(cachesize - 1)
1115 1115 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1116 1116 - realoffset)
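# Illustrative example, assuming self._chunkcachesize is 65536 (the
# masking above requires a power of two): a request for offset=70000,
# length=100 is widened to realoffset = 70000 & ~65535 = 65536 and
# reallength = ((70000 + 100 + 65536) & ~65535) - 65536 = 65536, so
# neighbouring reads in either direction can then be served from cache.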
1117 1117 df.seek(realoffset)
1118 1118 d = df.read(reallength)
1119 1119 if closehandle:
1120 1120 df.close()
1121 1121 self._cachesegment(realoffset, d)
1122 1122 if offset != realoffset or reallength != length:
1123 1123 return util.buffer(d, offset - realoffset, length)
1124 1124 return d
1125 1125
1126 1126 def _getsegment(self, offset, length, df=None):
1127 1127 """Obtain a segment of raw data from the revlog.
1128 1128
1129 1129 Accepts an absolute offset, length of bytes to obtain, and an
1130 1130 optional file handle to the already-opened revlog. If the file
1131 1131 handle is used, its original seek position will not be preserved.
1132 1132
1133 1133 Requests for data may be returned from a cache.
1134 1134
1135 1135 Returns a str or a buffer instance of raw byte data.
1136 1136 """
1137 1137 o, d = self._chunkcache
1138 1138 l = len(d)
1139 1139
1140 1140 # is it in the cache?
1141 1141 cachestart = offset - o
1142 1142 cacheend = cachestart + length
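# e.g. with a cached segment (o=4096, len(d)=8192), a request for
# offset=5000, length=1000 yields cachestart=904, cacheend=1904; both
# fall inside the cached data, so the read never touches the disk.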
1143 1143 if cachestart >= 0 and cacheend <= l:
1144 1144 if cachestart == 0 and cacheend == l:
1145 1145 return d # avoid a copy
1146 1146 return util.buffer(d, cachestart, cacheend - cachestart)
1147 1147
1148 1148 return self._readsegment(offset, length, df=df)
1149 1149
1150 1150 def _getsegmentforrevs(self, startrev, endrev, df=None):
1151 1151 """Obtain a segment of raw data corresponding to a range of revisions.
1152 1152
1153 1153 Accepts the start and end revisions and an optional already-open
1154 1154 file handle to be used for reading. If the file handle is used, its
1155 1155 seek position will not be preserved.
1156 1156
1157 1157 Requests for data may be satisfied by a cache.
1158 1158
1159 1159 Returns a 2-tuple of (offset, data) for the requested range of
1160 1160 revisions. Offset is the integer offset from the beginning of the
1161 1161 revlog and data is a str or buffer of the raw byte data.
1162 1162
1163 1163 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1164 1164 to determine where each revision's data begins and ends.
1165 1165 """
1166 1166 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1167 1167 # (functions are expensive).
1168 1168 index = self.index
1169 1169 istart = index[startrev]
1170 1170 start = int(istart[0] >> 16)
1171 1171 if startrev == endrev:
1172 1172 end = start + istart[1]
1173 1173 else:
1174 1174 iend = index[endrev]
1175 1175 end = int(iend[0] >> 16) + iend[1]
1176 1176
1177 1177 if self._inline:
1178 1178 start += (startrev + 1) * self._io.size
1179 1179 end += (endrev + 1) * self._io.size
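# In an inline revlog, index entries and data chunks are interleaved in
# a single file, so a revision's data sits after the (rev + 1) index
# entries preceding it; hence the (rev + 1) * self._io.size adjustments
# above.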
1180 1180 length = end - start
1181 1181
1182 1182 return start, self._getsegment(start, length, df=df)
1183 1183
1184 1184 def _chunk(self, rev, df=None):
1185 1185 """Obtain a single decompressed chunk for a revision.
1186 1186
1187 1187 Accepts an integer revision and an optional already-open file handle
1188 1188 to be used for reading. If used, the seek position of the file will not
1189 1189 be preserved.
1190 1190
1191 1191 Returns a str holding uncompressed data for the requested revision.
1192 1192 """
1193 1193 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1194 1194
1195 1195 def _chunks(self, revs, df=None):
1196 1196 """Obtain decompressed chunks for the specified revisions.
1197 1197
1198 1198 Accepts an iterable of numeric revisions that are assumed to be in
1199 1199 ascending order. Also accepts an optional already-open file handle
1200 1200 to be used for reading. If used, the seek position of the file will
1201 1201 not be preserved.
1202 1202
1203 1203 This function is similar to calling ``self._chunk()`` multiple times,
1204 1204 but is faster.
1205 1205
1206 1206 Returns a list with decompressed data for each requested revision.
1207 1207 """
1208 1208 if not revs:
1209 1209 return []
1210 1210 start = self.start
1211 1211 length = self.length
1212 1212 inline = self._inline
1213 1213 iosize = self._io.size
1214 1214 buffer = util.buffer
1215 1215
1216 1216 l = []
1217 1217 ladd = l.append
1218 1218
1219 1219 try:
1220 1220 offset, data = self._getsegmentforrevs(revs[0], revs[-1], df=df)
1221 1221 except OverflowError:
1222 1222 # issue4215 - we can't cache a run of chunks greater than
1223 1223 # 2G on Windows
1224 1224 return [self._chunk(rev, df=df) for rev in revs]
1225 1225
1226 1226 decomp = self.decompress
1227 1227 for rev in revs:
1228 1228 chunkstart = start(rev)
1229 1229 if inline:
1230 1230 chunkstart += (rev + 1) * iosize
1231 1231 chunklength = length(rev)
1232 1232 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1233 1233
1234 1234 return l
1235 1235
1236 1236 def _chunkclear(self):
1237 1237 """Clear the raw chunk cache."""
1238 1238 self._chunkcache = (0, '')
1239 1239
1240 1240 def deltaparent(self, rev):
1241 1241 """return deltaparent of the given revision"""
1242 1242 base = self.index[rev][3]
1243 1243 if base == rev:
1244 1244 return nullrev
1245 1245 elif self._generaldelta:
1246 1246 return base
1247 1247 else:
1248 1248 return rev - 1
1249 1249
1250 1250 def revdiff(self, rev1, rev2):
1251 1251 """return or calculate a delta between two revisions
1252 1252
1253 1253 The delta calculated is in binary form and is intended to be written to
1254 1254 revlog data directly. So this function needs raw revision data.
1255 1255 """
1256 1256 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1257 1257 return bytes(self._chunk(rev2))
1258 1258
1259 1259 return mdiff.textdiff(self.revision(rev1, raw=True),
1260 1260 self.revision(rev2, raw=True))
1261 1261
1262 1262 def revision(self, nodeorrev, _df=None, raw=False):
1263 1263 """return an uncompressed revision of a given node or revision
1264 1264 number.
1265 1265
1266 1266 _df - an existing file handle to read from. (internal-only)
1267 1267 raw - an optional argument specifying if the revision data is to be
1268 1268 treated as raw data when applying flag transforms. 'raw' should be set
1269 1269 to True when generating changegroups or in debug commands.
1270 1270 """
1271 1271 if isinstance(nodeorrev, int):
1272 1272 rev = nodeorrev
1273 1273 node = self.node(rev)
1274 1274 else:
1275 1275 node = nodeorrev
1276 1276 rev = None
1277 1277
1278 1278 cachedrev = None
1279 1279 flags = None
1280 1280 rawtext = None
1281 1281 if node == nullid:
1282 1282 return ""
1283 1283 if self._cache:
1284 1284 if self._cache[0] == node:
1285 1285 # _cache only stores rawtext
1286 1286 if raw:
1287 1287 return self._cache[2]
1288 1288 # duplicated, but good for perf
1289 1289 if rev is None:
1290 1290 rev = self.rev(node)
1291 1291 if flags is None:
1292 1292 flags = self.flags(rev)
1293 1293 # no extra flags set, no flag processor runs, text = rawtext
1294 1294 if flags == REVIDX_DEFAULT_FLAGS:
1295 1295 return self._cache[2]
1296 1296 # rawtext is reusable. need to run flag processor
1297 1297 rawtext = self._cache[2]
1298 1298
1299 1299 cachedrev = self._cache[1]
1300 1300
1301 1301 # look up what we need to read
1302 1302 if rawtext is None:
1303 1303 if rev is None:
1304 1304 rev = self.rev(node)
1305 1305
1306 1306 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1307 1307 if stopped:
1308 1308 rawtext = self._cache[2]
1309 1309
1310 1310 # drop cache to save memory
1311 1311 self._cache = None
1312 1312
1313 1313 bins = self._chunks(chain, df=_df)
1314 1314 if rawtext is None:
1315 1315 rawtext = bytes(bins[0])
1316 1316 bins = bins[1:]
1317 1317
1318 1318 rawtext = mdiff.patches(rawtext, bins)
1319 1319 self._cache = (node, rev, rawtext)
1320 1320
1321 1321 if flags is None:
1322 1322 if rev is None:
1323 1323 rev = self.rev(node)
1324 1324 flags = self.flags(rev)
1325 1325
1326 1326 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1327 1327 if validatehash:
1328 1328 self.checkhash(text, node, rev=rev)
1329 1329
1330 1330 return text
1331 1331
1332 1332 def hash(self, text, p1, p2):
1333 1333 """Compute a node hash.
1334 1334
1335 1335 Available as a function so that subclasses can replace the hash
1336 1336 as needed.
1337 1337 """
1338 1338 return hash(text, p1, p2)
1339 1339
1340 1340 def _processflags(self, text, flags, operation, raw=False):
1341 1341 """Inspect revision data flags and applies transforms defined by
1342 1342 registered flag processors.
1343 1343
1344 1344 ``text`` - the revision data to process
1345 1345 ``flags`` - the revision flags
1346 1346 ``operation`` - the operation being performed (read or write)
1347 1347 ``raw`` - an optional argument describing if the raw transform should be
1348 1348 applied.
1349 1349
1350 1350 This method processes the flags in the order (or reverse order if
1351 1351 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1352 1352 flag processors registered for present flags. The order of flags defined
1353 1353 in REVIDX_FLAGS_ORDER needs to be stable because flag transforms are not commutative.
1354 1354
1355 1355 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1356 1356 processed text and ``validatehash`` is a bool indicating whether the
1357 1357 returned text should be checked for hash integrity.
1358 1358
1359 1359 Note: If the ``raw`` argument is set, it has precedence over the
1360 1360 operation and will only update the value of ``validatehash``.
1361 1361 """
1362 1362 # fast path: no flag processors will run
1363 1363 if flags == 0:
1364 1364 return text, True
1365 1365 if operation not in ('read', 'write'):
1366 1366 raise ProgrammingError(_("invalid '%s' operation") % operation)
1367 1367 # Check all flags are known.
1368 1368 if flags & ~REVIDX_KNOWN_FLAGS:
1369 1369 raise RevlogError(_("incompatible revision flag '%#x'") %
1370 1370 (flags & ~REVIDX_KNOWN_FLAGS))
1371 1371 validatehash = True
1372 1372 # Depending on the operation (read or write), the order might be
1373 1373 # reversed due to non-commutative transforms.
1374 1374 orderedflags = REVIDX_FLAGS_ORDER
1375 1375 if operation == 'write':
1376 1376 orderedflags = reversed(orderedflags)
1377 1377
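# Illustrative example with two hypothetical flags A and B, where
# REVIDX_FLAGS_ORDER is [A, B]: a 'read' applies A's transform then B's,
# so the matching 'write' must apply B's then A's to invert them.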
1378 1378 for flag in orderedflags:
1379 1379 # If a flagprocessor has been registered for a known flag, apply the
1380 1380 # related operation transform and update result tuple.
1381 1381 if flag & flags:
1382 1382 vhash = True
1383 1383
1384 1384 if flag not in _flagprocessors:
1385 1385 message = _("missing processor for flag '%#x'") % (flag)
1386 1386 raise RevlogError(message)
1387 1387
1388 1388 processor = _flagprocessors[flag]
1389 1389 if processor is not None:
1390 1390 readtransform, writetransform, rawtransform = processor
1391 1391
1392 1392 if raw:
1393 1393 vhash = rawtransform(self, text)
1394 1394 elif operation == 'read':
1395 1395 text, vhash = readtransform(self, text)
1396 1396 else: # write operation
1397 1397 text, vhash = writetransform(self, text)
1398 1398 validatehash = validatehash and vhash
1399 1399
1400 1400 return text, validatehash
1401 1401
1402 1402 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1403 1403 """Check node hash integrity.
1404 1404
1405 1405 Available as a function so that subclasses can extend hash mismatch
1406 1406 behaviors as needed.
1407 1407 """
1408 1408 if p1 is None and p2 is None:
1409 1409 p1, p2 = self.parents(node)
1410 1410 if node != self.hash(text, p1, p2):
1411 1411 revornode = rev
1412 1412 if revornode is None:
1413 1413 revornode = templatefilters.short(hex(node))
1414 1414 raise RevlogError(_("integrity check failed on %s:%s")
1415 1415 % (self.indexfile, revornode))
1416 1416
1417 1417 def checkinlinesize(self, tr, fp=None):
1418 1418 """Check if the revlog is too big for inline and convert if so.
1419 1419
1420 1420 This should be called after revisions are added to the revlog. If the
1421 1421 revlog has grown too large to be an inline revlog, it will convert it
1422 1422 to use multiple index and data files.
1423 1423 """
1424 1424 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1425 1425 return
1426 1426
1427 1427 trinfo = tr.find(self.indexfile)
1428 1428 if trinfo is None:
1429 1429 raise RevlogError(_("%s not found in the transaction")
1430 1430 % self.indexfile)
1431 1431
1432 1432 trindex = trinfo[2]
1433 1433 if trindex is not None:
1434 1434 dataoff = self.start(trindex)
1435 1435 else:
1436 1436 # revlog was stripped at start of transaction, use all leftover data
1437 1437 trindex = len(self) - 1
1438 1438 dataoff = self.end(-2)
1439 1439
1440 1440 tr.add(self.datafile, dataoff)
1441 1441
1442 1442 if fp:
1443 1443 fp.flush()
1444 1444 fp.close()
1445 1445
1446 1446 df = self.opener(self.datafile, 'w')
1447 1447 try:
1448 1448 for r in self:
1449 1449 df.write(self._getsegmentforrevs(r, r)[1])
1450 1450 finally:
1451 1451 df.close()
1452 1452
1453 1453 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1454 1454 checkambig=self._checkambig)
1455 1455 self.version &= ~(REVLOGNGINLINEDATA)
1456 1456 self._inline = False
1457 1457 for i in self:
1458 1458 e = self._io.packentry(self.index[i], self.node, self.version, i)
1459 1459 fp.write(e)
1460 1460
1461 1461 # if we don't call close, the temp file will never replace the
1462 1462 # real index
1463 1463 fp.close()
1464 1464
1465 1465 tr.replace(self.indexfile, trindex * self._io.size)
1466 1466 self._chunkclear()
1467 1467
1468 1468 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1469 1469 node=None, flags=REVIDX_DEFAULT_FLAGS):
1470 1470 """add a revision to the log
1471 1471
1472 1472 text - the revision data to add
1473 1473 transaction - the transaction object used for rollback
1474 1474 link - the linkrev data to add
1475 1475 p1, p2 - the parent nodeids of the revision
1476 1476 cachedelta - an optional precomputed delta
1477 1477 node - nodeid of revision; typically node is not specified, and it is
1478 1478 computed by default as hash(text, p1, p2), however subclasses might
1479 1479 use a different hashing method (and override checkhash() in that case)
1480 1480 flags - the known flags to set on the revision
1481 1481 """
1482 1482 if link == nullrev:
1483 1483 raise RevlogError(_("attempted to add linkrev -1 to %s")
1484 1484 % self.indexfile)
1485 1485
1486 1486 if flags:
1487 1487 node = node or self.hash(text, p1, p2)
1488 1488
1489 1489 rawtext, validatehash = self._processflags(text, flags, 'write')
1490 1490
1491 1491 # If the flag processor modifies the revision data, ignore any provided
1492 1492 # cachedelta.
1493 1493 if rawtext != text:
1494 1494 cachedelta = None
1495 1495
1496 1496 if len(rawtext) > _maxentrysize:
1497 1497 raise RevlogError(
1498 1498 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1499 1499 % (self.indexfile, len(rawtext)))
1500 1500
1501 1501 node = node or self.hash(rawtext, p1, p2)
1502 1502 if node in self.nodemap:
1503 1503 return node
1504 1504
1505 1505 if validatehash:
1506 1506 self.checkhash(rawtext, node, p1=p1, p2=p2)
1507 1507
1508 1508 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1509 1509 flags, cachedelta=cachedelta)
1510 1510
1511 1511 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1512 1512 cachedelta=None):
1513 1513 """add a raw revision with known flags, node and parents
1514 1514 useful when reusing a revision not stored in this revlog (e.g. received
1515 1515 over the wire, or read from an external bundle).
1516 1516 """
1517 1517 dfh = None
1518 1518 if not self._inline:
1519 1519 dfh = self.opener(self.datafile, "a+")
1520 1520 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1521 1521 try:
1522 1522 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1523 1523 flags, cachedelta, ifh, dfh)
1524 1524 finally:
1525 1525 if dfh:
1526 1526 dfh.close()
1527 1527 ifh.close()
1528 1528
1529 1529 def compress(self, data):
1530 1530 """Generate a possibly-compressed representation of data."""
1531 1531 if not data:
1532 1532 return '', data
1533 1533
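# The one-byte header returned with the data tells decompress() how to
# route the chunk: '' means the compressor embedded its own header in
# the data, 'u' marks text stored uncompressed, and data that already
# starts with '\0' is stored verbatim, since it cannot be mistaken for
# either of the other two.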
1534 1534 compressed = self._compressor.compress(data)
1535 1535
1536 1536 if compressed:
1537 1537 # The revlog compressor added the header in the returned data.
1538 1538 return '', compressed
1539 1539
1540 1540 if data[0:1] == '\0':
1541 1541 return '', data
1542 1542 return 'u', data
1543 1543
1544 1544 def decompress(self, data):
1545 1545 """Decompress a revlog chunk.
1546 1546
1547 1547 The chunk is expected to begin with a header identifying the
1548 1548 format type so it can be routed to an appropriate decompressor.
1549 1549 """
1550 1550 if not data:
1551 1551 return data
1552 1552
1553 1553 # Revlogs are read much more frequently than they are written and many
1554 1554 # chunks only take microseconds to decompress, so performance is
1555 1555 # important here.
1556 1556 #
1557 1557 # We can make a few assumptions about revlogs:
1558 1558 #
1559 1559 # 1) the majority of chunks will be compressed (as opposed to inline
1560 1560 # raw data).
1561 1561 # 2) decompressing *any* data will likely be at least 10x slower than
1562 1562 # returning raw inline data.
1563 1563 # 3) we want to prioritize common and officially supported compression
1564 1564 # engines
1565 1565 #
1566 1566 # It follows that we want to optimize for the "decompress compressed data
1567 1567 # when encoded with common and officially supported compression engines"
1568 1568 # case over "raw data" and "data encoded by less common or non-official
1569 1569 # compression engines." That is why we have the inline lookup first
1570 1570 # followed by the compengines lookup.
1571 1571 #
1572 1572 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1573 1573 # compressed chunks. And this matters for changelog and manifest reads.
1574 1574 t = data[0:1]
1575 1575
1576 1576 if t == 'x':
1577 1577 try:
1578 1578 return _zlibdecompress(data)
1579 1579 except zlib.error as e:
1580 1580 raise RevlogError(_('revlog decompress error: %s') % str(e))
1581 1581 # '\0' is more common than 'u' so it goes first.
1582 1582 elif t == '\0':
1583 1583 return data
1584 1584 elif t == 'u':
1585 1585 return util.buffer(data, 1)
1586 1586
1587 1587 try:
1588 1588 compressor = self._decompressors[t]
1589 1589 except KeyError:
1590 1590 try:
1591 1591 engine = util.compengines.forrevlogheader(t)
1592 1592 compressor = engine.revlogcompressor()
1593 1593 self._decompressors[t] = compressor
1594 1594 except KeyError:
1595 1595 raise RevlogError(_('unknown compression type %r') % t)
1596 1596
1597 1597 return compressor.decompress(data)
1598 1598
1599 1599 def _isgooddelta(self, d, textlen):
1600 1600 """Returns True if the given delta is good. Good means that it is within
1601 1601 the disk span, disk size, and chain length bounds that we know to be
1602 1602 performant."""
1603 1603 if d is None:
1604 1604 return False
1605 1605
1606 1606 # - 'dist' is the distance from the base revision -- bounding it limits
1607 1607 # the amount of I/O we need to do.
1608 1608 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1609 1609 # to apply -- bounding it limits the amount of CPU we consume.
1610 1610 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
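# Illustrative bounds, assuming textlen = 1000: the delta is rejected if
# reconstructing it would span more than 4000 bytes of the data file
# (dist), if the delta is larger than the fulltext (l > 1000), if the
# chain's deltas sum to over 2000 bytes (compresseddeltalen), or if the
# chain exceeds the configured maximum length.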
1611 1611 if (dist > textlen * 4 or l > textlen or
1612 1612 compresseddeltalen > textlen * 2 or
1613 1613 (self._maxchainlen and chainlen > self._maxchainlen)):
1614 1614 return False
1615 1615
1616 1616 return True
1617 1617
1618 1618 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1619 1619 cachedelta, ifh, dfh, alwayscache=False):
1620 1620 """internal function to add revisions to the log
1621 1621
1622 1622 see addrevision for argument descriptions.
1623 1623
1624 1624 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1625 1625
1626 1626 invariants:
1627 1627 - rawtext is optional (can be None); if not set, cachedelta must be set.
1628 1628 if both are set, they must correspond to each other.
1629 1629 """
1630 1630 btext = [rawtext]
1631 1631 def buildtext():
1632 1632 if btext[0] is not None:
1633 1633 return btext[0]
1634 1634 baserev = cachedelta[0]
1635 1635 delta = cachedelta[1]
1636 1636 # special case deltas which replace entire base; no need to decode
1637 1637 # base revision. this neatly avoids censored bases, which throw when
1638 1638 # they're decoded.
1639 1639 hlen = struct.calcsize(">lll")
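# A full-replacement delta is a single bdiff hunk whose ">lll" header is
# (start=0, end=len(base), len(newtext)); when the header matches, the
# new fulltext is simply everything after it.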
1640 1640 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1641 1641 len(delta) - hlen):
1642 1642 btext[0] = delta[hlen:]
1643 1643 else:
1644 1644 if self._inline:
1645 1645 fh = ifh
1646 1646 else:
1647 1647 fh = dfh
1648 1648 basetext = self.revision(baserev, _df=fh, raw=True)
1649 1649 btext[0] = mdiff.patch(basetext, delta)
1650 1650
1651 1651 try:
1652 1652 res = self._processflags(btext[0], flags, 'read', raw=True)
1653 1653 btext[0], validatehash = res
1654 1654 if validatehash:
1655 1655 self.checkhash(btext[0], node, p1=p1, p2=p2)
1656 1656 if flags & REVIDX_ISCENSORED:
1657 1657 raise RevlogError(_('node %s is not censored') % node)
1658 1658 except CensoredNodeError:
1659 1659 # must pass the censored index flag to add censored revisions
1660 1660 if not flags & REVIDX_ISCENSORED:
1661 1661 raise
1662 1662 return btext[0]
1663 1663
1664 1664 def builddelta(rev):
1665 1665 # can we use the cached delta?
1666 1666 if cachedelta and cachedelta[0] == rev:
1667 1667 delta = cachedelta[1]
1668 1668 else:
1669 1669 t = buildtext()
1670 1670 if self.iscensored(rev):
1671 1671 # deltas based on a censored revision must replace the
1672 1672 # full content in one patch, so delta works everywhere
1673 1673 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1674 1674 delta = header + t
1675 1675 else:
1676 1676 if self._inline:
1677 1677 fh = ifh
1678 1678 else:
1679 1679 fh = dfh
1680 1680 ptext = self.revision(rev, _df=fh, raw=True)
1681 1681 delta = mdiff.textdiff(ptext, t)
1682 1682 header, data = self.compress(delta)
1683 1683 deltalen = len(header) + len(data)
1684 1684 chainbase = self.chainbase(rev)
1685 1685 dist = deltalen + offset - self.start(chainbase)
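# 'dist' estimates how much of the data file must be read to reconstruct
# this revision: from the start of its chain base through the end of the
# delta about to be appended at 'offset'.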
1686 1686 if self._generaldelta:
1687 1687 base = rev
1688 1688 else:
1689 1689 base = chainbase
1690 1690 chainlen, compresseddeltalen = self._chaininfo(rev)
1691 1691 chainlen += 1
1692 1692 compresseddeltalen += deltalen
1693 1693 return (dist, deltalen, (header, data), base,
1694 1694 chainbase, chainlen, compresseddeltalen)
1695 1695
1696 1696 curr = len(self)
1697 1697 prev = curr - 1
1698 1698 offset = self.end(prev)
1699 1699 delta = None
1700 1700 p1r, p2r = self.rev(p1), self.rev(p2)
1701 1701
1702 1702 # full versions are inserted when the needed deltas
1703 1703 # become comparable to the uncompressed text
1704 1704 if rawtext is None:
1705 1705 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1706 1706 cachedelta[1])
1707 1707 else:
1708 1708 textlen = len(rawtext)
1709 1709
1710 1710 # should we try to build a delta?
1711 1711 if prev != nullrev and self.storedeltachains:
1712 1712 tested = set()
1713 1713 # This condition is true most of the time when processing
1714 1714 # changegroup data into a generaldelta repo. The only time it
1715 1715 # isn't true is if this is the first revision in a delta chain
1716 1716 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1717 1717 if cachedelta and self._generaldelta and self._lazydeltabase:
1718 1718 # Assume what we received from the server is a good choice
1719 1719 # build delta will reuse the cache
1720 1720 candidatedelta = builddelta(cachedelta[0])
1721 1721 tested.add(cachedelta[0])
1722 1722 if self._isgooddelta(candidatedelta, textlen):
1723 1723 delta = candidatedelta
1724 1724 if delta is None and self._generaldelta:
1725 1725 # exclude already lazy tested base if any
1726 1726 parents = [p for p in (p1r, p2r)
1727 1727 if p != nullrev and p not in tested]
1728 1728 if parents and not self._aggressivemergedeltas:
1729 1729 # Pick whichever parent is closer to us (to minimize the
1730 1730 # chance of having to build a fulltext).
1731 1731 parents = [max(parents)]
1732 1732 tested.update(parents)
1733 1733 pdeltas = []
1734 1734 for p in parents:
1735 1735 pd = builddelta(p)
1736 1736 if self._isgooddelta(pd, textlen):
1737 1737 pdeltas.append(pd)
1738 1738 if pdeltas:
1739 1739 delta = min(pdeltas, key=lambda x: x[1])
1740 1740 if delta is None and prev not in tested:
1741 1741 # other approaches failed; try against prev to hopefully save us a
1742 1742 # fulltext.
1743 1743 candidatedelta = builddelta(prev)
1744 1744 if self._isgooddelta(candidatedelta, textlen):
1745 1745 delta = candidatedelta
1746 1746 if delta is not None:
1747 1747 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1748 1748 else:
1749 1749 rawtext = buildtext()
1750 1750 data = self.compress(rawtext)
1751 1751 l = len(data[1]) + len(data[0])
1752 1752 base = chainbase = curr
1753 1753
1754 1754 e = (offset_type(offset, flags), l, textlen,
1755 1755 base, link, p1r, p2r, node)
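# the index keeps a sentinel entry (for nullid) at its end, so new
# entries are inserted just before it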
1756 1756 self.index.insert(-1, e)
1757 1757 self.nodemap[node] = curr
1758 1758
1759 1759 entry = self._io.packentry(e, self.node, self.version, curr)
1760 1760 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1761 1761
1762 1762 if alwayscache and rawtext is None:
1763 1763 rawtext = buildtext()
1764 1764
1765 1765 if type(rawtext) == str: # only accept immutable objects
1766 1766 self._cache = (node, curr, rawtext)
1767 1767 self._chainbasecache[curr] = chainbase
1768 1768 return node
1769 1769
1770 1770 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1771 1771 # Files opened in a+ mode have inconsistent behavior on various
1772 1772 # platforms. Windows requires that a file positioning call be made
1773 1773 # when the file handle transitions between reads and writes. See
1774 1774 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1775 1775 # platforms, Python or the platform itself can be buggy. Some versions
1776 1776 # of Solaris have been observed to not append at the end of the file
1777 1777 # if the file was seeked to before the end. See issue4943 for more.
1778 1778 #
1779 1779 # We work around this issue by inserting a seek() before writing.
1780 1780 # Note: This is likely not necessary on Python 3.
1781 1781 ifh.seek(0, os.SEEK_END)
1782 1782 if dfh:
1783 1783 dfh.seek(0, os.SEEK_END)
1784 1784
1785 1785 curr = len(self) - 1
1786 1786 if not self._inline:
1787 1787 transaction.add(self.datafile, offset)
1788 1788 transaction.add(self.indexfile, curr * len(entry))
1789 1789 if data[0]:
1790 1790 dfh.write(data[0])
1791 1791 dfh.write(data[1])
1792 1792 ifh.write(entry)
1793 1793 else:
1794 1794 offset += curr * self._io.size
1795 1795 transaction.add(self.indexfile, offset, curr)
1796 1796 ifh.write(entry)
1797 1797 ifh.write(data[0])
1798 1798 ifh.write(data[1])
1799 1799 self.checkinlinesize(transaction, ifh)
1800 1800
1801 1801 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1802 1802 """
1803 1803 add a delta group
1804 1804
1805 1805 given a set of deltas, add them to the revision log. the
1806 1806 first delta is against its parent, which should be in our
1807 1807 log; the rest are against the previous delta.
1808 1808
1809 1809 If ``addrevisioncb`` is defined, it will be called with arguments of
1810 1810 this revlog and the node that was added.
1811 1811 """
1812 1812
1813 1813 # track the base of the current delta log
1814 1814 content = []
1815 1815 node = None
1816 1816
1817 1817 r = len(self)
1818 1818 end = 0
1819 1819 if r:
1820 1820 end = self.end(r - 1)
1821 1821 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1822 1822 isize = r * self._io.size
1823 1823 if self._inline:
1824 1824 transaction.add(self.indexfile, end + isize, r)
1825 1825 dfh = None
1826 1826 else:
1827 1827 transaction.add(self.indexfile, isize, r)
1828 1828 transaction.add(self.datafile, end)
1829 1829 dfh = self.opener(self.datafile, "a+")
1830 1830 def flush():
1831 1831 if dfh:
1832 1832 dfh.flush()
1833 1833 ifh.flush()
1834 1834 try:
1835 1835 # loop through our set of deltas
1836 1836 chain = None
1837 1837 for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
1838 1838 node = chunkdata['node']
1839 1839 p1 = chunkdata['p1']
1840 1840 p2 = chunkdata['p2']
1841 1841 cs = chunkdata['cs']
1842 1842 deltabase = chunkdata['deltabase']
1843 1843 delta = chunkdata['delta']
1844 1844 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1845 1845
1846 1846 content.append(node)
1847 1847
1848 1848 link = linkmapper(cs)
1849 1849 if node in self.nodemap:
1850 1850 # this can happen if two branches make the same change
1851 1851 chain = node
1852 1852 continue
1853 1853
1854 1854 for p in (p1, p2):
1855 1855 if p not in self.nodemap:
1856 1856 raise LookupError(p, self.indexfile,
1857 1857 _('unknown parent'))
1858 1858
1859 1859 if deltabase not in self.nodemap:
1860 1860 raise LookupError(deltabase, self.indexfile,
1861 1861 _('unknown delta base'))
1862 1862
1863 1863 baserev = self.rev(deltabase)
1864 1864
1865 1865 if baserev != nullrev and self.iscensored(baserev):
1866 1866 # if base is censored, delta must be full replacement in a
1867 1867 # single patch operation
1868 1868 hlen = struct.calcsize(">lll")
1869 1869 oldlen = self.rawsize(baserev)
1870 1870 newlen = len(delta) - hlen
1871 1871 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1872 1872 raise error.CensoredBaseError(self.indexfile,
1873 1873 self.node(baserev))
1874 1874
1875 1875 if not flags and self._peek_iscensored(baserev, delta, flush):
1876 1876 flags |= REVIDX_ISCENSORED
1877 1877
1878 1878 # We assume consumers of addrevisioncb will want to retrieve
1879 1879 # the added revision, which will require a call to
1880 1880 # revision(). revision() will fast path if there is a cache
1881 1881 # hit. So, we tell _addrevision() to always cache in this case.
1882 1882 # We're only using addgroup() in the context of changegroup
1883 1883 # generation so the revision data can always be handled as raw
1884 1884 # by the flagprocessor.
1885 1885 chain = self._addrevision(node, None, transaction, link,
1886 1886 p1, p2, flags, (baserev, delta),
1887 1887 ifh, dfh,
1888 1888 alwayscache=bool(addrevisioncb))
1889 1889
1890 1890 if addrevisioncb:
1891 1891 addrevisioncb(self, chain)
1892 1892
1893 1893 if not dfh and not self._inline:
1894 1894 # addrevision switched from inline to conventional
1895 1895 # reopen the index
1896 1896 ifh.close()
1897 1897 dfh = self.opener(self.datafile, "a+")
1898 1898 ifh = self.opener(self.indexfile, "a+",
1899 1899 checkambig=self._checkambig)
1900 1900 finally:
1901 1901 if dfh:
1902 1902 dfh.close()
1903 1903 ifh.close()
1904 1904
1905 1905 return content
1906 1906
1907 1907 def iscensored(self, rev):
1908 1908 """Check if a file revision is censored."""
1909 1909 return False
1910 1910
1911 1911 def _peek_iscensored(self, baserev, delta, flush):
1912 1912 """Quickly check if a delta produces a censored revision."""
1913 1913 return False
1914 1914
1915 1915 def getstrippoint(self, minlink):
1916 1916 """find the minimum rev that must be stripped to strip the linkrev
1917 1917
1918 1918 Returns a tuple containing the minimum rev and a set of all revs that
1919 1919 have linkrevs that will be broken by this strip.
1920 1920 """
1921 1921 brokenrevs = set()
1922 1922 strippoint = len(self)
1923 1923
1924 1924 heads = {}
1925 1925 futurelargelinkrevs = set()
1926 1926 for head in self.headrevs():
1927 1927 headlinkrev = self.linkrev(head)
1928 1928 heads[head] = headlinkrev
1929 1929 if headlinkrev >= minlink:
1930 1930 futurelargelinkrevs.add(headlinkrev)
1931 1931
1932 1932 # This algorithm involves walking down the rev graph, starting at the
1933 1933 # heads. Since the revs are topologically sorted according to linkrev,
1934 1934 # once all head linkrevs are below the minlink, we know there are
1935 1935 # no more revs that could have a linkrev greater than minlink.
1936 1936 # So we can stop walking.
1937 1937 while futurelargelinkrevs:
1938 1938 strippoint -= 1
1939 1939 linkrev = heads.pop(strippoint)
1940 1940
1941 1941 if linkrev < minlink:
1942 1942 brokenrevs.add(strippoint)
1943 1943 else:
1944 1944 futurelargelinkrevs.remove(linkrev)
1945 1945
1946 1946 for p in self.parentrevs(strippoint):
1947 1947 if p != nullrev:
1948 1948 plinkrev = self.linkrev(p)
1949 1949 heads[p] = plinkrev
1950 1950 if plinkrev >= minlink:
1951 1951 futurelargelinkrevs.add(plinkrev)
1952 1952
1953 1953 return strippoint, brokenrevs
1954 1954
1955 1955 def strip(self, minlink, transaction):
1956 1956 """truncate the revlog on the first revision with a linkrev >= minlink
1957 1957
1958 1958 This function is called when we're stripping revision minlink and
1959 1959 its descendants from the repository.
1960 1960
1961 1961 We have to remove all revisions with linkrev >= minlink, because
1962 1962 the equivalent changelog revisions will be renumbered after the
1963 1963 strip.
1964 1964
1965 1965 So we truncate the revlog on the first of these revisions, and
1966 1966 trust that the caller has saved the revisions that shouldn't be
1967 1967 removed and that it'll re-add them after this truncation.
1968 1968 """
1969 1969 if len(self) == 0:
1970 1970 return
1971 1971
1972 1972 rev, _ = self.getstrippoint(minlink)
1973 1973 if rev == len(self):
1974 1974 return
1975 1975
1976 1976 # first truncate the files on disk
1977 1977 end = self.start(rev)
1978 1978 if not self._inline:
1979 1979 transaction.add(self.datafile, end)
1980 1980 end = rev * self._io.size
1981 1981 else:
1982 1982 end += rev * self._io.size
1983 1983
1984 1984 transaction.add(self.indexfile, end)
1985 1985
1986 1986 # then reset internal state in memory to forget those revisions
1987 1987 self._cache = None
1988 1988 self._chaininfocache = {}
1989 1989 self._chunkclear()
1990 1990 for x in xrange(rev, len(self)):
1991 1991 del self.nodemap[self.node(x)]
1992 1992
1993 1993 del self.index[rev:-1]
1994 1994
1995 1995 def checksize(self):
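# A quick consistency probe of the files on disk: dd is how many bytes
# the data file deviates from the size implied by the index, and di is
# how many unexpected bytes sit in the index file itself; both are 0
# for a healthy revlog.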
1996 1996 expected = 0
1997 1997 if len(self):
1998 1998 expected = max(0, self.end(len(self) - 1))
1999 1999
2000 2000 try:
2001 2001 f = self.opener(self.datafile)
2002 2002 f.seek(0, 2)
2003 2003 actual = f.tell()
2004 2004 f.close()
2005 2005 dd = actual - expected
2006 2006 except IOError as inst:
2007 2007 if inst.errno != errno.ENOENT:
2008 2008 raise
2009 2009 dd = 0
2010 2010
2011 2011 try:
2012 2012 f = self.opener(self.indexfile)
2013 2013 f.seek(0, 2)
2014 2014 actual = f.tell()
2015 2015 f.close()
2016 2016 s = self._io.size
2017 2017 i = max(0, actual // s)
2018 2018 di = actual - (i * s)
2019 2019 if self._inline:
2020 2020 databytes = 0
2021 2021 for r in self:
2022 2022 databytes += max(0, self.length(r))
2023 2023 dd = 0
2024 2024 di = actual - len(self) * s - databytes
2025 2025 except IOError as inst:
2026 2026 if inst.errno != errno.ENOENT:
2027 2027 raise
2028 2028 di = 0
2029 2029
2030 2030 return (dd, di)
2031 2031
2032 2032 def files(self):
2033 2033 res = [self.indexfile]
2034 2034 if not self._inline:
2035 2035 res.append(self.datafile)
2036 2036 return res
2037 2037
2038 2038 DELTAREUSEALWAYS = 'always'
2039 2039 DELTAREUSESAMEREVS = 'samerevs'
2040 2040 DELTAREUSENEVER = 'never'
2041 2041
2042 2042 DELTAREUSEALL = {'always', 'samerevs', 'never'}
2043 2043
2044 2044 def clone(self, tr, destrevlog, addrevisioncb=None,
2045 2045 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2046 2046 """Copy this revlog to another, possibly with format changes.
2047 2047
2048 2048 The destination revlog will contain the same revisions and nodes.
2049 2049 However, it may not be bit-for-bit identical due to e.g. delta encoding
2050 2050 differences.
2051 2051
2052 2052 The ``deltareuse`` argument controls how deltas from the existing revlog
2053 2053 are preserved in the destination revlog. The argument can have the
2054 2054 following values:
2055 2055
2056 2056 DELTAREUSEALWAYS
2057 2057 Deltas will always be reused (if possible), even if the destination
2058 2058 revlog would not select the same revisions for the delta. This is the
2059 2059 fastest mode of operation.
2060 2060 DELTAREUSESAMEREVS
2061 2061 Deltas will be reused if the destination revlog would pick the same
2062 2062 revisions for the delta. This mode strikes a balance between speed
2063 2063 and optimization.
2064 2064 DELTAREUSENEVER
2065 2065 Deltas will never be reused. This is the slowest mode of execution.
2066 2066 This mode can be used to recompute deltas (e.g. if the diff/delta
2067 2067 algorithm changes).
2068 2068
2069 2069 Delta computation can be slow, so the choice of delta reuse policy can
2070 2070 significantly affect run time.
2071 2071
2072 2072 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2073 2073 two extremes. Deltas will be reused if they are appropriate. But if the
2074 2074 delta could choose a better revision, it will do so. This means if you
2075 2075 are converting a non-generaldelta revlog to a generaldelta revlog,
2076 2076 deltas will be recomputed if the delta's parent isn't a parent of the
2077 2077 revision.
2078 2078
2079 2079 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2080 2080 controls whether to compute deltas against both parents for merges.
2081 2081 If ``None``, the destination revlog's existing setting is kept.
2082 2082 """
2083 2083 if deltareuse not in self.DELTAREUSEALL:
2084 2084 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2085 2085
2086 2086 if len(destrevlog):
2087 2087 raise ValueError(_('destination revlog is not empty'))
2088 2088
2089 2089 if getattr(self, 'filteredrevs', None):
2090 2090 raise ValueError(_('source revlog has filtered revisions'))
2091 2091 if getattr(destrevlog, 'filteredrevs', None):
2092 2092 raise ValueError(_('destination revlog has filtered revisions'))
2093 2093
2094 2094 # lazydeltabase controls whether to reuse a cached delta, if possible.
2095 2095 oldlazydeltabase = destrevlog._lazydeltabase
2096 2096 oldamd = destrevlog._aggressivemergedeltas
2097 2097
2098 2098 try:
2099 2099 if deltareuse == self.DELTAREUSEALWAYS:
2100 2100 destrevlog._lazydeltabase = True
2101 2101 elif deltareuse == self.DELTAREUSESAMEREVS:
2102 2102 destrevlog._lazydeltabase = False
2103 2103
2104 2104 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2105 2105
2106 2106 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2107 2107 self.DELTAREUSESAMEREVS)
2108 2108
2109 2109 index = self.index
2110 2110 for rev in self:
2111 2111 entry = index[rev]
2112 2112
2113 2113 # Some classes override linkrev to take filtered revs into
2114 2114 # account. Use raw entry from index.
2115 2115 flags = entry[0] & 0xffff
2116 2116 linkrev = entry[4]
2117 2117 p1 = index[entry[5]][7]
2118 2118 p2 = index[entry[6]][7]
2119 2119 node = entry[7]
2120 2120
2121 2121 # (Possibly) reuse the delta from the revlog if allowed and
2122 2122 # the revlog chunk is a delta.
2123 2123 cachedelta = None
2124 2124 rawtext = None
2125 2125 if populatecachedelta:
2126 2126 dp = self.deltaparent(rev)
2127 2127 if dp != nullrev:
2128 2128 cachedelta = (dp, str(self._chunk(rev)))
2129 2129
2130 2130 if not cachedelta:
2131 2131 rawtext = self.revision(rev, raw=True)
2132 2132
2133 2133 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2134 2134 checkambig=False)
2135 2135 dfh = None
2136 2136 if not destrevlog._inline:
2137 2137 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2138 2138 try:
2139 2139 destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
2140 2140 flags, cachedelta, ifh, dfh)
2141 2141 finally:
2142 2142 if dfh:
2143 2143 dfh.close()
2144 2144 ifh.close()
2145 2145
2146 2146 if addrevisioncb:
2147 2147 addrevisioncb(self, rev, node)
2148 2148 finally:
2149 2149 destrevlog._lazydeltabase = oldlazydeltabase
2150 2150 destrevlog._aggressivemergedeltas = oldamd