revlog: micro-optimize the computation of hashes...
Alex Gaynor
r33391:943b8c37 default
@@ -1,2208 +1,2212
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import errno
19 19 import hashlib
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullid,
29 29 nullrev,
30 30 wdirhex,
31 31 wdirid,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from . import (
36 36 ancestor,
37 37 error,
38 38 mdiff,
39 39 policy,
40 40 pycompat,
41 41 templatefilters,
42 42 util,
43 43 )
44 44
45 45 parsers = policy.importmod(r'parsers')
46 46
47 47 _pack = struct.pack
48 48 _unpack = struct.unpack
49 49 # Aliased for performance.
50 50 _zlibdecompress = zlib.decompress
51 51
52 52 # revlog header flags
53 53 REVLOGV0 = 0
54 54 REVLOGV1 = 1
55 55 # Dummy value until file format is finalized.
56 56 # Reminder: change the bounds check in revlog.__init__ when this is changed.
57 57 REVLOGV2 = 0xDEAD
58 58 FLAG_INLINE_DATA = (1 << 16)
59 59 FLAG_GENERALDELTA = (1 << 17)
60 60 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
61 61 REVLOG_DEFAULT_FORMAT = REVLOGV1
62 62 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
63 63 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
64 64 REVLOGV2_FLAGS = REVLOGV1_FLAGS
65 65
66 66 # revlog index flags
67 67 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
68 68 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
69 69 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
70 70 REVIDX_DEFAULT_FLAGS = 0
71 71 # stable order in which flags need to be processed and their processors applied
72 72 REVIDX_FLAGS_ORDER = [
73 73 REVIDX_ISCENSORED,
74 74 REVIDX_ELLIPSIS,
75 75 REVIDX_EXTSTORED,
76 76 ]
77 77 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
78 78
79 79 # max size of revlog with inline data
80 80 _maxinline = 131072
81 81 _chunksize = 1048576
82 82
83 83 RevlogError = error.RevlogError
84 84 LookupError = error.LookupError
85 85 CensoredNodeError = error.CensoredNodeError
86 86 ProgrammingError = error.ProgrammingError
87 87
88 88 # Store flag processors (cf. 'addflagprocessor()' to register)
89 89 _flagprocessors = {
90 90 REVIDX_ISCENSORED: None,
91 91 }
92 92
93 93 def addflagprocessor(flag, processor):
94 94 """Register a flag processor on a revision data flag.
95 95
96 96 Invariant:
97 97 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
98 98 - Only one flag processor can be registered on a specific flag.
99 99 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
100 100 following signatures:
101 101 - (read) f(self, rawtext) -> text, bool
102 102 - (write) f(self, text) -> rawtext, bool
103 103 - (raw) f(self, rawtext) -> bool
104 104 "text" is presented to the user. "rawtext" is stored in revlog data, not
105 105 directly visible to the user.
106 106 The boolean returned by these transforms is used to determine whether
107 107 the returned text can be used for hash integrity checking. For example,
108 108 if "write" returns False, then "text" is used to generate hash. If
109 109 "write" returns True, that basically means "rawtext" returned by "write"
110 110 should be used to generate hash. Usually, "write" and "read" return
111 111 different booleans. And "raw" returns a same boolean as "write".
112 112
113 113 Note: The 'raw' transform is used for changegroup generation and in some
114 114 debug commands. In this case the transform only indicates whether the
115 115 contents can be used for hash integrity checks.
116 116 """
117 117 if not flag & REVIDX_KNOWN_FLAGS:
118 118 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
119 119 raise ProgrammingError(msg)
120 120 if flag not in REVIDX_FLAGS_ORDER:
121 121 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
122 122 raise ProgrammingError(msg)
123 123 if flag in _flagprocessors:
124 124 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
125 125 raise error.Abort(msg)
126 126 _flagprocessors[flag] = processor
127 127
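As a sketch of the 3-tuple contract described above (the transforms and the reuse of REVIDX_EXTSTORED here are purely illustrative, not part of revlog):

    # hypothetical processor: store text base64-encoded on disk
    import base64

    def _readb64(rl, rawtext):
        return base64.b64decode(rawtext), True    # text, validatehash
    def _writeb64(rl, text):
        return base64.b64encode(text), False      # rawtext, validatehash
    def _rawb64(rl, rawtext):
        return False                              # may rawtext be hashed?

    # addflagprocessor(REVIDX_EXTSTORED, (_readb64, _writeb64, _rawb64))

Consistent with the docstring, "write" returns False (the original "text" generates the hash), "read" returns True, and "raw" matches "write".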
128 128 def getoffset(q):
129 129 return int(q >> 16)
130 130
131 131 def gettype(q):
132 132 return int(q & 0xFFFF)
133 133
134 134 def offset_type(offset, type):
135 135 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
136 136 raise ValueError('unknown revlog index flags')
137 137 return int(int(offset) << 16 | type)
138 138
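The two accessors above invert offset_type(): the offset occupies the high bits and the 16-bit flag field the low bits of one integer. A quick round trip (values illustrative):

    packed = offset_type(1024, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1024
    assert gettype(packed) == REVIDX_ISCENSORED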
139 139 _nullhash = hashlib.sha1(nullid)
140 140
141 141 def hash(text, p1, p2):
142 142 """generate a hash from the given text and its parent hashes
143 143
144 144 This hash combines both the current file contents and its history
145 145 in a manner that makes it easy to distinguish nodes with the same
146 146 content in the revision graph.
147 147 """
148 148 # As of now, if one of the parent nodes is null, p2 is null
149 149 if p2 == nullid:
150 150 # a copy of an existing hash object is faster than creating a new one
151 151 s = _nullhash.copy()
152 152 s.update(p1)
153 153 else:
154 154 # none of the parent nodes are nullid
155 l = [p1, p2]
156 l.sort()
157 s = hashlib.sha1(l[0])
158 s.update(l[1])
155 if p1 < p2:
156 a = p1
157 b = p2
158 else:
159 a = p2
160 b = p1
161 s = hashlib.sha1(a)
162 s.update(b)
159 163 s.update(text)
160 164 return s.digest()
161 165
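The hunk above replaces building and sorting a two-element list with a single comparison; both variants feed SHA-1 the lexicographically smaller parent first, so the computed node is unchanged. A side-by-side sketch of the equivalence:

    import hashlib

    def node_old(text, p1, p2):    # before: sort a 2-element list
        l = [p1, p2]
        l.sort()
        s = hashlib.sha1(l[0])
        s.update(l[1])
        s.update(text)
        return s.digest()

    def node_new(text, p1, p2):    # after: one comparison, no list allocation
        a, b = (p1, p2) if p1 < p2 else (p2, p1)
        s = hashlib.sha1(a)
        s.update(b)
        s.update(text)
        return s.digest()

    # node_old(t, p1, p2) == node_new(t, p1, p2) for any inputs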
162 166 # index v0:
163 167 # 4 bytes: offset
164 168 # 4 bytes: compressed length
165 169 # 4 bytes: base rev
166 170 # 4 bytes: link rev
167 171 # 20 bytes: parent 1 nodeid
168 172 # 20 bytes: parent 2 nodeid
169 173 # 20 bytes: nodeid
170 174 indexformatv0 = ">4l20s20s20s"
171 175
172 176 class revlogoldio(object):
173 177 def __init__(self):
174 178 self.size = struct.calcsize(indexformatv0)
175 179
176 180 def parseindex(self, data, inline):
177 181 s = self.size
178 182 index = []
179 183 nodemap = {nullid: nullrev}
180 184 n = off = 0
181 185 l = len(data)
182 186 while off + s <= l:
183 187 cur = data[off:off + s]
184 188 off += s
185 189 e = _unpack(indexformatv0, cur)
186 190 # transform to revlogv1 format
187 191 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
188 192 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
189 193 index.append(e2)
190 194 nodemap[e[6]] = n
191 195 n += 1
192 196
193 197 # add the magic null revision at -1
194 198 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
195 199
196 200 return index, nodemap, None
197 201
198 202 def packentry(self, entry, node, version, rev):
199 203 if gettype(entry[0]):
200 204 raise RevlogError(_('index entry flags need revlog version 1'))
201 205 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
202 206 node(entry[5]), node(entry[6]), entry[7])
203 207 return _pack(indexformatv0, *e2)
204 208
205 209 # index ng:
206 210 # 6 bytes: offset
207 211 # 2 bytes: flags
208 212 # 4 bytes: compressed length
209 213 # 4 bytes: uncompressed length
210 214 # 4 bytes: base rev
211 215 # 4 bytes: link rev
212 216 # 4 bytes: parent 1 rev
213 217 # 4 bytes: parent 2 rev
214 218 # 32 bytes: nodeid
215 219 indexformatng = ">Qiiiiii20s12x"
216 220 versionformat = ">I"
217 221
218 222 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
219 223 # signed integer)
220 224 _maxentrysize = 0x7fffffff
221 225
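struct.calcsize confirms the fixed record sizes implied by the two layouts above (the v1 entry is 64 bytes, including the 12 bytes of padding after the 20-byte nodeid):

    import struct
    assert struct.calcsize(">4l20s20s20s") == 76    # indexformatv0
    assert struct.calcsize(">Qiiiiii20s12x") == 64  # indexformatng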
222 226 class revlogio(object):
223 227 def __init__(self):
224 228 self.size = struct.calcsize(indexformatng)
225 229
226 230 def parseindex(self, data, inline):
227 231 # call the C implementation to parse the index data
228 232 index, cache = parsers.parse_index2(data, inline)
229 233 return index, getattr(index, 'nodemap', None), cache
230 234
231 235 def packentry(self, entry, node, version, rev):
232 236 p = _pack(indexformatng, *entry)
233 237 if rev == 0:
234 238 p = _pack(versionformat, version) + p[4:]
235 239 return p
236 240
237 241 class revlog(object):
238 242 """
239 243 the underlying revision storage object
240 244
241 245 A revlog consists of two parts, an index and the revision data.
242 246
243 247 The index is a file with a fixed record size containing
244 248 information on each revision, including its nodeid (hash), the
245 249 nodeids of its parents, the position and offset of its data within
246 250 the data file, and the revision it's based on. Finally, each entry
247 251 contains a linkrev entry that can serve as a pointer to external
248 252 data.
249 253
250 254 The revision data itself is a linear collection of data chunks.
251 255 Each chunk represents a revision and is usually represented as a
252 256 delta against the previous chunk. To bound lookup time, runs of
253 257 deltas are limited to about 2 times the length of the original
254 258 version data. This makes retrieval of a version proportional to
255 259 its size, or O(1) relative to the number of revisions.
256 260
257 261 Both pieces of the revlog are written to in an append-only
258 262 fashion, which means we never need to rewrite a file to insert or
259 263 remove data, and can use some simple techniques to avoid the need
260 264 for locking while reading.
261 265
262 266 If checkambig, indexfile is opened with checkambig=True at
263 267 writing, to avoid file stat ambiguity.
264 268 """
265 269 def __init__(self, opener, indexfile, datafile=None, checkambig=False):
266 270 """
267 271 create a revlog object
268 272
269 273 opener is a function that abstracts the file opening operation
270 274 and can be used to implement COW semantics or the like.
271 275 """
272 276 self.indexfile = indexfile
273 277 self.datafile = datafile or (indexfile[:-2] + ".d")
274 278 self.opener = opener
275 279 # When True, indexfile is opened with checkambig=True at writing, to
276 280 # avoid file stat ambiguity.
277 281 self._checkambig = checkambig
278 282 # 3-tuple of (node, rev, text) for a raw revision.
279 283 self._cache = None
280 284 # Maps rev to chain base rev.
281 285 self._chainbasecache = util.lrucachedict(100)
282 286 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
283 287 self._chunkcache = (0, '')
284 288 # How much data to read and cache into the raw revlog data cache.
285 289 self._chunkcachesize = 65536
286 290 self._maxchainlen = None
287 291 self._aggressivemergedeltas = False
288 292 self.index = []
289 293 # Mapping of partial identifiers to full nodes.
290 294 self._pcache = {}
291 295 # Mapping of revision integer to full node.
292 296 self._nodecache = {nullid: nullrev}
293 297 self._nodepos = None
294 298 self._compengine = 'zlib'
295 299 self._maxdeltachainspan = -1
296 300
297 301 v = REVLOG_DEFAULT_VERSION
298 302 opts = getattr(opener, 'options', None)
299 303 if opts is not None:
300 304 if 'revlogv2' in opts:
301 305 # version 2 revlogs always use generaldelta.
302 306 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
303 307 elif 'revlogv1' in opts:
304 308 if 'generaldelta' in opts:
305 309 v |= FLAG_GENERALDELTA
306 310 else:
307 311 v = 0
308 312 if 'chunkcachesize' in opts:
309 313 self._chunkcachesize = opts['chunkcachesize']
310 314 if 'maxchainlen' in opts:
311 315 self._maxchainlen = opts['maxchainlen']
312 316 if 'aggressivemergedeltas' in opts:
313 317 self._aggressivemergedeltas = opts['aggressivemergedeltas']
314 318 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
315 319 if 'compengine' in opts:
316 320 self._compengine = opts['compengine']
317 321 if 'maxdeltachainspan' in opts:
318 322 self._maxdeltachainspan = opts['maxdeltachainspan']
319 323
320 324 if self._chunkcachesize <= 0:
321 325 raise RevlogError(_('revlog chunk cache size %r is not greater '
322 326 'than 0') % self._chunkcachesize)
323 327 elif self._chunkcachesize & (self._chunkcachesize - 1):
324 328 raise RevlogError(_('revlog chunk cache size %r is not a power '
325 329 'of 2') % self._chunkcachesize)
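    # Aside on the check above (illustrative values): for x > 0,
    # x & (x - 1) clears the lowest set bit, so it is zero exactly when
    # x is a power of two:
    #   65536 & 65535 == 0      -> accepted
    #   65537 & 65536 == 65536  -> rejected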
326 330
327 331 indexdata = ''
328 332 self._initempty = True
329 333 try:
330 334 f = self.opener(self.indexfile)
331 335 indexdata = f.read()
332 336 f.close()
333 337 if len(indexdata) > 0:
334 338 v = struct.unpack(versionformat, indexdata[:4])[0]
335 339 self._initempty = False
336 340 except IOError as inst:
337 341 if inst.errno != errno.ENOENT:
338 342 raise
339 343
340 344 self.version = v
341 345 self._inline = v & FLAG_INLINE_DATA
342 346 self._generaldelta = v & FLAG_GENERALDELTA
343 347 flags = v & ~0xFFFF
344 348 fmt = v & 0xFFFF
345 349 if fmt == REVLOGV0:
346 350 if flags:
347 351 raise RevlogError(_('unknown flags (%#04x) in version %d '
348 352 'revlog %s') %
349 353 (flags >> 16, fmt, self.indexfile))
350 354 elif fmt == REVLOGV1:
351 355 if flags & ~REVLOGV1_FLAGS:
352 356 raise RevlogError(_('unknown flags (%#04x) in version %d '
353 357 'revlog %s') %
354 358 (flags >> 16, fmt, self.indexfile))
355 359 elif fmt == REVLOGV2:
356 360 if flags & ~REVLOGV2_FLAGS:
357 361 raise RevlogError(_('unknown flags (%#04x) in version %d '
358 362 'revlog %s') %
359 363 (flags >> 16, fmt, self.indexfile))
360 364 else:
361 365 raise RevlogError(_('unknown version (%d) in revlog %s') %
362 366 (fmt, self.indexfile))
363 367
364 368 self.storedeltachains = True
365 369
366 370 self._io = revlogio()
367 371 if self.version == REVLOGV0:
368 372 self._io = revlogoldio()
369 373 try:
370 374 d = self._io.parseindex(indexdata, self._inline)
371 375 except (ValueError, IndexError):
372 376 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
373 377 self.index, nodemap, self._chunkcache = d
374 378 if nodemap is not None:
375 379 self.nodemap = self._nodecache = nodemap
376 380 if not self._chunkcache:
377 381 self._chunkclear()
378 382 # revnum -> (chain-length, sum-delta-length)
379 383 self._chaininfocache = {}
380 384 # revlog header -> revlog compressor
381 385 self._decompressors = {}
382 386
383 387 @util.propertycache
384 388 def _compressor(self):
385 389 return util.compengines[self._compengine].revlogcompressor()
386 390
387 391 def tip(self):
388 392 return self.node(len(self.index) - 2)
389 393 def __contains__(self, rev):
390 394 return 0 <= rev < len(self)
391 395 def __len__(self):
392 396 return len(self.index) - 1
393 397 def __iter__(self):
394 398 return iter(xrange(len(self)))
395 399 def revs(self, start=0, stop=None):
396 400 """iterate over all rev in this revlog (from start to stop)"""
397 401 step = 1
398 402 if stop is not None:
399 403 if start > stop:
400 404 step = -1
401 405 stop += step
402 406 else:
403 407 stop = len(self)
404 408 return xrange(start, stop, step)
405 409
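A small usage sketch of revs() on a hypothetical 5-revision revlog ``rl``; note that an explicit ``stop`` is inclusive and that start > stop iterates backwards:

    list(rl.revs())                 # -> [0, 1, 2, 3, 4]
    list(rl.revs(start=1, stop=3))  # -> [1, 2, 3]
    list(rl.revs(start=3, stop=1))  # -> [3, 2, 1]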
406 410 @util.propertycache
407 411 def nodemap(self):
408 412 self.rev(self.node(0))
409 413 return self._nodecache
410 414
411 415 def hasnode(self, node):
412 416 try:
413 417 self.rev(node)
414 418 return True
415 419 except KeyError:
416 420 return False
417 421
418 422 def clearcaches(self):
419 423 self._cache = None
420 424 self._chainbasecache.clear()
421 425 self._chunkcache = (0, '')
422 426 self._pcache = {}
423 427
424 428 try:
425 429 self._nodecache.clearcaches()
426 430 except AttributeError:
427 431 self._nodecache = {nullid: nullrev}
428 432 self._nodepos = None
429 433
430 434 def rev(self, node):
431 435 try:
432 436 return self._nodecache[node]
433 437 except TypeError:
434 438 raise
435 439 except RevlogError:
436 440 # parsers.c radix tree lookup failed
437 441 if node == wdirid:
438 442 raise error.WdirUnsupported
439 443 raise LookupError(node, self.indexfile, _('no node'))
440 444 except KeyError:
441 445 # pure python cache lookup failed
442 446 n = self._nodecache
443 447 i = self.index
444 448 p = self._nodepos
445 449 if p is None:
446 450 p = len(i) - 2
447 451 for r in xrange(p, -1, -1):
448 452 v = i[r][7]
449 453 n[v] = r
450 454 if v == node:
451 455 self._nodepos = r - 1
452 456 return r
453 457 if node == wdirid:
454 458 raise error.WdirUnsupported
455 459 raise LookupError(node, self.indexfile, _('no node'))
456 460
457 461 # Accessors for index entries.
458 462
459 463 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
460 464 # are flags.
461 465 def start(self, rev):
462 466 return int(self.index[rev][0] >> 16)
463 467
464 468 def flags(self, rev):
465 469 return self.index[rev][0] & 0xFFFF
466 470
467 471 def length(self, rev):
468 472 return self.index[rev][1]
469 473
470 474 def rawsize(self, rev):
471 475 """return the length of the uncompressed text for a given revision"""
472 476 l = self.index[rev][2]
473 477 if l >= 0:
474 478 return l
475 479
476 480 t = self.revision(rev, raw=True)
477 481 return len(t)
478 482
479 483 def size(self, rev):
480 484 """length of non-raw text (processed by a "read" flag processor)"""
481 485 # fast path: if no "read" flag processor could change the content,
482 486 # size is rawsize. note: ELLIPSIS is known to not change the content.
483 487 flags = self.flags(rev)
484 488 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
485 489 return self.rawsize(rev)
486 490
487 491 return len(self.revision(rev, raw=False))
488 492
489 493 def chainbase(self, rev):
490 494 base = self._chainbasecache.get(rev)
491 495 if base is not None:
492 496 return base
493 497
494 498 index = self.index
495 499 base = index[rev][3]
496 500 while base != rev:
497 501 rev = base
498 502 base = index[rev][3]
499 503
500 504 self._chainbasecache[rev] = base
501 505 return base
502 506
503 507 def linkrev(self, rev):
504 508 return self.index[rev][4]
505 509
506 510 def parentrevs(self, rev):
507 511 try:
508 512 return self.index[rev][5:7]
509 513 except IndexError:
510 514 if rev == wdirrev:
511 515 raise error.WdirUnsupported
512 516 raise
513 517
514 518 def node(self, rev):
515 519 try:
516 520 return self.index[rev][7]
517 521 except IndexError:
518 522 if rev == wdirrev:
519 523 raise error.WdirUnsupported
520 524 raise
521 525
522 526 # Derived from index values.
523 527
524 528 def end(self, rev):
525 529 return self.start(rev) + self.length(rev)
526 530
527 531 def parents(self, node):
528 532 i = self.index
529 533 d = i[self.rev(node)]
530 534 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
531 535
532 536 def chainlen(self, rev):
533 537 return self._chaininfo(rev)[0]
534 538
535 539 def _chaininfo(self, rev):
536 540 chaininfocache = self._chaininfocache
537 541 if rev in chaininfocache:
538 542 return chaininfocache[rev]
539 543 index = self.index
540 544 generaldelta = self._generaldelta
541 545 iterrev = rev
542 546 e = index[iterrev]
543 547 clen = 0
544 548 compresseddeltalen = 0
545 549 while iterrev != e[3]:
546 550 clen += 1
547 551 compresseddeltalen += e[1]
548 552 if generaldelta:
549 553 iterrev = e[3]
550 554 else:
551 555 iterrev -= 1
552 556 if iterrev in chaininfocache:
553 557 t = chaininfocache[iterrev]
554 558 clen += t[0]
555 559 compresseddeltalen += t[1]
556 560 break
557 561 e = index[iterrev]
558 562 else:
559 563 # Add text length of base since decompressing that also takes
560 564 # work. For cache hits the length is already included.
561 565 compresseddeltalen += e[1]
562 566 r = (clen, compresseddeltalen)
563 567 chaininfocache[rev] = r
564 568 return r
565 569
566 570 def _deltachain(self, rev, stoprev=None):
567 571 """Obtain the delta chain for a revision.
568 572
569 573 ``stoprev`` specifies a revision to stop at. If not specified, we
570 574 stop at the base of the chain.
571 575
572 576 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
573 577 revs in ascending order and ``stopped`` is a bool indicating whether
574 578 ``stoprev`` was hit.
575 579 """
576 580 # Try C implementation.
577 581 try:
578 582 return self.index.deltachain(rev, stoprev, self._generaldelta)
579 583 except AttributeError:
580 584 pass
581 585
582 586 chain = []
583 587
584 588 # Alias to prevent attribute lookup in tight loop.
585 589 index = self.index
586 590 generaldelta = self._generaldelta
587 591
588 592 iterrev = rev
589 593 e = index[iterrev]
590 594 while iterrev != e[3] and iterrev != stoprev:
591 595 chain.append(iterrev)
592 596 if generaldelta:
593 597 iterrev = e[3]
594 598 else:
595 599 iterrev -= 1
596 600 e = index[iterrev]
597 601
598 602 if iterrev == stoprev:
599 603 stopped = True
600 604 else:
601 605 chain.append(iterrev)
602 606 stopped = False
603 607
604 608 chain.reverse()
605 609 return chain, stopped
606 610
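To make the walk concrete, a sketch over a hypothetical generaldelta index where entry[3] is the delta-base revision:

    # rev:        0  1  2  3
    # delta base: 0  0  1  1    (rev 0 stores a full text)
    #
    # _deltachain(3)            -> ([0, 1, 3], False)  walked to the base
    # _deltachain(3, stoprev=1) -> ([3], True)         rev 1 comes from cache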
607 611 def ancestors(self, revs, stoprev=0, inclusive=False):
608 612 """Generate the ancestors of 'revs' in reverse topological order.
609 613 Does not generate revs lower than stoprev.
610 614
611 615 See the documentation for ancestor.lazyancestors for more details."""
612 616
613 617 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
614 618 inclusive=inclusive)
615 619
616 620 def descendants(self, revs):
617 621 """Generate the descendants of 'revs' in revision order.
618 622
619 623 Yield a sequence of revision numbers starting with a child of
620 624 some rev in revs, i.e., each revision is *not* considered a
621 625 descendant of itself. Results are ordered by revision number (a
622 626 topological sort)."""
623 627 first = min(revs)
624 628 if first == nullrev:
625 629 for i in self:
626 630 yield i
627 631 return
628 632
629 633 seen = set(revs)
630 634 for i in self.revs(start=first + 1):
631 635 for x in self.parentrevs(i):
632 636 if x != nullrev and x in seen:
633 637 seen.add(i)
634 638 yield i
635 639 break
636 640
637 641 def findcommonmissing(self, common=None, heads=None):
638 642 """Return a tuple of the ancestors of common and the ancestors of heads
639 643 that are not ancestors of common. In revset terminology, we return the
640 644 tuple:
641 645
642 646 ::common, (::heads) - (::common)
643 647
644 648 The list is sorted by revision number, meaning it is
645 649 topologically sorted.
646 650
647 651 'heads' and 'common' are both lists of node IDs. If heads is
648 652 not supplied, uses all of the revlog's heads. If common is not
649 653 supplied, uses nullid."""
650 654 if common is None:
651 655 common = [nullid]
652 656 if heads is None:
653 657 heads = self.heads()
654 658
655 659 common = [self.rev(n) for n in common]
656 660 heads = [self.rev(n) for n in heads]
657 661
658 662 # we want the ancestors, but inclusive
659 663 class lazyset(object):
660 664 def __init__(self, lazyvalues):
661 665 self.addedvalues = set()
662 666 self.lazyvalues = lazyvalues
663 667
664 668 def __contains__(self, value):
665 669 return value in self.addedvalues or value in self.lazyvalues
666 670
667 671 def __iter__(self):
668 672 added = self.addedvalues
669 673 for r in added:
670 674 yield r
671 675 for r in self.lazyvalues:
672 676 if r not in added:
673 677 yield r
674 678
675 679 def add(self, value):
676 680 self.addedvalues.add(value)
677 681
678 682 def update(self, values):
679 683 self.addedvalues.update(values)
680 684
681 685 has = lazyset(self.ancestors(common))
682 686 has.add(nullrev)
683 687 has.update(common)
684 688
685 689 # take all ancestors from heads that aren't in has
686 690 missing = set()
687 691 visit = collections.deque(r for r in heads if r not in has)
688 692 while visit:
689 693 r = visit.popleft()
690 694 if r in missing:
691 695 continue
692 696 else:
693 697 missing.add(r)
694 698 for p in self.parentrevs(r):
695 699 if p not in has:
696 700 visit.append(p)
697 701 missing = list(missing)
698 702 missing.sort()
699 703 return has, [self.node(miss) for miss in missing]
700 704
701 705 def incrementalmissingrevs(self, common=None):
702 706 """Return an object that can be used to incrementally compute the
703 707 revision numbers of the ancestors of arbitrary sets that are not
704 708 ancestors of common. This is an ancestor.incrementalmissingancestors
705 709 object.
706 710
707 711 'common' is a list of revision numbers. If common is not supplied, uses
708 712 nullrev.
709 713 """
710 714 if common is None:
711 715 common = [nullrev]
712 716
713 717 return ancestor.incrementalmissingancestors(self.parentrevs, common)
714 718
715 719 def findmissingrevs(self, common=None, heads=None):
716 720 """Return the revision numbers of the ancestors of heads that
717 721 are not ancestors of common.
718 722
719 723 More specifically, return a list of revision numbers corresponding to
720 724 nodes N such that every N satisfies the following constraints:
721 725
722 726 1. N is an ancestor of some node in 'heads'
723 727 2. N is not an ancestor of any node in 'common'
724 728
725 729 The list is sorted by revision number, meaning it is
726 730 topologically sorted.
727 731
728 732 'heads' and 'common' are both lists of revision numbers. If heads is
729 733 not supplied, uses all of the revlog's heads. If common is not
730 734 supplied, uses nullid."""
731 735 if common is None:
732 736 common = [nullrev]
733 737 if heads is None:
734 738 heads = self.headrevs()
735 739
736 740 inc = self.incrementalmissingrevs(common=common)
737 741 return inc.missingancestors(heads)
738 742
739 743 def findmissing(self, common=None, heads=None):
740 744 """Return the ancestors of heads that are not ancestors of common.
741 745
742 746 More specifically, return a list of nodes N such that every N
743 747 satisfies the following constraints:
744 748
745 749 1. N is an ancestor of some node in 'heads'
746 750 2. N is not an ancestor of any node in 'common'
747 751
748 752 The list is sorted by revision number, meaning it is
749 753 topologically sorted.
750 754
751 755 'heads' and 'common' are both lists of node IDs. If heads is
752 756 not supplied, uses all of the revlog's heads. If common is not
753 757 supplied, uses nullid."""
754 758 if common is None:
755 759 common = [nullid]
756 760 if heads is None:
757 761 heads = self.heads()
758 762
759 763 common = [self.rev(n) for n in common]
760 764 heads = [self.rev(n) for n in heads]
761 765
762 766 inc = self.incrementalmissingrevs(common=common)
763 767 return [self.node(r) for r in inc.missingancestors(heads)]
764 768
765 769 def nodesbetween(self, roots=None, heads=None):
766 770 """Return a topological path from 'roots' to 'heads'.
767 771
768 772 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
769 773 topologically sorted list of all nodes N that satisfy both of
770 774 these constraints:
771 775
772 776 1. N is a descendant of some node in 'roots'
773 777 2. N is an ancestor of some node in 'heads'
774 778
775 779 Every node is considered to be both a descendant and an ancestor
776 780 of itself, so every reachable node in 'roots' and 'heads' will be
777 781 included in 'nodes'.
778 782
779 783 'outroots' is the list of reachable nodes in 'roots', i.e., the
780 784 subset of 'roots' that is returned in 'nodes'. Likewise,
781 785 'outheads' is the subset of 'heads' that is also in 'nodes'.
782 786
783 787 'roots' and 'heads' are both lists of node IDs. If 'roots' is
784 788 unspecified, uses nullid as the only root. If 'heads' is
785 789 unspecified, uses list of all of the revlog's heads."""
786 790 nonodes = ([], [], [])
787 791 if roots is not None:
788 792 roots = list(roots)
789 793 if not roots:
790 794 return nonodes
791 795 lowestrev = min([self.rev(n) for n in roots])
792 796 else:
793 797 roots = [nullid] # Everybody's a descendant of nullid
794 798 lowestrev = nullrev
795 799 if (lowestrev == nullrev) and (heads is None):
796 800 # We want _all_ the nodes!
797 801 return ([self.node(r) for r in self], [nullid], list(self.heads()))
798 802 if heads is None:
799 803 # All nodes are ancestors, so the latest ancestor is the last
800 804 # node.
801 805 highestrev = len(self) - 1
802 806 # Set ancestors to None to signal that every node is an ancestor.
803 807 ancestors = None
804 808 # Set heads to an empty dictionary for later discovery of heads
805 809 heads = {}
806 810 else:
807 811 heads = list(heads)
808 812 if not heads:
809 813 return nonodes
810 814 ancestors = set()
811 815 # Turn heads into a dictionary so we can remove 'fake' heads.
812 816 # Also, later we will be using it to filter out the heads we can't
813 817 # find from roots.
814 818 heads = dict.fromkeys(heads, False)
815 819 # Start at the top and keep marking parents until we're done.
816 820 nodestotag = set(heads)
817 821 # Remember where the top was so we can use it as a limit later.
818 822 highestrev = max([self.rev(n) for n in nodestotag])
819 823 while nodestotag:
820 824 # grab a node to tag
821 825 n = nodestotag.pop()
822 826 # Never tag nullid
823 827 if n == nullid:
824 828 continue
825 829 # A node's revision number represents its place in a
826 830 # topologically sorted list of nodes.
827 831 r = self.rev(n)
828 832 if r >= lowestrev:
829 833 if n not in ancestors:
830 834 # If we are possibly a descendant of one of the roots
831 835 # and we haven't already been marked as an ancestor
832 836 ancestors.add(n) # Mark as ancestor
833 837 # Add non-nullid parents to list of nodes to tag.
834 838 nodestotag.update([p for p in self.parents(n) if
835 839 p != nullid])
836 840 elif n in heads: # We've seen it before, is it a fake head?
837 841 # So it is, real heads should not be the ancestors of
838 842 # any other heads.
839 843 heads.pop(n)
840 844 if not ancestors:
841 845 return nonodes
842 846 # Now that we have our set of ancestors, we want to remove any
843 847 # roots that are not ancestors.
844 848
845 849 # If one of the roots was nullid, everything is included anyway.
846 850 if lowestrev > nullrev:
847 851 # But, since we weren't, let's recompute the lowest rev to not
848 852 # include roots that aren't ancestors.
849 853
850 854 # Filter out roots that aren't ancestors of heads
851 855 roots = [root for root in roots if root in ancestors]
852 856 # Recompute the lowest revision
853 857 if roots:
854 858 lowestrev = min([self.rev(root) for root in roots])
855 859 else:
856 860 # No more roots? Return empty list
857 861 return nonodes
858 862 else:
859 863 # We are descending from nullid, and don't need to care about
860 864 # any other roots.
861 865 lowestrev = nullrev
862 866 roots = [nullid]
863 867 # Transform our roots list into a set.
864 868 descendants = set(roots)
865 869 # Also, keep the original roots so we can filter out roots that aren't
866 870 # 'real' roots (i.e. are descended from other roots).
867 871 roots = descendants.copy()
868 872 # Our topologically sorted list of output nodes.
869 873 orderedout = []
870 874 # Don't start at nullid since we don't want nullid in our output list,
871 875 # and if nullid shows up in descendants, empty parents will look like
872 876 # they're descendants.
873 877 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
874 878 n = self.node(r)
875 879 isdescendant = False
876 880 if lowestrev == nullrev: # Everybody is a descendant of nullid
877 881 isdescendant = True
878 882 elif n in descendants:
879 883 # n is already a descendant
880 884 isdescendant = True
881 885 # This check only needs to be done here because all the roots
882 886 # will start being marked as descendants before the loop.
883 887 if n in roots:
884 888 # If n was a root, check if it's a 'real' root.
885 889 p = tuple(self.parents(n))
886 890 # If any of its parents are descendants, it's not a root.
887 891 if (p[0] in descendants) or (p[1] in descendants):
888 892 roots.remove(n)
889 893 else:
890 894 p = tuple(self.parents(n))
891 895 # A node is a descendant if either of its parents are
892 896 # descendants. (We seeded the descendants set with the roots
893 897 # up there, remember?)
894 898 if (p[0] in descendants) or (p[1] in descendants):
895 899 descendants.add(n)
896 900 isdescendant = True
897 901 if isdescendant and ((ancestors is None) or (n in ancestors)):
898 902 # Only include nodes that are both descendants and ancestors.
899 903 orderedout.append(n)
900 904 if (ancestors is not None) and (n in heads):
901 905 # We're trying to figure out which heads are reachable
902 906 # from roots.
903 907 # Mark this head as having been reached
904 908 heads[n] = True
905 909 elif ancestors is None:
906 910 # Otherwise, we're trying to discover the heads.
907 911 # Assume this is a head because if it isn't, the next step
908 912 # will eventually remove it.
909 913 heads[n] = True
910 914 # But, obviously its parents aren't.
911 915 for p in self.parents(n):
912 916 heads.pop(p, None)
913 917 heads = [head for head, flag in heads.iteritems() if flag]
914 918 roots = list(roots)
915 919 assert orderedout
916 920 assert roots
917 921 assert heads
918 922 return (orderedout, roots, heads)
919 923
920 924 def headrevs(self):
921 925 try:
922 926 return self.index.headrevs()
923 927 except AttributeError:
924 928 return self._headrevs()
925 929
926 930 def computephases(self, roots):
927 931 return self.index.computephasesmapsets(roots)
928 932
929 933 def _headrevs(self):
930 934 count = len(self)
931 935 if not count:
932 936 return [nullrev]
933 937 # we won't iterate over filtered revs so nobody is a head at start
934 938 ishead = [0] * (count + 1)
935 939 index = self.index
936 940 for r in self:
937 941 ishead[r] = 1 # I may be a head
938 942 e = index[r]
939 943 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
940 944 return [r for r, val in enumerate(ishead) if val]
941 945
942 946 def heads(self, start=None, stop=None):
943 947 """return the list of all nodes that have no children
944 948
945 949 if start is specified, only heads that are descendants of
946 950 start will be returned
947 951 if stop is specified, it will consider all the revs from stop
948 952 as if they had no children
949 953 """
950 954 if start is None and stop is None:
951 955 if not len(self):
952 956 return [nullid]
953 957 return [self.node(r) for r in self.headrevs()]
954 958
955 959 if start is None:
956 960 start = nullid
957 961 if stop is None:
958 962 stop = []
959 963 stoprevs = set([self.rev(n) for n in stop])
960 964 startrev = self.rev(start)
961 965 reachable = {startrev}
962 966 heads = {startrev}
963 967
964 968 parentrevs = self.parentrevs
965 969 for r in self.revs(start=startrev + 1):
966 970 for p in parentrevs(r):
967 971 if p in reachable:
968 972 if r not in stoprevs:
969 973 reachable.add(r)
970 974 heads.add(r)
971 975 if p in heads and p not in stoprevs:
972 976 heads.remove(p)
973 977
974 978 return [self.node(r) for r in heads]
975 979
976 980 def children(self, node):
977 981 """find the children of a given node"""
978 982 c = []
979 983 p = self.rev(node)
980 984 for r in self.revs(start=p + 1):
981 985 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
982 986 if prevs:
983 987 for pr in prevs:
984 988 if pr == p:
985 989 c.append(self.node(r))
986 990 elif p == nullrev:
987 991 c.append(self.node(r))
988 992 return c
989 993
990 994 def descendant(self, start, end):
991 995 if start == nullrev:
992 996 return True
993 997 for i in self.descendants([start]):
994 998 if i == end:
995 999 return True
996 1000 elif i > end:
997 1001 break
998 1002 return False
999 1003
1000 1004 def commonancestorsheads(self, a, b):
1001 1005 """calculate all the heads of the common ancestors of nodes a and b"""
1002 1006 a, b = self.rev(a), self.rev(b)
1003 1007 try:
1004 1008 ancs = self.index.commonancestorsheads(a, b)
1005 1009 except (AttributeError, OverflowError): # C implementation failed
1006 1010 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1007 1011 return pycompat.maplist(self.node, ancs)
1008 1012
1009 1013 def isancestor(self, a, b):
1010 1014 """return True if node a is an ancestor of node b
1011 1015
1012 1016 The implementation of this is trivial but the use of
1013 1017 commonancestorsheads is not."""
1014 1018 return a in self.commonancestorsheads(a, b)
1015 1019
1016 1020 def ancestor(self, a, b):
1017 1021 """calculate the "best" common ancestor of nodes a and b"""
1018 1022
1019 1023 a, b = self.rev(a), self.rev(b)
1020 1024 try:
1021 1025 ancs = self.index.ancestors(a, b)
1022 1026 except (AttributeError, OverflowError):
1023 1027 ancs = ancestor.ancestors(self.parentrevs, a, b)
1024 1028 if ancs:
1025 1029 # choose a consistent winner when there's a tie
1026 1030 return min(map(self.node, ancs))
1027 1031 return nullid
1028 1032
1029 1033 def _match(self, id):
1030 1034 if isinstance(id, int):
1031 1035 # rev
1032 1036 return self.node(id)
1033 1037 if len(id) == 20:
1034 1038 # possibly a binary node
1035 1039 # odds of a binary node being all hex in ASCII are 1 in 10**25
1036 1040 try:
1037 1041 node = id
1038 1042 self.rev(node) # quick search the index
1039 1043 return node
1040 1044 except LookupError:
1041 1045 pass # may be partial hex id
1042 1046 try:
1043 1047 # str(rev)
1044 1048 rev = int(id)
1045 1049 if str(rev) != id:
1046 1050 raise ValueError
1047 1051 if rev < 0:
1048 1052 rev = len(self) + rev
1049 1053 if rev < 0 or rev >= len(self):
1050 1054 raise ValueError
1051 1055 return self.node(rev)
1052 1056 except (ValueError, OverflowError):
1053 1057 pass
1054 1058 if len(id) == 40:
1055 1059 try:
1056 1060 # a full hex nodeid?
1057 1061 node = bin(id)
1058 1062 self.rev(node)
1059 1063 return node
1060 1064 except (TypeError, LookupError):
1061 1065 pass
1062 1066
1063 1067 def _partialmatch(self, id):
1064 1068 maybewdir = wdirhex.startswith(id)
1065 1069 try:
1066 1070 partial = self.index.partialmatch(id)
1067 1071 if partial and self.hasnode(partial):
1068 1072 if maybewdir:
1069 1073 # single 'ff...' match in radix tree, ambiguous with wdir
1070 1074 raise RevlogError
1071 1075 return partial
1072 1076 if maybewdir:
1073 1077 # no 'ff...' match in radix tree, wdir identified
1074 1078 raise error.WdirUnsupported
1075 1079 return None
1076 1080 except RevlogError:
1077 1081 # parsers.c radix tree lookup gave multiple matches
1078 1082 # fast path: for unfiltered changelog, radix tree is accurate
1079 1083 if not getattr(self, 'filteredrevs', None):
1080 1084 raise LookupError(id, self.indexfile,
1081 1085 _('ambiguous identifier'))
1082 1086 # fall through to slow path that filters hidden revisions
1083 1087 except (AttributeError, ValueError):
1084 1088 # we are pure python, or key was too short to search radix tree
1085 1089 pass
1086 1090
1087 1091 if id in self._pcache:
1088 1092 return self._pcache[id]
1089 1093
1090 1094 if len(id) < 40:
1091 1095 try:
1092 1096 # hex(node)[:...]
1093 1097 l = len(id) // 2 # grab an even number of digits
1094 1098 prefix = bin(id[:l * 2])
1095 1099 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1096 1100 nl = [n for n in nl if hex(n).startswith(id) and
1097 1101 self.hasnode(n)]
1098 1102 if len(nl) > 0:
1099 1103 if len(nl) == 1 and not maybewdir:
1100 1104 self._pcache[id] = nl[0]
1101 1105 return nl[0]
1102 1106 raise LookupError(id, self.indexfile,
1103 1107 _('ambiguous identifier'))
1104 1108 if maybewdir:
1105 1109 raise error.WdirUnsupported
1106 1110 return None
1107 1111 except (TypeError, binascii.Error):
1108 1112 pass
1109 1113
1110 1114 def lookup(self, id):
1111 1115 """locate a node based on:
1112 1116 - revision number or str(revision number)
1113 1117 - nodeid or subset of hex nodeid
1114 1118 """
1115 1119 n = self._match(id)
1116 1120 if n is not None:
1117 1121 return n
1118 1122 n = self._partialmatch(id)
1119 1123 if n:
1120 1124 return n
1121 1125
1122 1126 raise LookupError(id, self.indexfile, _('no match found'))
1123 1127
1124 1128 def cmp(self, node, text):
1125 1129 """compare text with a given file revision
1126 1130
1127 1131 returns True if text is different from what is stored.
1128 1132 """
1129 1133 p1, p2 = self.parents(node)
1130 1134 return hash(text, p1, p2) != node
1131 1135
1132 1136 def _cachesegment(self, offset, data):
1133 1137 """Add a segment to the revlog cache.
1134 1138
1135 1139 Accepts an absolute offset and the data that is at that location.
1136 1140 """
1137 1141 o, d = self._chunkcache
1138 1142 # try to add to existing cache
1139 1143 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1140 1144 self._chunkcache = o, d + data
1141 1145 else:
1142 1146 self._chunkcache = offset, data
1143 1147
1144 1148 def _readsegment(self, offset, length, df=None):
1145 1149 """Load a segment of raw data from the revlog.
1146 1150
1147 1151 Accepts an absolute offset, length to read, and an optional existing
1148 1152 file handle to read from.
1149 1153
1150 1154 If an existing file handle is passed, it will be seeked and the
1151 1155 original seek position will NOT be restored.
1152 1156
1153 1157 Returns a str or buffer of raw byte data.
1154 1158 """
1155 1159 if df is not None:
1156 1160 closehandle = False
1157 1161 else:
1158 1162 if self._inline:
1159 1163 df = self.opener(self.indexfile)
1160 1164 else:
1161 1165 df = self.opener(self.datafile)
1162 1166 closehandle = True
1163 1167
1164 1168 # Cache data both forward and backward around the requested
1165 1169 # data, in a fixed size window. This helps speed up operations
1166 1170 # involving reading the revlog backwards.
1167 1171 cachesize = self._chunkcachesize
1168 1172 realoffset = offset & ~(cachesize - 1)
1169 1173 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1170 1174 - realoffset)
1171 1175 df.seek(realoffset)
1172 1176 d = df.read(reallength)
1173 1177 if closehandle:
1174 1178 df.close()
1175 1179 self._cachesegment(realoffset, d)
1176 1180 if offset != realoffset or reallength != length:
1177 1181 return util.buffer(d, offset - realoffset, length)
1178 1182 return d
1179 1183
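The realoffset/reallength arithmetic above widens the read to _chunkcachesize-aligned boundaries; with the default 64KiB cache size (illustrative request):

    # offset=70000, length=100, cachesize=65536
    realoffset = 70000 & ~(65536 - 1)                                 # 65536
    reallength = ((70000 + 100 + 65536) & ~(65536 - 1)) - realoffset  # 65536

so one aligned 64KiB window around the requested bytes is read and cached.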
1180 1184 def _getsegment(self, offset, length, df=None):
1181 1185 """Obtain a segment of raw data from the revlog.
1182 1186
1183 1187 Accepts an absolute offset, length of bytes to obtain, and an
1184 1188 optional file handle to the already-opened revlog. If the file
1185 1189 handle is used, its original seek position will not be preserved.
1186 1190
1187 1191 Requests for data may be returned from a cache.
1188 1192
1189 1193 Returns a str or a buffer instance of raw byte data.
1190 1194 """
1191 1195 o, d = self._chunkcache
1192 1196 l = len(d)
1193 1197
1194 1198 # is it in the cache?
1195 1199 cachestart = offset - o
1196 1200 cacheend = cachestart + length
1197 1201 if cachestart >= 0 and cacheend <= l:
1198 1202 if cachestart == 0 and cacheend == l:
1199 1203 return d # avoid a copy
1200 1204 return util.buffer(d, cachestart, cacheend - cachestart)
1201 1205
1202 1206 return self._readsegment(offset, length, df=df)
1203 1207
1204 1208 def _getsegmentforrevs(self, startrev, endrev, df=None):
1205 1209 """Obtain a segment of raw data corresponding to a range of revisions.
1206 1210
1207 1211 Accepts the start and end revisions and an optional already-open
1208 1212 file handle to be used for reading. If the file handle is used, its
1209 1213 seek position will not be preserved.
1210 1214
1211 1215 Requests for data may be satisfied by a cache.
1212 1216
1213 1217 Returns a 2-tuple of (offset, data) for the requested range of
1214 1218 revisions. Offset is the integer offset from the beginning of the
1215 1219 revlog and data is a str or buffer of the raw byte data.
1216 1220
1217 1221 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1218 1222 to determine where each revision's data begins and ends.
1219 1223 """
1220 1224 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1221 1225 # (functions are expensive).
1222 1226 index = self.index
1223 1227 istart = index[startrev]
1224 1228 start = int(istart[0] >> 16)
1225 1229 if startrev == endrev:
1226 1230 end = start + istart[1]
1227 1231 else:
1228 1232 iend = index[endrev]
1229 1233 end = int(iend[0] >> 16) + iend[1]
1230 1234
1231 1235 if self._inline:
1232 1236 start += (startrev + 1) * self._io.size
1233 1237 end += (endrev + 1) * self._io.size
1234 1238 length = end - start
1235 1239
1236 1240 return start, self._getsegment(start, length, df=df)
1237 1241
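The (rev + 1) * self._io.size adjustment above reflects the inline layout, where index entries and data chunks are interleaved in a single file:

    # inline .i file: [header+entry 0][data 0][entry 1][data 1]...
    # so data for rev r starts at:  start(r) + (r + 1) * iosize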
1238 1242 def _chunk(self, rev, df=None):
1239 1243 """Obtain a single decompressed chunk for a revision.
1240 1244
1241 1245 Accepts an integer revision and an optional already-open file handle
1242 1246 to be used for reading. If used, the seek position of the file will not
1243 1247 be preserved.
1244 1248
1245 1249 Returns a str holding uncompressed data for the requested revision.
1246 1250 """
1247 1251 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1248 1252
1249 1253 def _chunks(self, revs, df=None):
1250 1254 """Obtain decompressed chunks for the specified revisions.
1251 1255
1252 1256 Accepts an iterable of numeric revisions that are assumed to be in
1253 1257 ascending order. Also accepts an optional already-open file handle
1254 1258 to be used for reading. If used, the seek position of the file will
1255 1259 not be preserved.
1256 1260
1257 1261 This function is similar to calling ``self._chunk()`` multiple times,
1258 1262 but is faster.
1259 1263
1260 1264 Returns a list with decompressed data for each requested revision.
1261 1265 """
1262 1266 if not revs:
1263 1267 return []
1264 1268 start = self.start
1265 1269 length = self.length
1266 1270 inline = self._inline
1267 1271 iosize = self._io.size
1268 1272 buffer = util.buffer
1269 1273
1270 1274 l = []
1271 1275 ladd = l.append
1272 1276
1273 1277 try:
1274 1278 offset, data = self._getsegmentforrevs(revs[0], revs[-1], df=df)
1275 1279 except OverflowError:
1276 1280 # issue4215 - we can't cache a run of chunks greater than
1277 1281 # 2G on Windows
1278 1282 return [self._chunk(rev, df=df) for rev in revs]
1279 1283
1280 1284 decomp = self.decompress
1281 1285 for rev in revs:
1282 1286 chunkstart = start(rev)
1283 1287 if inline:
1284 1288 chunkstart += (rev + 1) * iosize
1285 1289 chunklength = length(rev)
1286 1290 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1287 1291
1288 1292 return l
1289 1293
1290 1294 def _chunkclear(self):
1291 1295 """Clear the raw chunk cache."""
1292 1296 self._chunkcache = (0, '')
1293 1297
1294 1298 def deltaparent(self, rev):
1295 1299 """return deltaparent of the given revision"""
1296 1300 base = self.index[rev][3]
1297 1301 if base == rev:
1298 1302 return nullrev
1299 1303 elif self._generaldelta:
1300 1304 return base
1301 1305 else:
1302 1306 return rev - 1
1303 1307
1304 1308 def revdiff(self, rev1, rev2):
1305 1309 """return or calculate a delta between two revisions
1306 1310
1307 1311 The delta calculated is in binary form and is intended to be written to
1308 1312 revlog data directly. So this function needs raw revision data.
1309 1313 """
1310 1314 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1311 1315 return bytes(self._chunk(rev2))
1312 1316
1313 1317 return mdiff.textdiff(self.revision(rev1, raw=True),
1314 1318 self.revision(rev2, raw=True))
1315 1319
1316 1320 def revision(self, nodeorrev, _df=None, raw=False):
1317 1321 """return an uncompressed revision of a given node or revision
1318 1322 number.
1319 1323
1320 1324 _df - an existing file handle to read from. (internal-only)
1321 1325 raw - an optional argument specifying if the revision data is to be
1322 1326 treated as raw data when applying flag transforms. 'raw' should be set
1323 1327 to True when generating changegroups or in debug commands.
1324 1328 """
1325 1329 if isinstance(nodeorrev, int):
1326 1330 rev = nodeorrev
1327 1331 node = self.node(rev)
1328 1332 else:
1329 1333 node = nodeorrev
1330 1334 rev = None
1331 1335
1332 1336 cachedrev = None
1333 1337 flags = None
1334 1338 rawtext = None
1335 1339 if node == nullid:
1336 1340 return ""
1337 1341 if self._cache:
1338 1342 if self._cache[0] == node:
1339 1343 # _cache only stores rawtext
1340 1344 if raw:
1341 1345 return self._cache[2]
1342 1346 # duplicated, but good for perf
1343 1347 if rev is None:
1344 1348 rev = self.rev(node)
1345 1349 if flags is None:
1346 1350 flags = self.flags(rev)
1347 1351 # no extra flags set, no flag processor runs, text = rawtext
1348 1352 if flags == REVIDX_DEFAULT_FLAGS:
1349 1353 return self._cache[2]
1350 1354 # rawtext is reusable. need to run flag processor
1351 1355 rawtext = self._cache[2]
1352 1356
1353 1357 cachedrev = self._cache[1]
1354 1358
1355 1359 # look up what we need to read
1356 1360 if rawtext is None:
1357 1361 if rev is None:
1358 1362 rev = self.rev(node)
1359 1363
1360 1364 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1361 1365 if stopped:
1362 1366 rawtext = self._cache[2]
1363 1367
1364 1368 # drop cache to save memory
1365 1369 self._cache = None
1366 1370
1367 1371 bins = self._chunks(chain, df=_df)
1368 1372 if rawtext is None:
1369 1373 rawtext = bytes(bins[0])
1370 1374 bins = bins[1:]
1371 1375
1372 1376 rawtext = mdiff.patches(rawtext, bins)
1373 1377 self._cache = (node, rev, rawtext)
1374 1378
1375 1379 if flags is None:
1376 1380 if rev is None:
1377 1381 rev = self.rev(node)
1378 1382 flags = self.flags(rev)
1379 1383
1380 1384 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1381 1385 if validatehash:
1382 1386 self.checkhash(text, node, rev=rev)
1383 1387
1384 1388 return text
1385 1389
1386 1390 def hash(self, text, p1, p2):
1387 1391 """Compute a node hash.
1388 1392
1389 1393 Available as a function so that subclasses can replace the hash
1390 1394 as needed.
1391 1395 """
1392 1396 return hash(text, p1, p2)
1393 1397
1394 1398 def _processflags(self, text, flags, operation, raw=False):
1395 1399 """Inspect revision data flags and applies transforms defined by
1396 1400 registered flag processors.
1397 1401
1398 1402 ``text`` - the revision data to process
1399 1403 ``flags`` - the revision flags
1400 1404 ``operation`` - the operation being performed (read or write)
1401 1405 ``raw`` - an optional argument describing if the raw transform should be
1402 1406 applied.
1403 1407
1404 1408 This method processes the flags in the order (or reverse order if
1405 1409 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1406 1410 flag processors registered for present flags. The order of flags defined
1407 1411 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1408 1412
1409 1413 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1410 1414 processed text and ``validatehash`` is a bool indicating whether the
1411 1415 returned text should be checked for hash integrity.
1412 1416
1413 1417 Note: If the ``raw`` argument is set, it has precedence over the
1414 1418 operation and will only update the value of ``validatehash``.
1415 1419 """
1416 1420 # fast path: no flag processors will run
1417 1421 if flags == 0:
1418 1422 return text, True
1419 1423 if operation not in ('read', 'write'):
1420 1424 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1421 1425 # Check all flags are known.
1422 1426 if flags & ~REVIDX_KNOWN_FLAGS:
1423 1427 raise RevlogError(_("incompatible revision flag '%#x'") %
1424 1428 (flags & ~REVIDX_KNOWN_FLAGS))
1425 1429 validatehash = True
1426 1430 # Depending on the operation (read or write), the order might be
1427 1431 # reversed due to non-commutative transforms.
1428 1432 orderedflags = REVIDX_FLAGS_ORDER
1429 1433 if operation == 'write':
1430 1434 orderedflags = reversed(orderedflags)
1431 1435
1432 1436 for flag in orderedflags:
1433 1437 # If a flagprocessor has been registered for a known flag, apply the
1434 1438 # related operation transform and update result tuple.
1435 1439 if flag & flags:
1436 1440 vhash = True
1437 1441
1438 1442 if flag not in _flagprocessors:
1439 1443 message = _("missing processor for flag '%#x'") % (flag)
1440 1444 raise RevlogError(message)
1441 1445
1442 1446 processor = _flagprocessors[flag]
1443 1447 if processor is not None:
1444 1448 readtransform, writetransform, rawtransform = processor
1445 1449
1446 1450 if raw:
1447 1451 vhash = rawtransform(self, text)
1448 1452 elif operation == 'read':
1449 1453 text, vhash = readtransform(self, text)
1450 1454 else: # write operation
1451 1455 text, vhash = writetransform(self, text)
1452 1456 validatehash = validatehash and vhash
1453 1457
1454 1458 return text, validatehash
1455 1459
1456 1460 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1457 1461 """Check node hash integrity.
1458 1462
1459 1463 Available as a function so that subclasses can extend hash mismatch
1460 1464 behaviors as needed.
1461 1465 """
1462 1466 if p1 is None and p2 is None:
1463 1467 p1, p2 = self.parents(node)
1464 1468 if node != self.hash(text, p1, p2):
1465 1469 revornode = rev
1466 1470 if revornode is None:
1467 1471 revornode = templatefilters.short(hex(node))
1468 1472 raise RevlogError(_("integrity check failed on %s:%s")
1469 1473 % (self.indexfile, revornode))
1470 1474
1471 1475 def checkinlinesize(self, tr, fp=None):
1472 1476 """Check if the revlog is too big for inline and convert if so.
1473 1477
1474 1478 This should be called after revisions are added to the revlog. If the
1475 1479 revlog has grown too large to be an inline revlog, it will convert it
1476 1480 to use multiple index and data files.
1477 1481 """
1478 1482 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1479 1483 return
1480 1484
1481 1485 trinfo = tr.find(self.indexfile)
1482 1486 if trinfo is None:
1483 1487 raise RevlogError(_("%s not found in the transaction")
1484 1488 % self.indexfile)
1485 1489
1486 1490 trindex = trinfo[2]
1487 1491 if trindex is not None:
1488 1492 dataoff = self.start(trindex)
1489 1493 else:
1490 1494 # revlog was stripped at start of transaction, use all leftover data
1491 1495 trindex = len(self) - 1
1492 1496 dataoff = self.end(-2)
1493 1497
1494 1498 tr.add(self.datafile, dataoff)
1495 1499
1496 1500 if fp:
1497 1501 fp.flush()
1498 1502 fp.close()
1499 1503
1500 1504 df = self.opener(self.datafile, 'w')
1501 1505 try:
1502 1506 for r in self:
1503 1507 df.write(self._getsegmentforrevs(r, r)[1])
1504 1508 finally:
1505 1509 df.close()
1506 1510
1507 1511 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1508 1512 checkambig=self._checkambig)
1509 1513 self.version &= ~FLAG_INLINE_DATA
1510 1514 self._inline = False
1511 1515 for i in self:
1512 1516 e = self._io.packentry(self.index[i], self.node, self.version, i)
1513 1517 fp.write(e)
1514 1518
1515 1519 # if we don't call close, the temp file will never replace the
1516 1520 # real index
1517 1521 fp.close()
1518 1522
1519 1523 tr.replace(self.indexfile, trindex * self._io.size)
1520 1524 self._chunkclear()
1521 1525
1522 1526 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1523 1527 node=None, flags=REVIDX_DEFAULT_FLAGS):
1524 1528 """add a revision to the log
1525 1529
1526 1530 text - the revision data to add
1527 1531 transaction - the transaction object used for rollback
1528 1532 link - the linkrev data to add
1529 1533 p1, p2 - the parent nodeids of the revision
1530 1534 cachedelta - an optional precomputed delta
1531 1535 node - nodeid of revision; typically node is not specified, and it is
1532 1536 computed by default as hash(text, p1, p2), however subclasses might
1533 1537 use different hashing method (and override checkhash() in such case)
1534 1538 flags - the known flags to set on the revision
1535 1539 """
1536 1540 if link == nullrev:
1537 1541 raise RevlogError(_("attempted to add linkrev -1 to %s")
1538 1542 % self.indexfile)
1539 1543
1540 1544 if flags:
1541 1545 node = node or self.hash(text, p1, p2)
1542 1546
1543 1547 rawtext, validatehash = self._processflags(text, flags, 'write')
1544 1548
1545 1549 # If the flag processor modifies the revision data, ignore any provided
1546 1550 # cachedelta.
1547 1551 if rawtext != text:
1548 1552 cachedelta = None
1549 1553
1550 1554 if len(rawtext) > _maxentrysize:
1551 1555 raise RevlogError(
1552 1556 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1553 1557 % (self.indexfile, len(rawtext)))
1554 1558
1555 1559 node = node or self.hash(rawtext, p1, p2)
1556 1560 if node in self.nodemap:
1557 1561 return node
1558 1562
1559 1563 if validatehash:
1560 1564 self.checkhash(rawtext, node, p1=p1, p2=p2)
1561 1565
1562 1566 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1563 1567 flags, cachedelta=cachedelta)
1564 1568
1565 1569 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1566 1570 cachedelta=None):
1567 1571 """add a raw revision with known flags, node and parents
1568 1572 useful when reusing a revision not stored in this revlog (ex: received
1569 1573 over wire, or read from an external bundle).
1570 1574 """
1571 1575 dfh = None
1572 1576 if not self._inline:
1573 1577 dfh = self.opener(self.datafile, "a+")
1574 1578 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1575 1579 try:
1576 1580 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1577 1581 flags, cachedelta, ifh, dfh)
1578 1582 finally:
1579 1583 if dfh:
1580 1584 dfh.close()
1581 1585 ifh.close()
1582 1586
1583 1587 def compress(self, data):
1584 1588 """Generate a possibly-compressed representation of data."""
1585 1589 if not data:
1586 1590 return '', data
1587 1591
1588 1592 compressed = self._compressor.compress(data)
1589 1593
1590 1594 if compressed:
1591 1595 # The revlog compressor added the header in the returned data.
1592 1596 return '', compressed
1593 1597
1594 1598 if data[0:1] == '\0':
1595 1599 return '', data
1596 1600 return 'u', data
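# Standalone sketch of the header convention above (assumes zlib, the
# default engine; the real compressor itself decides whether compressing
# is worthwhile, which the size check below only approximates):
#
#     import zlib
#
#     def toy_compress(data):
#         if not data:
#             return '', data
#         compressed = zlib.compress(data)
#         if len(compressed) < len(data):
#             return '', compressed      # zlib output starts with 'x'
#         if data[0:1] == '\0':
#             return '', data            # '\0' already marks raw data
#         return 'u', data               # explicit "stored as-is" marker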
1597 1601
1598 1602 def decompress(self, data):
1599 1603 """Decompress a revlog chunk.
1600 1604
1601 1605 The chunk is expected to begin with a header identifying the
1602 1606 format type so it can be routed to an appropriate decompressor.
1603 1607 """
1604 1608 if not data:
1605 1609 return data
1606 1610
1607 1611 # Revlogs are read much more frequently than they are written and many
1608 1612 # chunks only take microseconds to decompress, so performance is
1609 1613 # important here.
1610 1614 #
1611 1615 # We can make a few assumptions about revlogs:
1612 1616 #
1613 1617 # 1) the majority of chunks will be compressed (as opposed to inline
1614 1618 # raw data).
1615 1619 # 2) decompressing *any* data will likely be at least 10x slower than
1616 1620 # returning raw inline data.
1617 1621 # 3) we want to prioritize common and officially supported compression
1618 1622 # engines
1619 1623 #
1620 1624 # It follows that we want to optimize for the "decompress compressed data
1621 1625 # when encoded with common and officially supported compression engines"
1622 1626 # case over "raw data" and "data encoded by less common or non-official
1623 1627 # compression engines." That is why we have the inline lookup first
1624 1628 # followed by the compengines lookup.
1625 1629 #
1626 1630 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1627 1631 # compressed chunks. And this matters for changelog and manifest reads.
1628 1632 t = data[0:1]
1629 1633
1630 1634 if t == 'x':
1631 1635 try:
1632 1636 return _zlibdecompress(data)
1633 1637 except zlib.error as e:
1634 1638 raise RevlogError(_('revlog decompress error: %s') % str(e))
1635 1639 # '\0' is more common than 'u' so it goes first.
1636 1640 elif t == '\0':
1637 1641 return data
1638 1642 elif t == 'u':
1639 1643 return util.buffer(data, 1)
1640 1644
1641 1645 try:
1642 1646 compressor = self._decompressors[t]
1643 1647 except KeyError:
1644 1648 try:
1645 1649 engine = util.compengines.forrevlogheader(t)
1646 1650 compressor = engine.revlogcompressor()
1647 1651 self._decompressors[t] = compressor
1648 1652 except KeyError:
1649 1653 raise RevlogError(_('unknown compression type %r') % t)
1650 1654
1651 1655 return compressor.decompress(data)
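# Companion sketch for the dispatch above (pairs with the toy_compress()
# sketch shown after compress()); the first byte routes the chunk:
#
#     def toy_decompress(data):
#         t = data[0:1]
#         if t == 'x':
#             return zlib.decompress(data)
#         elif t == '\0':
#             return data
#         elif t == 'u':
#             return data[1:]
#         raise ValueError('unknown compression header %r' % t)
#
#     header, chunk = toy_compress('some revision text' * 100)
#     assert toy_decompress(header + chunk) == 'some revision text' * 100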
1652 1656
1653 1657 def _isgooddelta(self, d, textlen):
1654 1658 """Returns True if the given delta is good. Good means that it is within
1655 1659 the disk span, disk size, and chain length bounds that we know to be
1656 1660 performant."""
1657 1661 if d is None:
1658 1662 return False
1659 1663
1660 1664 # - 'dist' is the distance from the base revision -- bounding it limits
1661 1665 # the amount of I/O we need to do.
1662 1666 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1663 1667 # to apply -- bounding it limits the amount of CPU we consume.
1664 1668 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1665 1669
1666 1670 defaultmax = textlen * 4
1667 1671 maxdist = self._maxdeltachainspan
1668 1672 if not maxdist:
1669 1673 maxdist = dist # ensure the conditional below passes
1670 1674 maxdist = max(maxdist, defaultmax)
1671 1675 if (dist > maxdist or l > textlen or
1672 1676 compresseddeltalen > textlen * 2 or
1673 1677 (self._maxchainlen and chainlen > self._maxchainlen)):
1674 1678 return False
1675 1679
1676 1680 return True
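# Worked example (hypothetical numbers): with textlen = 1000, the default
# distance bound is 4 * textlen = 4000 (a configured _maxdeltachainspan can
# only raise it, and a falsy one disables the distance check entirely). A
# candidate with dist=3500, l=400, compresseddeltalen=1800 passes every
# bound (3500 <= 4000, 400 <= 1000, 1800 <= 2000) and is "good"; raise
# compresseddeltalen to 2500 and it would be rejected.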
1677 1681
1678 1682 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1679 1683 cachedelta, ifh, dfh, alwayscache=False):
1680 1684 """internal function to add revisions to the log
1681 1685
1682 1686 see addrevision for argument descriptions.
1683 1687
1684 1688 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1685 1689
1686 1690 invariants:
1687 1691 - rawtext is optional (can be None); if not set, cachedelta must be set.
1688 1692 if both are set, they must correspond to each other.
1689 1693 """
1690 1694 btext = [rawtext]
1691 1695 def buildtext():
1692 1696 if btext[0] is not None:
1693 1697 return btext[0]
1694 1698 baserev = cachedelta[0]
1695 1699 delta = cachedelta[1]
1696 1700 # special-case deltas which replace the entire base; no need to decode
1697 1701 # base revision. this neatly avoids censored bases, which throw when
1698 1702 # they're decoded.
1699 1703 hlen = struct.calcsize(">lll")
1700 1704 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1701 1705 len(delta) - hlen):
1702 1706 btext[0] = delta[hlen:]
1703 1707 else:
1704 1708 if self._inline:
1705 1709 fh = ifh
1706 1710 else:
1707 1711 fh = dfh
1708 1712 basetext = self.revision(baserev, _df=fh, raw=True)
1709 1713 btext[0] = mdiff.patch(basetext, delta)
1710 1714
1711 1715 try:
1712 1716 res = self._processflags(btext[0], flags, 'read', raw=True)
1713 1717 btext[0], validatehash = res
1714 1718 if validatehash:
1715 1719 self.checkhash(btext[0], node, p1=p1, p2=p2)
1716 1720 if flags & REVIDX_ISCENSORED:
1717 1721 raise RevlogError(_('node %s is not censored') % node)
1718 1722 except CensoredNodeError:
1719 1723 # must pass the censored index flag to add censored revisions
1720 1724 if not flags & REVIDX_ISCENSORED:
1721 1725 raise
1722 1726 return btext[0]
1723 1727
1724 1728 def builddelta(rev):
1725 1729 # can we use the cached delta?
1726 1730 if cachedelta and cachedelta[0] == rev:
1727 1731 delta = cachedelta[1]
1728 1732 else:
1729 1733 t = buildtext()
1730 1734 if self.iscensored(rev):
1731 1735 # deltas based on a censored revision must replace the
1732 1736 # full content in one patch, so delta works everywhere
1733 1737 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1734 1738 delta = header + t
1735 1739 else:
1736 1740 if self._inline:
1737 1741 fh = ifh
1738 1742 else:
1739 1743 fh = dfh
1740 1744 ptext = self.revision(rev, _df=fh, raw=True)
1741 1745 delta = mdiff.textdiff(ptext, t)
1742 1746 header, data = self.compress(delta)
1743 1747 deltalen = len(header) + len(data)
1744 1748 chainbase = self.chainbase(rev)
1745 1749 dist = deltalen + offset - self.start(chainbase)
1746 1750 if self._generaldelta:
1747 1751 base = rev
1748 1752 else:
1749 1753 base = chainbase
1750 1754 chainlen, compresseddeltalen = self._chaininfo(rev)
1751 1755 chainlen += 1
1752 1756 compresseddeltalen += deltalen
1753 1757 return (dist, deltalen, (header, data), base,
1754 1758 chainbase, chainlen, compresseddeltalen)
1755 1759
1756 1760 curr = len(self)
1757 1761 prev = curr - 1
1758 1762 offset = self.end(prev)
1759 1763 delta = None
1760 1764 p1r, p2r = self.rev(p1), self.rev(p2)
1761 1765
1762 1766 # full versions are inserted when the needed deltas
1763 1767 # become comparable to the uncompressed text
1764 1768 if rawtext is None:
1765 1769 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1766 1770 cachedelta[1])
1767 1771 else:
1768 1772 textlen = len(rawtext)
1769 1773
1770 1774 # should we try to build a delta?
1771 1775 if prev != nullrev and self.storedeltachains:
1772 1776 tested = set()
1773 1777 # This condition is true most of the time when processing
1774 1778 # changegroup data into a generaldelta repo. The only time it
1775 1779 # isn't true is if this is the first revision in a delta chain
1776 1780 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1777 1781 if cachedelta and self._generaldelta and self._lazydeltabase:
1778 1782 # Assume what we received from the server is a good choice
1779 1783 # build delta will reuse the cache
1780 1784 candidatedelta = builddelta(cachedelta[0])
1781 1785 tested.add(cachedelta[0])
1782 1786 if self._isgooddelta(candidatedelta, textlen):
1783 1787 delta = candidatedelta
1784 1788 if delta is None and self._generaldelta:
1785 1789 # exclude already lazy tested base if any
1786 1790 parents = [p for p in (p1r, p2r)
1787 1791 if p != nullrev and p not in tested]
1788 1792 if parents and not self._aggressivemergedeltas:
1789 1793 # Pick whichever parent is closer to us (to minimize the
1790 1794 # chance of having to build a fulltext).
1791 1795 parents = [max(parents)]
1792 1796 tested.update(parents)
1793 1797 pdeltas = []
1794 1798 for p in parents:
1795 1799 pd = builddelta(p)
1796 1800 if self._isgooddelta(pd, textlen):
1797 1801 pdeltas.append(pd)
1798 1802 if pdeltas:
1799 1803 delta = min(pdeltas, key=lambda x: x[1])
1800 1804 if delta is None and prev not in tested:
1801 1805 # other approaches failed; try against prev to hopefully save us a
1802 1806 # fulltext.
1803 1807 candidatedelta = builddelta(prev)
1804 1808 if self._isgooddelta(candidatedelta, textlen):
1805 1809 delta = candidatedelta
1806 1810 if delta is not None:
1807 1811 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1808 1812 else:
1809 1813 rawtext = buildtext()
1810 1814 data = self.compress(rawtext)
1811 1815 l = len(data[1]) + len(data[0])
1812 1816 base = chainbase = curr
1813 1817
1814 1818 e = (offset_type(offset, flags), l, textlen,
1815 1819 base, link, p1r, p2r, node)
1816 1820 self.index.insert(-1, e)
1817 1821 self.nodemap[node] = curr
1818 1822
1819 1823 entry = self._io.packentry(e, self.node, self.version, curr)
1820 1824 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1821 1825
1822 1826 if alwayscache and rawtext is None:
1823 1827 rawtext = buildtext()
1824 1828
1825 1829 if type(rawtext) == str: # only accept immutable objects
1826 1830 self._cache = (node, curr, rawtext)
1827 1831 self._chainbasecache[curr] = chainbase
1828 1832 return node
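# Candidate order implemented above, summarized (annotation only):
#   1. the delta shipped in the changegroup (cachedelta), when
#      generaldelta + lazydeltabase say to trust the sender's base;
#   2. failing that, the parent(s): both parents when
#      aggressivemergedeltas is set, otherwise only the closer one;
#   3. failing that, prev, the previous revision in this revlog;
#   4. if no candidate survives _isgooddelta(), a full compressed
#      snapshot of the text is stored and starts a new delta chain.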
1829 1833
1830 1834 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1831 1835 # Files opened in a+ mode have inconsistent behavior on various
1832 1836 # platforms. Windows requires that a file positioning call be made
1833 1837 # when the file handle transitions between reads and writes. See
1834 1838 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1835 1839 # platforms, Python or the platform itself can be buggy. Some versions
1836 1840 # of Solaris have been observed to not append at the end of the file
1837 1841 # if the file was seeked to before the end. See issue4943 for more.
1838 1842 #
1839 1843 # We work around this issue by inserting a seek() before writing.
1840 1844 # Note: This is likely not necessary on Python 3.
1841 1845 ifh.seek(0, os.SEEK_END)
1842 1846 if dfh:
1843 1847 dfh.seek(0, os.SEEK_END)
1844 1848
1845 1849 curr = len(self) - 1
1846 1850 if not self._inline:
1847 1851 transaction.add(self.datafile, offset)
1848 1852 transaction.add(self.indexfile, curr * len(entry))
1849 1853 if data[0]:
1850 1854 dfh.write(data[0])
1851 1855 dfh.write(data[1])
1852 1856 ifh.write(entry)
1853 1857 else:
1854 1858 offset += curr * self._io.size
1855 1859 transaction.add(self.indexfile, offset, curr)
1856 1860 ifh.write(entry)
1857 1861 ifh.write(data[0])
1858 1862 ifh.write(data[1])
1859 1863 self.checkinlinesize(transaction, ifh)
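# The workaround pattern in isolation (standalone sketch, hypothetical
# file name): reseek to EOF before every write on an 'a+' handle instead
# of trusting append mode, since some platforms otherwise write at the
# current read position or fail to append (see issue4943):
#
#     import os
#
#     fh = open('some.dat', 'a+')
#     fh.seek(0)
#     existing = fh.read()       # reading moves the file position...
#     fh.seek(0, os.SEEK_END)    # ...so reposition explicitly
#     fh.write('appended data')
#     fh.close()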
1860 1864
1861 1865 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1862 1866 """
1863 1867 add a delta group
1864 1868
1865 1869 given a set of deltas, add them to the revision log. the
1866 1870 first delta is against its parent, which should be in our
1867 1871 log, the rest are against the previous delta.
1868 1872
1869 1873 If ``addrevisioncb`` is defined, it will be called with arguments of
1870 1874 this revlog and the node that was added.
1871 1875 """
1872 1876
1873 1877 nodes = []
1874 1878
1875 1879 r = len(self)
1876 1880 end = 0
1877 1881 if r:
1878 1882 end = self.end(r - 1)
1879 1883 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1880 1884 isize = r * self._io.size
1881 1885 if self._inline:
1882 1886 transaction.add(self.indexfile, end + isize, r)
1883 1887 dfh = None
1884 1888 else:
1885 1889 transaction.add(self.indexfile, isize, r)
1886 1890 transaction.add(self.datafile, end)
1887 1891 dfh = self.opener(self.datafile, "a+")
1888 1892 def flush():
1889 1893 if dfh:
1890 1894 dfh.flush()
1891 1895 ifh.flush()
1892 1896 try:
1893 1897 # loop through our set of deltas
1894 1898 chain = None
1895 1899 for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
1896 1900 node = chunkdata['node']
1897 1901 p1 = chunkdata['p1']
1898 1902 p2 = chunkdata['p2']
1899 1903 cs = chunkdata['cs']
1900 1904 deltabase = chunkdata['deltabase']
1901 1905 delta = chunkdata['delta']
1902 1906 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1903 1907
1904 1908 nodes.append(node)
1905 1909
1906 1910 link = linkmapper(cs)
1907 1911 if node in self.nodemap:
1908 1912 # this can happen if two branches make the same change
1909 1913 chain = node
1910 1914 continue
1911 1915
1912 1916 for p in (p1, p2):
1913 1917 if p not in self.nodemap:
1914 1918 raise LookupError(p, self.indexfile,
1915 1919 _('unknown parent'))
1916 1920
1917 1921 if deltabase not in self.nodemap:
1918 1922 raise LookupError(deltabase, self.indexfile,
1919 1923 _('unknown delta base'))
1920 1924
1921 1925 baserev = self.rev(deltabase)
1922 1926
1923 1927 if baserev != nullrev and self.iscensored(baserev):
1924 1928 # if base is censored, delta must be full replacement in a
1925 1929 # single patch operation
1926 1930 hlen = struct.calcsize(">lll")
1927 1931 oldlen = self.rawsize(baserev)
1928 1932 newlen = len(delta) - hlen
1929 1933 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1930 1934 raise error.CensoredBaseError(self.indexfile,
1931 1935 self.node(baserev))
1932 1936
1933 1937 if not flags and self._peek_iscensored(baserev, delta, flush):
1934 1938 flags |= REVIDX_ISCENSORED
1935 1939
1936 1940 # We assume consumers of addrevisioncb will want to retrieve
1937 1941 # the added revision, which will require a call to
1938 1942 # revision(). revision() will fast path if there is a cache
1939 1943 # hit. So, we tell _addrevision() to always cache in this case.
1940 1944 # We're only using addgroup() in the context of changegroup
1941 1945 # generation so the revision data can always be handled as raw
1942 1946 # by the flagprocessor.
1943 1947 chain = self._addrevision(node, None, transaction, link,
1944 1948 p1, p2, flags, (baserev, delta),
1945 1949 ifh, dfh,
1946 1950 alwayscache=bool(addrevisioncb))
1947 1951
1948 1952 if addrevisioncb:
1949 1953 addrevisioncb(self, chain)
1950 1954
1951 1955 if not dfh and not self._inline:
1952 1956 # addrevision switched from inline to conventional
1953 1957 # reopen the index
1954 1958 ifh.close()
1955 1959 dfh = self.opener(self.datafile, "a+")
1956 1960 ifh = self.opener(self.indexfile, "a+",
1957 1961 checkambig=self._checkambig)
1958 1962 finally:
1959 1963 if dfh:
1960 1964 dfh.close()
1961 1965 ifh.close()
1962 1966
1963 1967 return nodes
1964 1968
1965 1969 def iscensored(self, rev):
1966 1970 """Check if a file revision is censored."""
1967 1971 return False
1968 1972
1969 1973 def _peek_iscensored(self, baserev, delta, flush):
1970 1974 """Quickly check if a delta produces a censored revision."""
1971 1975 return False
1972 1976
1973 1977 def getstrippoint(self, minlink):
1974 1978 """find the minimum rev that must be stripped to strip the linkrev
1975 1979
1976 1980 Returns a tuple containing the minimum rev and a set of all revs that
1977 1981 have linkrevs that will be broken by this strip.
1978 1982 """
1979 1983 brokenrevs = set()
1980 1984 strippoint = len(self)
1981 1985
1982 1986 heads = {}
1983 1987 futurelargelinkrevs = set()
1984 1988 for head in self.headrevs():
1985 1989 headlinkrev = self.linkrev(head)
1986 1990 heads[head] = headlinkrev
1987 1991 if headlinkrev >= minlink:
1988 1992 futurelargelinkrevs.add(headlinkrev)
1989 1993
1990 1994 # This algorithm involves walking down the rev graph, starting at the
1991 1995 # heads. Since the revs are topologically sorted according to linkrev,
1992 1996 # once all head linkrevs are below the minlink, we know there are
1993 1997 # no more revs that could have a linkrev greater than minlink.
1994 1998 # So we can stop walking.
1995 1999 while futurelargelinkrevs:
1996 2000 strippoint -= 1
1997 2001 linkrev = heads.pop(strippoint)
1998 2002
1999 2003 if linkrev < minlink:
2000 2004 brokenrevs.add(strippoint)
2001 2005 else:
2002 2006 futurelargelinkrevs.remove(linkrev)
2003 2007
2004 2008 for p in self.parentrevs(strippoint):
2005 2009 if p != nullrev:
2006 2010 plinkrev = self.linkrev(p)
2007 2011 heads[p] = plinkrev
2008 2012 if plinkrev >= minlink:
2009 2013 futurelargelinkrevs.add(plinkrev)
2010 2014
2011 2015 return strippoint, brokenrevs
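# Worked example (hypothetical graph): rev 0 (linkrev 0) has two children,
# rev 1 (linkrev 3) and rev 2 (linkrev 2). For minlink=3 the walk starts
# with heads {1: 3, 2: 2} and futurelargelinkrevs={3}. Popping rev 2
# first: its linkrev 2 < 3, so rev 2 lands in brokenrevs (it must be
# truncated anyway, because truncation is positional). Popping rev 1
# removes linkrev 3 from the set, the set empties, and the walk stops:
# the result is strippoint=1, brokenrevs={2}.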
2012 2016
2013 2017 def strip(self, minlink, transaction):
2014 2018 """truncate the revlog on the first revision with a linkrev >= minlink
2015 2019
2016 2020 This function is called when we're stripping revision minlink and
2017 2021 its descendants from the repository.
2018 2022
2019 2023 We have to remove all revisions with linkrev >= minlink, because
2020 2024 the equivalent changelog revisions will be renumbered after the
2021 2025 strip.
2022 2026
2023 2027 So we truncate the revlog on the first of these revisions, and
2024 2028 trust that the caller has saved the revisions that shouldn't be
2025 2029 removed and that it'll re-add them after this truncation.
2026 2030 """
2027 2031 if len(self) == 0:
2028 2032 return
2029 2033
2030 2034 rev, _ = self.getstrippoint(minlink)
2031 2035 if rev == len(self):
2032 2036 return
2033 2037
2034 2038 # first truncate the files on disk
2035 2039 end = self.start(rev)
2036 2040 if not self._inline:
2037 2041 transaction.add(self.datafile, end)
2038 2042 end = rev * self._io.size
2039 2043 else:
2040 2044 end += rev * self._io.size
2041 2045
2042 2046 transaction.add(self.indexfile, end)
2043 2047
2044 2048 # then reset internal state in memory to forget those revisions
2045 2049 self._cache = None
2046 2050 self._chaininfocache = {}
2047 2051 self._chunkclear()
2048 2052 for x in xrange(rev, len(self)):
2049 2053 del self.nodemap[self.node(x)]
2050 2054
2051 2055 del self.index[rev:-1]
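# Offset arithmetic above, illustrated (hypothetical numbers): stripping
# at rev=10 with 64-byte index entries (self._io.size for a v1 revlog).
# Non-inline: truncate the data file at start(10) and the index file at
# 10 * 64 = 640 bytes. Inline: entries and data share the '.i' file, so
# the truncation point is start(10) + 10 * 64.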
2052 2056
2053 2057 def checksize(self):
2054 2058 expected = 0
2055 2059 if len(self):
2056 2060 expected = max(0, self.end(len(self) - 1))
2057 2061
2058 2062 try:
2059 2063 f = self.opener(self.datafile)
2060 2064 f.seek(0, 2)
2061 2065 actual = f.tell()
2062 2066 f.close()
2063 2067 dd = actual - expected
2064 2068 except IOError as inst:
2065 2069 if inst.errno != errno.ENOENT:
2066 2070 raise
2067 2071 dd = 0
2068 2072
2069 2073 try:
2070 2074 f = self.opener(self.indexfile)
2071 2075 f.seek(0, 2)
2072 2076 actual = f.tell()
2073 2077 f.close()
2074 2078 s = self._io.size
2075 2079 i = max(0, actual // s)
2076 2080 di = actual - (i * s)
2077 2081 if self._inline:
2078 2082 databytes = 0
2079 2083 for r in self:
2080 2084 databytes += max(0, self.length(r))
2081 2085 dd = 0
2082 2086 di = actual - len(self) * s - databytes
2083 2087 except IOError as inst:
2084 2088 if inst.errno != errno.ENOENT:
2085 2089 raise
2086 2090 di = 0
2087 2091
2088 2092 return (dd, di)
2089 2093
2090 2094 def files(self):
2091 2095 res = [self.indexfile]
2092 2096 if not self._inline:
2093 2097 res.append(self.datafile)
2094 2098 return res
2095 2099
2096 2100 DELTAREUSEALWAYS = 'always'
2097 2101 DELTAREUSESAMEREVS = 'samerevs'
2098 2102 DELTAREUSENEVER = 'never'
2099 2103
2100 2104 DELTAREUSEALL = {'always', 'samerevs', 'never'}
2101 2105
2102 2106 def clone(self, tr, destrevlog, addrevisioncb=None,
2103 2107 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2104 2108 """Copy this revlog to another, possibly with format changes.
2105 2109
2106 2110 The destination revlog will contain the same revisions and nodes.
2107 2111 However, it may not be bit-for-bit identical due to e.g. delta encoding
2108 2112 differences.
2109 2113
2110 2114 The ``deltareuse`` argument controls how deltas from the existing revlog
2111 2115 are preserved in the destination revlog. The argument can have the
2112 2116 following values:
2113 2117
2114 2118 DELTAREUSEALWAYS
2115 2119 Deltas will always be reused (if possible), even if the destination
2116 2120 revlog would not select the same revisions for the delta. This is the
2117 2121 fastest mode of operation.
2118 2122 DELTAREUSESAMEREVS
2119 2123 Deltas will be reused if the destination revlog would pick the same
2120 2124 revisions for the delta. This mode strikes a balance between speed
2121 2125 and optimization.
2122 2126 DELTAREUSENEVER
2123 2127 Deltas will never be reused. This is the slowest mode of execution.
2124 2128 This mode can be used to recompute deltas (e.g. if the diff/delta
2125 2129 algorithm changes).
2126 2130
2127 2131 Delta computation can be slow, so the choice of delta reuse policy can
2128 2132 significantly affect run time.
2129 2133
2130 2134 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2131 2135 two extremes. Deltas will be reused if they are appropriate. But if a
2132 2136 better delta base could be chosen, the delta is recomputed. This means if you
2133 2137 are converting a non-generaldelta revlog to a generaldelta revlog,
2134 2138 deltas will be recomputed if the delta's parent isn't a parent of the
2135 2139 revision.
2136 2140
2137 2141 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2138 2142 controls whether to compute deltas against both parents for merges.
2139 2143 If unspecified, the destination revlog's current setting is used.
2140 2144 """
2141 2145 if deltareuse not in self.DELTAREUSEALL:
2142 2146 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2143 2147
2144 2148 if len(destrevlog):
2145 2149 raise ValueError(_('destination revlog is not empty'))
2146 2150
2147 2151 if getattr(self, 'filteredrevs', None):
2148 2152 raise ValueError(_('source revlog has filtered revisions'))
2149 2153 if getattr(destrevlog, 'filteredrevs', None):
2150 2154 raise ValueError(_('destination revlog has filtered revisions'))
2151 2155
2152 2156 # lazydeltabase controls whether to reuse a cached delta, if possible.
2153 2157 oldlazydeltabase = destrevlog._lazydeltabase
2154 2158 oldamd = destrevlog._aggressivemergedeltas
2155 2159
2156 2160 try:
2157 2161 if deltareuse == self.DELTAREUSEALWAYS:
2158 2162 destrevlog._lazydeltabase = True
2159 2163 elif deltareuse == self.DELTAREUSESAMEREVS:
2160 2164 destrevlog._lazydeltabase = False
2161 2165
2162 2166 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2163 2167
2164 2168 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2165 2169 self.DELTAREUSESAMEREVS)
2166 2170
2167 2171 index = self.index
2168 2172 for rev in self:
2169 2173 entry = index[rev]
2170 2174
2171 2175 # Some classes override linkrev to take filtered revs into
2172 2176 # account. Use raw entry from index.
2173 2177 flags = entry[0] & 0xffff
2174 2178 linkrev = entry[4]
2175 2179 p1 = index[entry[5]][7]
2176 2180 p2 = index[entry[6]][7]
2177 2181 node = entry[7]
2178 2182
2179 2183 # (Possibly) reuse the delta from the revlog if allowed and
2180 2184 # the revlog chunk is a delta.
2181 2185 cachedelta = None
2182 2186 rawtext = None
2183 2187 if populatecachedelta:
2184 2188 dp = self.deltaparent(rev)
2185 2189 if dp != nullrev:
2186 2190 cachedelta = (dp, str(self._chunk(rev)))
2187 2191
2188 2192 if not cachedelta:
2189 2193 rawtext = self.revision(rev, raw=True)
2190 2194
2191 2195 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2192 2196 checkambig=False)
2193 2197 dfh = None
2194 2198 if not destrevlog._inline:
2195 2199 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2196 2200 try:
2197 2201 destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
2198 2202 flags, cachedelta, ifh, dfh)
2199 2203 finally:
2200 2204 if dfh:
2201 2205 dfh.close()
2202 2206 ifh.close()
2203 2207
2204 2208 if addrevisioncb:
2205 2209 addrevisioncb(self, rev, node)
2206 2210 finally:
2207 2211 destrevlog._lazydeltabase = oldlazydeltabase
2208 2212 destrevlog._aggressivemergedeltas = oldamd
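# Hedged usage sketch (repo, src and dst names are hypothetical):
# recomputing every delta while copying a revlog, e.g. after switching to
# a generaldelta destination:
#
#     tr = repo.transaction('clone-revlog')
#     try:
#         src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER,
#                   aggressivemergedeltas=True)
#         tr.close()
#     finally:
#         tr.release()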