sparse-read: ignore trailing empty revs in each read chunk...
Paul Morelle
r34899:1bde8e8e default
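The change below introduces _trimchunk() and applies it in _slicechunk(), so that every sliced read chunk drops zero-length revisions from its tail before data is fetched. A minimal standalone sketch of the idea (a dict stands in for revlog.length; the real helper also takes startidx/endidx bounds):

    def trim_trailing_empty(revs, lengths):
        # drop zero-length revs from the tail, but never the first
        # revision of the chunk
        end = len(revs)
        while end > 1 and lengths[revs[end - 1]] == 0:
            end -= 1
        return revs[:end]

    # revs 3 and 4 carry empty deltas, so they are not worth reading:
    assert trim_trailing_empty([0, 1, 2, 3, 4],
                               {0: 5, 1: 7, 2: 4, 3: 0, 4: 0}) == [0, 1, 2]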
@@ -1,2350 +1,2375 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullid,
30 30 nullrev,
31 31 wdirhex,
32 32 wdirid,
33 33 wdirrev,
34 34 )
35 35 from .i18n import _
36 36 from . import (
37 37 ancestor,
38 38 error,
39 39 mdiff,
40 40 policy,
41 41 pycompat,
42 42 templatefilters,
43 43 util,
44 44 )
45 45
46 46 parsers = policy.importmod(r'parsers')
47 47
48 48 # Aliased for performance.
49 49 _zlibdecompress = zlib.decompress
50 50
51 51 # revlog header flags
52 52 REVLOGV0 = 0
53 53 REVLOGV1 = 1
54 54 # Dummy value until file format is finalized.
55 55 # Reminder: change the bounds check in revlog.__init__ when this is changed.
56 56 REVLOGV2 = 0xDEAD
57 57 FLAG_INLINE_DATA = (1 << 16)
58 58 FLAG_GENERALDELTA = (1 << 17)
59 59 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
60 60 REVLOG_DEFAULT_FORMAT = REVLOGV1
61 61 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
62 62 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
63 63 REVLOGV2_FLAGS = REVLOGV1_FLAGS
64 64
65 65 # revlog index flags
66 66 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
67 67 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
68 68 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
69 69 REVIDX_DEFAULT_FLAGS = 0
70 70 # stable order in which flags need to be processed and their processors applied
71 71 REVIDX_FLAGS_ORDER = [
72 72 REVIDX_ISCENSORED,
73 73 REVIDX_ELLIPSIS,
74 74 REVIDX_EXTSTORED,
75 75 ]
76 76 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
77 77
78 78 # max size of revlog with inline data
79 79 _maxinline = 131072
80 80 _chunksize = 1048576
81 81
82 82 RevlogError = error.RevlogError
83 83 LookupError = error.LookupError
84 84 CensoredNodeError = error.CensoredNodeError
85 85 ProgrammingError = error.ProgrammingError
86 86
87 87 # Store flag processors (cf. 'addflagprocessor()' to register)
88 88 _flagprocessors = {
89 89 REVIDX_ISCENSORED: None,
90 90 }
91 91
92 92 def addflagprocessor(flag, processor):
93 93 """Register a flag processor on a revision data flag.
94 94
95 95 Invariant:
96 96 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
97 97 - Only one flag processor can be registered on a specific flag.
98 98 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
99 99 following signatures:
100 100 - (read) f(self, rawtext) -> text, bool
101 101 - (write) f(self, text) -> rawtext, bool
102 102 - (raw) f(self, rawtext) -> bool
103 103 "text" is presented to the user. "rawtext" is stored in revlog data, not
104 104 directly visible to the user.
105 105 The boolean returned by these transforms is used to determine whether
106 106 the returned text can be used for hash integrity checking. For example,
107 107 if "write" returns False, then "text" is used to generate hash. If
108 108 "write" returns True, that basically means "rawtext" returned by "write"
109 109 should be used to generate hash. Usually, "write" and "read" return
110 110 different booleans, and "raw" returns the same boolean as "write".
111 111
112 112 Note: The 'raw' transform is used for changegroup generation and in some
113 113 debug commands. In this case the transform only indicates whether the
114 114 contents can be used for hash integrity checks.
115 115 """
116 116 if not flag & REVIDX_KNOWN_FLAGS:
117 117 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
118 118 raise ProgrammingError(msg)
119 119 if flag not in REVIDX_FLAGS_ORDER:
120 120 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
121 121 raise ProgrammingError(msg)
122 122 if flag in _flagprocessors:
123 123 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
124 124 raise error.Abort(msg)
125 125 _flagprocessors[flag] = processor
126 126
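For illustration, roughly how an extension might register a processor (hypothetical sketch: decodeext/encodeext are made-up helpers, and REVIDX_EXTSTORED serves only as an example flag):

    def readext(self, rawtext):
        # stored form -> user-visible text; the node hash covers the user
        # text, so the returned text can be hash-checked (True)
        return decodeext(rawtext), True

    def writeext(self, text):
        # user text -> stored form; False means the original text, not the
        # returned rawtext, is what the node hash is computed from
        return encodeext(text), False

    def rawext(self, rawtext):
        # same boolean as the write transform
        return False

    addflagprocessor(REVIDX_EXTSTORED, (readext, writeext, rawext))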
127 127 def getoffset(q):
128 128 return int(q >> 16)
129 129
130 130 def gettype(q):
131 131 return int(q & 0xFFFF)
132 132
133 133 def offset_type(offset, type):
134 134 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
135 135 raise ValueError('unknown revlog index flags')
136 136 return int(int(offset) << 16 | type)
137 137
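A quick sanity check of the packing above: the offset occupies the high bits and the 16 flag bits the low bits, so the two accessors invert offset_type():

    v = offset_type(1024, REVIDX_ISCENSORED)
    assert getoffset(v) == 1024
    assert gettype(v) == REVIDX_ISCENSORED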
138 138 _nullhash = hashlib.sha1(nullid)
139 139
140 140 def hash(text, p1, p2):
141 141 """generate a hash from the given text and its parent hashes
142 142
143 143 This hash combines both the current file contents and its history
144 144 in a manner that makes it easy to distinguish nodes with the same
145 145 content in the revision graph.
146 146 """
147 147 # As of now, if one of the parent nodes is null, it is always p2
148 148 if p2 == nullid:
149 149 # deep copy of a hash is faster than creating one
150 150 s = _nullhash.copy()
151 151 s.update(p1)
152 152 else:
153 153 # none of the parent nodes are nullid
154 154 if p1 < p2:
155 155 a = p1
156 156 b = p2
157 157 else:
158 158 a = p2
159 159 b = p1
160 160 s = hashlib.sha1(a)
161 161 s.update(b)
162 162 s.update(text)
163 163 return s.digest()
164 164
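A standalone check of the two branches above: the null-parent fast path agrees with hashing min(p1, p2) + max(p1, p2) + text directly (nullid sorts first since it is all zero bytes):

    p1 = b'\x11' * 20
    expected = hashlib.sha1(nullid + p1 + b'data').digest()
    assert hash(b'data', p1, nullid) == expected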
165 def _trimchunk(revlog, revs, startidx, endidx=None):
166 """returns revs[startidx:endidx] without empty trailing revs
167 """
168 length = revlog.length
169
170 if endidx is None:
171 endidx = len(revs)
172
173 # Trim empty revs at the end, but never the very first revision of a chain
174 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
175 endidx -= 1
176
177 return revs[startidx:endidx]
178
165 179 def _slicechunk(revlog, revs):
166 180 """slice revs to reduce the amount of unrelated data to be read from disk.
167 181
168 182 ``revs`` is sliced into groups that should be read in one time.
169 183 Assume that revs are sorted.
170 184 """
171 185 start = revlog.start
172 186 length = revlog.length
173 187
174 188 if len(revs) <= 1:
175 189 yield revs
176 190 return
177 191
178 192 startbyte = start(revs[0])
179 193 endbyte = start(revs[-1]) + length(revs[-1])
180 194 readdata = deltachainspan = endbyte - startbyte
181 195
182 196 chainpayload = sum(length(r) for r in revs)
183 197
184 198 if deltachainspan:
185 199 density = chainpayload / float(deltachainspan)
186 200 else:
187 201 density = 1.0
188 202
189 203 # Store the gaps in a heap to have them sorted by decreasing size
190 204 gapsheap = []
191 205 heapq.heapify(gapsheap)
192 206 prevend = None
193 207 for i, rev in enumerate(revs):
194 208 revstart = start(rev)
195 209 revlen = length(rev)
196 210
211 # Skip empty revisions to form larger holes
212 if revlen == 0:
213 continue
214
197 215 if prevend is not None:
198 216 gapsize = revstart - prevend
199 217 # only consider holes that are large enough
200 218 if gapsize > revlog._srmingapsize:
201 219 heapq.heappush(gapsheap, (-gapsize, i))
202 220
203 221 prevend = revstart + revlen
204 222
205 223 # Collect the indices of the largest holes until the density is acceptable
206 224 indicesheap = []
207 225 heapq.heapify(indicesheap)
208 226 while gapsheap and density < revlog._srdensitythreshold:
209 227 oppgapsize, gapidx = heapq.heappop(gapsheap)
210 228
211 229 heapq.heappush(indicesheap, gapidx)
212 230
213 231 # the gap sizes are stored as negatives to be sorted decreasingly
214 232 # by the heap
215 233 readdata -= (-oppgapsize)
216 234 if readdata > 0:
217 235 density = chainpayload / float(readdata)
218 236 else:
219 237 density = 1.0
220 238
221 239 # Cut the revs at collected indices
222 240 previdx = 0
223 241 while indicesheap:
224 242 idx = heapq.heappop(indicesheap)
225 yield revs[previdx:idx]
243
244 chunk = _trimchunk(revlog, revs, previdx, idx)
245 if chunk:
246 yield chunk
247
226 248 previdx = idx
227 yield revs[previdx:]
249
250 chunk = _trimchunk(revlog, revs, previdx)
251 if chunk:
252 yield chunk
228 253
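A worked example of the density test (numbers invented): suppose the revs of a chunk occupy bytes [0, 100), [100, 150) and [850, 1000), leaving a 700-byte hole:

    chainpayload = 100 + 50 + 150                    # bytes actually needed
    deltachainspan = 1000 - 0                        # first byte through last
    density = chainpayload / float(deltachainspan)   # 0.30

With the default _srdensitythreshold of 0.25 this chunk is dense enough to read in one pass; a threshold above 0.30 would instead cut the read at the 700-byte hole, provided the hole exceeds _srmingapsize.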
229 254 # index v0:
230 255 # 4 bytes: offset
231 256 # 4 bytes: compressed length
232 257 # 4 bytes: base rev
233 258 # 4 bytes: link rev
234 259 # 20 bytes: parent 1 nodeid
235 260 # 20 bytes: parent 2 nodeid
236 261 # 20 bytes: nodeid
237 262 indexformatv0 = struct.Struct(">4l20s20s20s")
238 263 indexformatv0_pack = indexformatv0.pack
239 264 indexformatv0_unpack = indexformatv0.unpack
240 265
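A round-trip of one v0 entry with dummy values (offset, compressed length, base rev, link rev, then p1/p2/node as 20-byte ids):

    entry = (0, 42, 0, 7, b'\x01' * 20, b'\x02' * 20, b'\x03' * 20)
    packed = indexformatv0_pack(*entry)
    assert len(packed) == indexformatv0.size   # 4 * 4 + 3 * 20 = 76 bytes
    assert indexformatv0_unpack(packed) == entry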
241 266 class revlogoldio(object):
242 267 def __init__(self):
243 268 self.size = indexformatv0.size
244 269
245 270 def parseindex(self, data, inline):
246 271 s = self.size
247 272 index = []
248 273 nodemap = {nullid: nullrev}
249 274 n = off = 0
250 275 l = len(data)
251 276 while off + s <= l:
252 277 cur = data[off:off + s]
253 278 off += s
254 279 e = indexformatv0_unpack(cur)
255 280 # transform to revlogv1 format
256 281 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
257 282 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
258 283 index.append(e2)
259 284 nodemap[e[6]] = n
260 285 n += 1
261 286
262 287 # add the magic null revision at -1
263 288 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
264 289
265 290 return index, nodemap, None
266 291
267 292 def packentry(self, entry, node, version, rev):
268 293 if gettype(entry[0]):
269 294 raise RevlogError(_('index entry flags need revlog version 1'))
270 295 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
271 296 node(entry[5]), node(entry[6]), entry[7])
272 297 return indexformatv0_pack(*e2)
273 298
274 299 # index ng:
275 300 # 6 bytes: offset
276 301 # 2 bytes: flags
277 302 # 4 bytes: compressed length
278 303 # 4 bytes: uncompressed length
279 304 # 4 bytes: base rev
280 305 # 4 bytes: link rev
281 306 # 4 bytes: parent 1 rev
282 307 # 4 bytes: parent 2 rev
283 308 # 32 bytes: nodeid
284 309 indexformatng = struct.Struct(">Qiiiiii20s12x")
285 310 indexformatng_pack = indexformatng.pack
286 311 versionformat = struct.Struct(">I")
287 312 versionformat_pack = versionformat.pack
288 313 versionformat_unpack = versionformat.unpack
289 314
290 315 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
291 316 # signed integer)
292 317 _maxentrysize = 0x7fffffff
293 318
294 319 class revlogio(object):
295 320 def __init__(self):
296 321 self.size = indexformatng.size
297 322
298 323 def parseindex(self, data, inline):
299 324 # call the C implementation to parse the index data
300 325 index, cache = parsers.parse_index2(data, inline)
301 326 return index, getattr(index, 'nodemap', None), cache
302 327
303 328 def packentry(self, entry, node, version, rev):
304 329 p = indexformatng_pack(*entry)
305 330 if rev == 0:
306 331 p = versionformat_pack(version) + p[4:]
307 332 return p
308 333
309 334 class revlog(object):
310 335 """
311 336 the underlying revision storage object
312 337
313 338 A revlog consists of two parts, an index and the revision data.
314 339
315 340 The index is a file with a fixed record size containing
316 341 information on each revision, including its nodeid (hash), the
317 342 nodeids of its parents, the position and offset of its data within
318 343 the data file, and the revision it's based on. Finally, each entry
319 344 contains a linkrev entry that can serve as a pointer to external
320 345 data.
321 346
322 347 The revision data itself is a linear collection of data chunks.
323 348 Each chunk represents a revision and is usually represented as a
324 349 delta against the previous chunk. To bound lookup time, runs of
325 350 deltas are limited to about 2 times the length of the original
326 351 version data. This makes retrieval of a version proportional to
327 352 its size, or O(1) relative to the number of revisions.
328 353
329 354 Both pieces of the revlog are written to in an append-only
330 355 fashion, which means we never need to rewrite a file to insert or
331 356 remove data, and can use some simple techniques to avoid the need
332 357 for locking while reading.
333 358
334 359 If checkambig, indexfile is opened with checkambig=True at
335 360 writing, to avoid file stat ambiguity.
336 361
337 362 If mmaplargeindex is True, and an mmapindexthreshold is set, the
338 363 index will be mmapped rather than read if it is larger than the
339 364 configured threshold.
340 365 """
341 366 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
342 367 mmaplargeindex=False):
343 368 """
344 369 create a revlog object
345 370
346 371 opener is a function that abstracts the file opening operation
347 372 and can be used to implement COW semantics or the like.
348 373 """
349 374 self.indexfile = indexfile
350 375 self.datafile = datafile or (indexfile[:-2] + ".d")
351 376 self.opener = opener
352 377 # When True, indexfile is opened with checkambig=True at writing, to
353 378 # avoid file stat ambiguity.
354 379 self._checkambig = checkambig
355 380 # 3-tuple of (node, rev, text) for a raw revision.
356 381 self._cache = None
357 382 # Maps rev to chain base rev.
358 383 self._chainbasecache = util.lrucachedict(100)
359 384 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
360 385 self._chunkcache = (0, '')
361 386 # How much data to read and cache into the raw revlog data cache.
362 387 self._chunkcachesize = 65536
363 388 self._maxchainlen = None
364 389 self._aggressivemergedeltas = False
365 390 self.index = []
366 391 # Mapping of partial identifiers to full nodes.
367 392 self._pcache = {}
368 393 # Mapping of revision integer to full node.
369 394 self._nodecache = {nullid: nullrev}
370 395 self._nodepos = None
371 396 self._compengine = 'zlib'
372 397 self._maxdeltachainspan = -1
373 398 self._withsparseread = False
374 399 self._srdensitythreshold = 0.25
375 400 self._srmingapsize = 262144
376 401
377 402 mmapindexthreshold = None
378 403 v = REVLOG_DEFAULT_VERSION
379 404 opts = getattr(opener, 'options', None)
380 405 if opts is not None:
381 406 if 'revlogv2' in opts:
382 407 # version 2 revlogs always use generaldelta.
383 408 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
384 409 elif 'revlogv1' in opts:
385 410 if 'generaldelta' in opts:
386 411 v |= FLAG_GENERALDELTA
387 412 else:
388 413 v = 0
389 414 if 'chunkcachesize' in opts:
390 415 self._chunkcachesize = opts['chunkcachesize']
391 416 if 'maxchainlen' in opts:
392 417 self._maxchainlen = opts['maxchainlen']
393 418 if 'aggressivemergedeltas' in opts:
394 419 self._aggressivemergedeltas = opts['aggressivemergedeltas']
395 420 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
396 421 if 'compengine' in opts:
397 422 self._compengine = opts['compengine']
398 423 if 'maxdeltachainspan' in opts:
399 424 self._maxdeltachainspan = opts['maxdeltachainspan']
400 425 if mmaplargeindex and 'mmapindexthreshold' in opts:
401 426 mmapindexthreshold = opts['mmapindexthreshold']
402 427 self._withsparseread = bool(opts.get('with-sparse-read', False))
403 428 if 'sparse-read-density-threshold' in opts:
404 429 self._srdensitythreshold = opts['sparse-read-density-threshold']
405 430 if 'sparse-read-min-gap-size' in opts:
406 431 self._srmingapsize = opts['sparse-read-min-gap-size']
407 432
408 433 if self._chunkcachesize <= 0:
409 434 raise RevlogError(_('revlog chunk cache size %r is not greater '
410 435 'than 0') % self._chunkcachesize)
411 436 elif self._chunkcachesize & (self._chunkcachesize - 1):
412 437 raise RevlogError(_('revlog chunk cache size %r is not a power '
413 438 'of 2') % self._chunkcachesize)
414 439
415 440 indexdata = ''
416 441 self._initempty = True
417 442 try:
418 443 f = self.opener(self.indexfile)
419 444 if (mmapindexthreshold is not None and
420 445 self.opener.fstat(f).st_size >= mmapindexthreshold):
421 446 indexdata = util.buffer(util.mmapread(f))
422 447 else:
423 448 indexdata = f.read()
424 449 f.close()
425 450 if len(indexdata) > 0:
426 451 v = versionformat_unpack(indexdata[:4])[0]
427 452 self._initempty = False
428 453 except IOError as inst:
429 454 if inst.errno != errno.ENOENT:
430 455 raise
431 456
432 457 self.version = v
433 458 self._inline = v & FLAG_INLINE_DATA
434 459 self._generaldelta = v & FLAG_GENERALDELTA
435 460 flags = v & ~0xFFFF
436 461 fmt = v & 0xFFFF
437 462 if fmt == REVLOGV0:
438 463 if flags:
439 464 raise RevlogError(_('unknown flags (%#04x) in version %d '
440 465 'revlog %s') %
441 466 (flags >> 16, fmt, self.indexfile))
442 467 elif fmt == REVLOGV1:
443 468 if flags & ~REVLOGV1_FLAGS:
444 469 raise RevlogError(_('unknown flags (%#04x) in version %d '
445 470 'revlog %s') %
446 471 (flags >> 16, fmt, self.indexfile))
447 472 elif fmt == REVLOGV2:
448 473 if flags & ~REVLOGV2_FLAGS:
449 474 raise RevlogError(_('unknown flags (%#04x) in version %d '
450 475 'revlog %s') %
451 476 (flags >> 16, fmt, self.indexfile))
452 477 else:
453 478 raise RevlogError(_('unknown version (%d) in revlog %s') %
454 479 (fmt, self.indexfile))
455 480
456 481 self.storedeltachains = True
457 482
458 483 self._io = revlogio()
459 484 if self.version == REVLOGV0:
460 485 self._io = revlogoldio()
461 486 try:
462 487 d = self._io.parseindex(indexdata, self._inline)
463 488 except (ValueError, IndexError):
464 489 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
465 490 self.index, nodemap, self._chunkcache = d
466 491 if nodemap is not None:
467 492 self.nodemap = self._nodecache = nodemap
468 493 if not self._chunkcache:
469 494 self._chunkclear()
470 495 # revnum -> (chain-length, sum-delta-length)
471 496 self._chaininfocache = {}
472 497 # revlog header -> revlog compressor
473 498 self._decompressors = {}
474 499
475 500 @util.propertycache
476 501 def _compressor(self):
477 502 return util.compengines[self._compengine].revlogcompressor()
478 503
479 504 def tip(self):
480 505 return self.node(len(self.index) - 2)
481 506 def __contains__(self, rev):
482 507 return 0 <= rev < len(self)
483 508 def __len__(self):
484 509 return len(self.index) - 1
485 510 def __iter__(self):
486 511 return iter(xrange(len(self)))
487 512 def revs(self, start=0, stop=None):
488 513 """iterate over all rev in this revlog (from start to stop)"""
489 514 step = 1
490 515 if stop is not None:
491 516 if start > stop:
492 517 step = -1
493 518 stop += step
494 519 else:
495 520 stop = len(self)
496 521 return xrange(start, stop, step)
497 522
498 523 @util.propertycache
499 524 def nodemap(self):
500 525 self.rev(self.node(0))
501 526 return self._nodecache
502 527
503 528 def hasnode(self, node):
504 529 try:
505 530 self.rev(node)
506 531 return True
507 532 except KeyError:
508 533 return False
509 534
510 535 def clearcaches(self):
511 536 self._cache = None
512 537 self._chainbasecache.clear()
513 538 self._chunkcache = (0, '')
514 539 self._pcache = {}
515 540
516 541 try:
517 542 self._nodecache.clearcaches()
518 543 except AttributeError:
519 544 self._nodecache = {nullid: nullrev}
520 545 self._nodepos = None
521 546
522 547 def rev(self, node):
523 548 try:
524 549 return self._nodecache[node]
525 550 except TypeError:
526 551 raise
527 552 except RevlogError:
528 553 # parsers.c radix tree lookup failed
529 554 if node == wdirid:
530 555 raise error.WdirUnsupported
531 556 raise LookupError(node, self.indexfile, _('no node'))
532 557 except KeyError:
533 558 # pure python cache lookup failed
534 559 n = self._nodecache
535 560 i = self.index
536 561 p = self._nodepos
537 562 if p is None:
538 563 p = len(i) - 2
539 564 for r in xrange(p, -1, -1):
540 565 v = i[r][7]
541 566 n[v] = r
542 567 if v == node:
543 568 self._nodepos = r - 1
544 569 return r
545 570 if node == wdirid:
546 571 raise error.WdirUnsupported
547 572 raise LookupError(node, self.indexfile, _('no node'))
548 573
549 574 # Accessors for index entries.
550 575
551 576 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
552 577 # are flags.
553 578 def start(self, rev):
554 579 return int(self.index[rev][0] >> 16)
555 580
556 581 def flags(self, rev):
557 582 return self.index[rev][0] & 0xFFFF
558 583
559 584 def length(self, rev):
560 585 return self.index[rev][1]
561 586
562 587 def rawsize(self, rev):
563 588 """return the length of the uncompressed text for a given revision"""
564 589 l = self.index[rev][2]
565 590 if l >= 0:
566 591 return l
567 592
568 593 t = self.revision(rev, raw=True)
569 594 return len(t)
570 595
571 596 def size(self, rev):
572 597 """length of non-raw text (processed by a "read" flag processor)"""
573 598 # fast path: if no "read" flag processor could change the content,
574 599 # size is rawsize. note: ELLIPSIS is known to not change the content.
575 600 flags = self.flags(rev)
576 601 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
577 602 return self.rawsize(rev)
578 603
579 604 return len(self.revision(rev, raw=False))
580 605
581 606 def chainbase(self, rev):
582 607 base = self._chainbasecache.get(rev)
583 608 if base is not None:
584 609 return base
585 610
586 611 index = self.index
587 612 base = index[rev][3]
588 613 while base != rev:
589 614 rev = base
590 615 base = index[rev][3]
591 616
592 617 self._chainbasecache[rev] = base
593 618 return base
594 619
595 620 def linkrev(self, rev):
596 621 return self.index[rev][4]
597 622
598 623 def parentrevs(self, rev):
599 624 try:
600 625 return self.index[rev][5:7]
601 626 except IndexError:
602 627 if rev == wdirrev:
603 628 raise error.WdirUnsupported
604 629 raise
605 630
606 631 def node(self, rev):
607 632 try:
608 633 return self.index[rev][7]
609 634 except IndexError:
610 635 if rev == wdirrev:
611 636 raise error.WdirUnsupported
612 637 raise
613 638
614 639 # Derived from index values.
615 640
616 641 def end(self, rev):
617 642 return self.start(rev) + self.length(rev)
618 643
619 644 def parents(self, node):
620 645 i = self.index
621 646 d = i[self.rev(node)]
622 647 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
623 648
624 649 def chainlen(self, rev):
625 650 return self._chaininfo(rev)[0]
626 651
627 652 def _chaininfo(self, rev):
628 653 chaininfocache = self._chaininfocache
629 654 if rev in chaininfocache:
630 655 return chaininfocache[rev]
631 656 index = self.index
632 657 generaldelta = self._generaldelta
633 658 iterrev = rev
634 659 e = index[iterrev]
635 660 clen = 0
636 661 compresseddeltalen = 0
637 662 while iterrev != e[3]:
638 663 clen += 1
639 664 compresseddeltalen += e[1]
640 665 if generaldelta:
641 666 iterrev = e[3]
642 667 else:
643 668 iterrev -= 1
644 669 if iterrev in chaininfocache:
645 670 t = chaininfocache[iterrev]
646 671 clen += t[0]
647 672 compresseddeltalen += t[1]
648 673 break
649 674 e = index[iterrev]
650 675 else:
651 676 # Add text length of base since decompressing that also takes
652 677 # work. For cache hits the length is already included.
653 678 compresseddeltalen += e[1]
654 679 r = (clen, compresseddeltalen)
655 680 chaininfocache[rev] = r
656 681 return r
657 682
658 683 def _deltachain(self, rev, stoprev=None):
659 684 """Obtain the delta chain for a revision.
660 685
661 686 ``stoprev`` specifies a revision to stop at. If not specified, we
662 687 stop at the base of the chain.
663 688
664 689 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
665 690 revs in ascending order and ``stopped`` is a bool indicating whether
666 691 ``stoprev`` was hit.
667 692 """
668 693 # Try C implementation.
669 694 try:
670 695 return self.index.deltachain(rev, stoprev, self._generaldelta)
671 696 except AttributeError:
672 697 pass
673 698
674 699 chain = []
675 700
676 701 # Alias to prevent attribute lookup in tight loop.
677 702 index = self.index
678 703 generaldelta = self._generaldelta
679 704
680 705 iterrev = rev
681 706 e = index[iterrev]
682 707 while iterrev != e[3] and iterrev != stoprev:
683 708 chain.append(iterrev)
684 709 if generaldelta:
685 710 iterrev = e[3]
686 711 else:
687 712 iterrev -= 1
688 713 e = index[iterrev]
689 714
690 715 if iterrev == stoprev:
691 716 stopped = True
692 717 else:
693 718 chain.append(iterrev)
694 719 stopped = False
695 720
696 721 chain.reverse()
697 722 return chain, stopped
698 723
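As an illustration (toy generaldelta index, where e[3] is the delta base): if rev 5's base is 3, rev 3's base is 0, and rev 0 is its own base, _deltachain(5) returns ([0, 3, 5], False), while _deltachain(5, stoprev=3) exits the walk at rev 3 before appending it and returns ([5], True).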
699 724 def ancestors(self, revs, stoprev=0, inclusive=False):
700 725 """Generate the ancestors of 'revs' in reverse topological order.
701 726 Does not generate revs lower than stoprev.
702 727
703 728 See the documentation for ancestor.lazyancestors for more details."""
704 729
705 730 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
706 731 inclusive=inclusive)
707 732
708 733 def descendants(self, revs):
709 734 """Generate the descendants of 'revs' in revision order.
710 735
711 736 Yield a sequence of revision numbers starting with a child of
712 737 some rev in revs, i.e., each revision is *not* considered a
713 738 descendant of itself. Results are ordered by revision number (a
714 739 topological sort)."""
715 740 first = min(revs)
716 741 if first == nullrev:
717 742 for i in self:
718 743 yield i
719 744 return
720 745
721 746 seen = set(revs)
722 747 for i in self.revs(start=first + 1):
723 748 for x in self.parentrevs(i):
724 749 if x != nullrev and x in seen:
725 750 seen.add(i)
726 751 yield i
727 752 break
728 753
729 754 def findcommonmissing(self, common=None, heads=None):
730 755 """Return a tuple of the ancestors of common and the ancestors of heads
731 756 that are not ancestors of common. In revset terminology, we return the
732 757 tuple:
733 758
734 759 ::common, (::heads) - (::common)
735 760
736 761 The list is sorted by revision number, meaning it is
737 762 topologically sorted.
738 763
739 764 'heads' and 'common' are both lists of node IDs. If heads is
740 765 not supplied, uses all of the revlog's heads. If common is not
741 766 supplied, uses nullid."""
742 767 if common is None:
743 768 common = [nullid]
744 769 if heads is None:
745 770 heads = self.heads()
746 771
747 772 common = [self.rev(n) for n in common]
748 773 heads = [self.rev(n) for n in heads]
749 774
750 775 # we want the ancestors, but inclusive
751 776 class lazyset(object):
752 777 def __init__(self, lazyvalues):
753 778 self.addedvalues = set()
754 779 self.lazyvalues = lazyvalues
755 780
756 781 def __contains__(self, value):
757 782 return value in self.addedvalues or value in self.lazyvalues
758 783
759 784 def __iter__(self):
760 785 added = self.addedvalues
761 786 for r in added:
762 787 yield r
763 788 for r in self.lazyvalues:
764 789 if not r in added:
765 790 yield r
766 791
767 792 def add(self, value):
768 793 self.addedvalues.add(value)
769 794
770 795 def update(self, values):
771 796 self.addedvalues.update(values)
772 797
773 798 has = lazyset(self.ancestors(common))
774 799 has.add(nullrev)
775 800 has.update(common)
776 801
777 802 # take all ancestors from heads that aren't in has
778 803 missing = set()
779 804 visit = collections.deque(r for r in heads if r not in has)
780 805 while visit:
781 806 r = visit.popleft()
782 807 if r in missing:
783 808 continue
784 809 else:
785 810 missing.add(r)
786 811 for p in self.parentrevs(r):
787 812 if p not in has:
788 813 visit.append(p)
789 814 missing = list(missing)
790 815 missing.sort()
791 816 return has, [self.node(miss) for miss in missing]
792 817
793 818 def incrementalmissingrevs(self, common=None):
794 819 """Return an object that can be used to incrementally compute the
795 820 revision numbers of the ancestors of arbitrary sets that are not
796 821 ancestors of common. This is an ancestor.incrementalmissingancestors
797 822 object.
798 823
799 824 'common' is a list of revision numbers. If common is not supplied, uses
800 825 nullrev.
801 826 """
802 827 if common is None:
803 828 common = [nullrev]
804 829
805 830 return ancestor.incrementalmissingancestors(self.parentrevs, common)
806 831
807 832 def findmissingrevs(self, common=None, heads=None):
808 833 """Return the revision numbers of the ancestors of heads that
809 834 are not ancestors of common.
810 835
811 836 More specifically, return a list of revision numbers corresponding to
812 837 nodes N such that every N satisfies the following constraints:
813 838
814 839 1. N is an ancestor of some node in 'heads'
815 840 2. N is not an ancestor of any node in 'common'
816 841
817 842 The list is sorted by revision number, meaning it is
818 843 topologically sorted.
819 844
820 845 'heads' and 'common' are both lists of revision numbers. If heads is
821 846 not supplied, uses all of the revlog's heads. If common is not
822 847 supplied, uses nullrev."""
823 848 if common is None:
824 849 common = [nullrev]
825 850 if heads is None:
826 851 heads = self.headrevs()
827 852
828 853 inc = self.incrementalmissingrevs(common=common)
829 854 return inc.missingancestors(heads)
830 855
831 856 def findmissing(self, common=None, heads=None):
832 857 """Return the ancestors of heads that are not ancestors of common.
833 858
834 859 More specifically, return a list of nodes N such that every N
835 860 satisfies the following constraints:
836 861
837 862 1. N is an ancestor of some node in 'heads'
838 863 2. N is not an ancestor of any node in 'common'
839 864
840 865 The list is sorted by revision number, meaning it is
841 866 topologically sorted.
842 867
843 868 'heads' and 'common' are both lists of node IDs. If heads is
844 869 not supplied, uses all of the revlog's heads. If common is not
845 870 supplied, uses nullid."""
846 871 if common is None:
847 872 common = [nullid]
848 873 if heads is None:
849 874 heads = self.heads()
850 875
851 876 common = [self.rev(n) for n in common]
852 877 heads = [self.rev(n) for n in heads]
853 878
854 879 inc = self.incrementalmissingrevs(common=common)
855 880 return [self.node(r) for r in inc.missingancestors(heads)]
856 881
857 882 def nodesbetween(self, roots=None, heads=None):
858 883 """Return a topological path from 'roots' to 'heads'.
859 884
860 885 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
861 886 topologically sorted list of all nodes N that satisfy both of
862 887 these constraints:
863 888
864 889 1. N is a descendant of some node in 'roots'
865 890 2. N is an ancestor of some node in 'heads'
866 891
867 892 Every node is considered to be both a descendant and an ancestor
868 893 of itself, so every reachable node in 'roots' and 'heads' will be
869 894 included in 'nodes'.
870 895
871 896 'outroots' is the list of reachable nodes in 'roots', i.e., the
872 897 subset of 'roots' that is returned in 'nodes'. Likewise,
873 898 'outheads' is the subset of 'heads' that is also in 'nodes'.
874 899
875 900 'roots' and 'heads' are both lists of node IDs. If 'roots' is
876 901 unspecified, uses nullid as the only root. If 'heads' is
877 902 unspecified, uses list of all of the revlog's heads."""
878 903 nonodes = ([], [], [])
879 904 if roots is not None:
880 905 roots = list(roots)
881 906 if not roots:
882 907 return nonodes
883 908 lowestrev = min([self.rev(n) for n in roots])
884 909 else:
885 910 roots = [nullid] # Everybody's a descendant of nullid
886 911 lowestrev = nullrev
887 912 if (lowestrev == nullrev) and (heads is None):
888 913 # We want _all_ the nodes!
889 914 return ([self.node(r) for r in self], [nullid], list(self.heads()))
890 915 if heads is None:
891 916 # All nodes are ancestors, so the latest ancestor is the last
892 917 # node.
893 918 highestrev = len(self) - 1
894 919 # Set ancestors to None to signal that every node is an ancestor.
895 920 ancestors = None
896 921 # Set heads to an empty dictionary for later discovery of heads
897 922 heads = {}
898 923 else:
899 924 heads = list(heads)
900 925 if not heads:
901 926 return nonodes
902 927 ancestors = set()
903 928 # Turn heads into a dictionary so we can remove 'fake' heads.
904 929 # Also, later we will be using it to filter out the heads we can't
905 930 # find from roots.
906 931 heads = dict.fromkeys(heads, False)
907 932 # Start at the top and keep marking parents until we're done.
908 933 nodestotag = set(heads)
909 934 # Remember where the top was so we can use it as a limit later.
910 935 highestrev = max([self.rev(n) for n in nodestotag])
911 936 while nodestotag:
912 937 # grab a node to tag
913 938 n = nodestotag.pop()
914 939 # Never tag nullid
915 940 if n == nullid:
916 941 continue
917 942 # A node's revision number represents its place in a
918 943 # topologically sorted list of nodes.
919 944 r = self.rev(n)
920 945 if r >= lowestrev:
921 946 if n not in ancestors:
922 947 # If we are possibly a descendant of one of the roots
923 948 # and we haven't already been marked as an ancestor
924 949 ancestors.add(n) # Mark as ancestor
925 950 # Add non-nullid parents to list of nodes to tag.
926 951 nodestotag.update([p for p in self.parents(n) if
927 952 p != nullid])
928 953 elif n in heads: # We've seen it before, is it a fake head?
929 954 # So it is, real heads should not be the ancestors of
930 955 # any other heads.
931 956 heads.pop(n)
932 957 if not ancestors:
933 958 return nonodes
934 959 # Now that we have our set of ancestors, we want to remove any
935 960 # roots that are not ancestors.
936 961
937 962 # If one of the roots was nullid, everything is included anyway.
938 963 if lowestrev > nullrev:
939 964 # But, since we weren't, let's recompute the lowest rev to not
940 965 # include roots that aren't ancestors.
941 966
942 967 # Filter out roots that aren't ancestors of heads
943 968 roots = [root for root in roots if root in ancestors]
944 969 # Recompute the lowest revision
945 970 if roots:
946 971 lowestrev = min([self.rev(root) for root in roots])
947 972 else:
948 973 # No more roots? Return empty list
949 974 return nonodes
950 975 else:
951 976 # We are descending from nullid, and don't need to care about
952 977 # any other roots.
953 978 lowestrev = nullrev
954 979 roots = [nullid]
955 980 # Transform our roots list into a set.
956 981 descendants = set(roots)
957 982 # Also, keep the original roots so we can filter out roots that aren't
958 983 # 'real' roots (i.e. are descended from other roots).
959 984 roots = descendants.copy()
960 985 # Our topologically sorted list of output nodes.
961 986 orderedout = []
962 987 # Don't start at nullid since we don't want nullid in our output list,
963 988 # and if nullid shows up in descendants, empty parents will look like
964 989 # they're descendants.
965 990 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
966 991 n = self.node(r)
967 992 isdescendant = False
968 993 if lowestrev == nullrev: # Everybody is a descendant of nullid
969 994 isdescendant = True
970 995 elif n in descendants:
971 996 # n is already a descendant
972 997 isdescendant = True
973 998 # This check only needs to be done here because all the roots
974 999 # will start being marked as descendants before the loop.
975 1000 if n in roots:
976 1001 # If n was a root, check if it's a 'real' root.
977 1002 p = tuple(self.parents(n))
978 1003 # If any of its parents are descendants, it's not a root.
979 1004 if (p[0] in descendants) or (p[1] in descendants):
980 1005 roots.remove(n)
981 1006 else:
982 1007 p = tuple(self.parents(n))
983 1008 # A node is a descendant if either of its parents are
984 1009 # descendants. (We seeded the descendants set with the roots
985 1010 # up there, remember?)
986 1011 if (p[0] in descendants) or (p[1] in descendants):
987 1012 descendants.add(n)
988 1013 isdescendant = True
989 1014 if isdescendant and ((ancestors is None) or (n in ancestors)):
990 1015 # Only include nodes that are both descendants and ancestors.
991 1016 orderedout.append(n)
992 1017 if (ancestors is not None) and (n in heads):
993 1018 # We're trying to figure out which heads are reachable
994 1019 # from roots.
995 1020 # Mark this head as having been reached
996 1021 heads[n] = True
997 1022 elif ancestors is None:
998 1023 # Otherwise, we're trying to discover the heads.
999 1024 # Assume this is a head because if it isn't, the next step
1000 1025 # will eventually remove it.
1001 1026 heads[n] = True
1002 1027 # But, obviously its parents aren't.
1003 1028 for p in self.parents(n):
1004 1029 heads.pop(p, None)
1005 1030 heads = [head for head, flag in heads.iteritems() if flag]
1006 1031 roots = list(roots)
1007 1032 assert orderedout
1008 1033 assert roots
1009 1034 assert heads
1010 1035 return (orderedout, roots, heads)
1011 1036
1012 1037 def headrevs(self):
1013 1038 try:
1014 1039 return self.index.headrevs()
1015 1040 except AttributeError:
1016 1041 return self._headrevs()
1017 1042
1018 1043 def computephases(self, roots):
1019 1044 return self.index.computephasesmapsets(roots)
1020 1045
1021 1046 def _headrevs(self):
1022 1047 count = len(self)
1023 1048 if not count:
1024 1049 return [nullrev]
1025 1050 # we won't iter over filtered rev so nobody is a head at start
1026 1051 ishead = [0] * (count + 1)
1027 1052 index = self.index
1028 1053 for r in self:
1029 1054 ishead[r] = 1 # I may be a head
1030 1055 e = index[r]
1031 1056 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1032 1057 return [r for r, val in enumerate(ishead) if val]
1033 1058
1034 1059 def heads(self, start=None, stop=None):
1035 1060 """return the list of all nodes that have no children
1036 1061
1037 1062 if start is specified, only heads that are descendants of
1038 1063 start will be returned
1039 1064 if stop is specified, it will consider all the revs from stop
1040 1065 as if they had no children
1041 1066 """
1042 1067 if start is None and stop is None:
1043 1068 if not len(self):
1044 1069 return [nullid]
1045 1070 return [self.node(r) for r in self.headrevs()]
1046 1071
1047 1072 if start is None:
1048 1073 start = nullid
1049 1074 if stop is None:
1050 1075 stop = []
1051 1076 stoprevs = set([self.rev(n) for n in stop])
1052 1077 startrev = self.rev(start)
1053 1078 reachable = {startrev}
1054 1079 heads = {startrev}
1055 1080
1056 1081 parentrevs = self.parentrevs
1057 1082 for r in self.revs(start=startrev + 1):
1058 1083 for p in parentrevs(r):
1059 1084 if p in reachable:
1060 1085 if r not in stoprevs:
1061 1086 reachable.add(r)
1062 1087 heads.add(r)
1063 1088 if p in heads and p not in stoprevs:
1064 1089 heads.remove(p)
1065 1090
1066 1091 return [self.node(r) for r in heads]
1067 1092
1068 1093 def children(self, node):
1069 1094 """find the children of a given node"""
1070 1095 c = []
1071 1096 p = self.rev(node)
1072 1097 for r in self.revs(start=p + 1):
1073 1098 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1074 1099 if prevs:
1075 1100 for pr in prevs:
1076 1101 if pr == p:
1077 1102 c.append(self.node(r))
1078 1103 elif p == nullrev:
1079 1104 c.append(self.node(r))
1080 1105 return c
1081 1106
1082 1107 def descendant(self, start, end):
1083 1108 if start == nullrev:
1084 1109 return True
1085 1110 for i in self.descendants([start]):
1086 1111 if i == end:
1087 1112 return True
1088 1113 elif i > end:
1089 1114 break
1090 1115 return False
1091 1116
1092 1117 def commonancestorsheads(self, a, b):
1093 1118 """calculate all the heads of the common ancestors of nodes a and b"""
1094 1119 a, b = self.rev(a), self.rev(b)
1095 1120 try:
1096 1121 ancs = self.index.commonancestorsheads(a, b)
1097 1122 except (AttributeError, OverflowError): # C implementation failed
1098 1123 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1099 1124 return pycompat.maplist(self.node, ancs)
1100 1125
1101 1126 def isancestor(self, a, b):
1102 1127 """return True if node a is an ancestor of node b
1103 1128
1104 1129 The implementation of this is trivial but the use of
1105 1130 commonancestorsheads is not."""
1106 1131 return a in self.commonancestorsheads(a, b)
1107 1132
1108 1133 def ancestor(self, a, b):
1109 1134 """calculate the "best" common ancestor of nodes a and b"""
1110 1135
1111 1136 a, b = self.rev(a), self.rev(b)
1112 1137 try:
1113 1138 ancs = self.index.ancestors(a, b)
1114 1139 except (AttributeError, OverflowError):
1115 1140 ancs = ancestor.ancestors(self.parentrevs, a, b)
1116 1141 if ancs:
1117 1142 # choose a consistent winner when there's a tie
1118 1143 return min(map(self.node, ancs))
1119 1144 return nullid
1120 1145
1121 1146 def _match(self, id):
1122 1147 if isinstance(id, int):
1123 1148 # rev
1124 1149 return self.node(id)
1125 1150 if len(id) == 20:
1126 1151 # possibly a binary node
1127 1152 # odds of a binary node being all hex in ASCII are 1 in 10**25
1128 1153 try:
1129 1154 node = id
1130 1155 self.rev(node) # quick search the index
1131 1156 return node
1132 1157 except LookupError:
1133 1158 pass # may be partial hex id
1134 1159 try:
1135 1160 # str(rev)
1136 1161 rev = int(id)
1137 1162 if str(rev) != id:
1138 1163 raise ValueError
1139 1164 if rev < 0:
1140 1165 rev = len(self) + rev
1141 1166 if rev < 0 or rev >= len(self):
1142 1167 raise ValueError
1143 1168 return self.node(rev)
1144 1169 except (ValueError, OverflowError):
1145 1170 pass
1146 1171 if len(id) == 40:
1147 1172 try:
1148 1173 # a full hex nodeid?
1149 1174 node = bin(id)
1150 1175 self.rev(node)
1151 1176 return node
1152 1177 except (TypeError, LookupError):
1153 1178 pass
1154 1179
1155 1180 def _partialmatch(self, id):
1156 1181 maybewdir = wdirhex.startswith(id)
1157 1182 try:
1158 1183 partial = self.index.partialmatch(id)
1159 1184 if partial and self.hasnode(partial):
1160 1185 if maybewdir:
1161 1186 # single 'ff...' match in radix tree, ambiguous with wdir
1162 1187 raise RevlogError
1163 1188 return partial
1164 1189 if maybewdir:
1165 1190 # no 'ff...' match in radix tree, wdir identified
1166 1191 raise error.WdirUnsupported
1167 1192 return None
1168 1193 except RevlogError:
1169 1194 # parsers.c radix tree lookup gave multiple matches
1170 1195 # fast path: for unfiltered changelog, radix tree is accurate
1171 1196 if not getattr(self, 'filteredrevs', None):
1172 1197 raise LookupError(id, self.indexfile,
1173 1198 _('ambiguous identifier'))
1174 1199 # fall through to slow path that filters hidden revisions
1175 1200 except (AttributeError, ValueError):
1176 1201 # we are pure python, or key was too short to search radix tree
1177 1202 pass
1178 1203
1179 1204 if id in self._pcache:
1180 1205 return self._pcache[id]
1181 1206
1182 1207 if len(id) < 40:
1183 1208 try:
1184 1209 # hex(node)[:...]
1185 1210 l = len(id) // 2 # grab an even number of digits
1186 1211 prefix = bin(id[:l * 2])
1187 1212 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1188 1213 nl = [n for n in nl if hex(n).startswith(id) and
1189 1214 self.hasnode(n)]
1190 1215 if len(nl) > 0:
1191 1216 if len(nl) == 1 and not maybewdir:
1192 1217 self._pcache[id] = nl[0]
1193 1218 return nl[0]
1194 1219 raise LookupError(id, self.indexfile,
1195 1220 _('ambiguous identifier'))
1196 1221 if maybewdir:
1197 1222 raise error.WdirUnsupported
1198 1223 return None
1199 1224 except (TypeError, binascii.Error):
1200 1225 pass
1201 1226
1202 1227 def lookup(self, id):
1203 1228 """locate a node based on:
1204 1229 - revision number or str(revision number)
1205 1230 - nodeid or subset of hex nodeid
1206 1231 """
1207 1232 n = self._match(id)
1208 1233 if n is not None:
1209 1234 return n
1210 1235 n = self._partialmatch(id)
1211 1236 if n:
1212 1237 return n
1213 1238
1214 1239 raise LookupError(id, self.indexfile, _('no match found'))
1215 1240
1216 1241 def shortest(self, hexnode, minlength=1):
1217 1242 """Find the shortest unambiguous prefix that matches hexnode."""
1218 1243 def isvalid(test):
1219 1244 try:
1220 1245 if self._partialmatch(test) is None:
1221 1246 return False
1222 1247
1223 1248 try:
1224 1249 i = int(test)
1225 1250 # if we are a pure int, then starting with zero will not be
1226 1251 # confused as a rev; or, obviously, if the int is larger
1227 1252 # than the value of the tip rev
1228 1253 if test[0] == '0' or i > len(self):
1229 1254 return True
1230 1255 return False
1231 1256 except ValueError:
1232 1257 return True
1233 1258 except error.RevlogError:
1234 1259 return False
1235 1260 except error.WdirUnsupported:
1236 1261 # single 'ff...' match
1237 1262 return True
1238 1263
1239 1264 shortest = hexnode
1240 1265 startlength = max(6, minlength)
1241 1266 length = startlength
1242 1267 while True:
1243 1268 test = hexnode[:length]
1244 1269 if isvalid(test):
1245 1270 shortest = test
1246 1271 if length == minlength or length > startlength:
1247 1272 return shortest
1248 1273 length -= 1
1249 1274 else:
1250 1275 length += 1
1251 1276 if len(shortest) <= length:
1252 1277 return shortest
1253 1278
1254 1279 def cmp(self, node, text):
1255 1280 """compare text with a given file revision
1256 1281
1257 1282 returns True if text is different than what is stored.
1258 1283 """
1259 1284 p1, p2 = self.parents(node)
1260 1285 return hash(text, p1, p2) != node
1261 1286
1262 1287 def _cachesegment(self, offset, data):
1263 1288 """Add a segment to the revlog cache.
1264 1289
1265 1290 Accepts an absolute offset and the data that is at that location.
1266 1291 """
1267 1292 o, d = self._chunkcache
1268 1293 # try to add to existing cache
1269 1294 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1270 1295 self._chunkcache = o, d + data
1271 1296 else:
1272 1297 self._chunkcache = offset, data
1273 1298
1274 1299 def _readsegment(self, offset, length, df=None):
1275 1300 """Load a segment of raw data from the revlog.
1276 1301
1277 1302 Accepts an absolute offset, length to read, and an optional existing
1278 1303 file handle to read from.
1279 1304
1280 1305 If an existing file handle is passed, it will be seeked and the
1281 1306 original seek position will NOT be restored.
1282 1307
1283 1308 Returns a str or buffer of raw byte data.
1284 1309 """
1285 1310 if df is not None:
1286 1311 closehandle = False
1287 1312 else:
1288 1313 if self._inline:
1289 1314 df = self.opener(self.indexfile)
1290 1315 else:
1291 1316 df = self.opener(self.datafile)
1292 1317 closehandle = True
1293 1318
1294 1319 # Cache data both forward and backward around the requested
1295 1320 # data, in a fixed size window. This helps speed up operations
1296 1321 # involving reading the revlog backwards.
1297 1322 cachesize = self._chunkcachesize
1298 1323 realoffset = offset & ~(cachesize - 1)
1299 1324 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1300 1325 - realoffset)
1301 1326 df.seek(realoffset)
1302 1327 d = df.read(reallength)
1303 1328 if closehandle:
1304 1329 df.close()
1305 1330 self._cachesegment(realoffset, d)
1306 1331 if offset != realoffset or reallength != length:
1307 1332 return util.buffer(d, offset - realoffset, length)
1308 1333 return d
1309 1334
1310 1335 def _getsegment(self, offset, length, df=None):
1311 1336 """Obtain a segment of raw data from the revlog.
1312 1337
1313 1338 Accepts an absolute offset, length of bytes to obtain, and an
1314 1339 optional file handle to the already-opened revlog. If the file
1315 1340 handle is used, its original seek position will not be preserved.
1316 1341
1317 1342 Requests for data may be returned from a cache.
1318 1343
1319 1344 Returns a str or a buffer instance of raw byte data.
1320 1345 """
1321 1346 o, d = self._chunkcache
1322 1347 l = len(d)
1323 1348
1324 1349 # is it in the cache?
1325 1350 cachestart = offset - o
1326 1351 cacheend = cachestart + length
1327 1352 if cachestart >= 0 and cacheend <= l:
1328 1353 if cachestart == 0 and cacheend == l:
1329 1354 return d # avoid a copy
1330 1355 return util.buffer(d, cachestart, cacheend - cachestart)
1331 1356
1332 1357 return self._readsegment(offset, length, df=df)
1333 1358
1334 1359 def _getsegmentforrevs(self, startrev, endrev, df=None):
1335 1360 """Obtain a segment of raw data corresponding to a range of revisions.
1336 1361
1337 1362 Accepts the start and end revisions and an optional already-open
1338 1363 file handle to be used for reading. If the file handle is read, its
1339 1364 seek position will not be preserved.
1340 1365
1341 1366 Requests for data may be satisfied by a cache.
1342 1367
1343 1368 Returns a 2-tuple of (offset, data) for the requested range of
1344 1369 revisions. Offset is the integer offset from the beginning of the
1345 1370 revlog and data is a str or buffer of the raw byte data.
1346 1371
1347 1372 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1348 1373 to determine where each revision's data begins and ends.
1349 1374 """
1350 1375 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1351 1376 # (functions are expensive).
1352 1377 index = self.index
1353 1378 istart = index[startrev]
1354 1379 start = int(istart[0] >> 16)
1355 1380 if startrev == endrev:
1356 1381 end = start + istart[1]
1357 1382 else:
1358 1383 iend = index[endrev]
1359 1384 end = int(iend[0] >> 16) + iend[1]
1360 1385
1361 1386 if self._inline:
1362 1387 start += (startrev + 1) * self._io.size
1363 1388 end += (endrev + 1) * self._io.size
1364 1389 length = end - start
1365 1390
1366 1391 return start, self._getsegment(start, length, df=df)
1367 1392
1368 1393 def _chunk(self, rev, df=None):
1369 1394 """Obtain a single decompressed chunk for a revision.
1370 1395
1371 1396 Accepts an integer revision and an optional already-open file handle
1372 1397 to be used for reading. If used, the seek position of the file will not
1373 1398 be preserved.
1374 1399
1375 1400 Returns a str holding uncompressed data for the requested revision.
1376 1401 """
1377 1402 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1378 1403
1379 1404 def _chunks(self, revs, df=None):
1380 1405 """Obtain decompressed chunks for the specified revisions.
1381 1406
1382 1407 Accepts an iterable of numeric revisions that are assumed to be in
1383 1408 ascending order. Also accepts an optional already-open file handle
1384 1409 to be used for reading. If used, the seek position of the file will
1385 1410 not be preserved.
1386 1411
1387 1412 This function is similar to calling ``self._chunk()`` multiple times,
1388 1413 but is faster.
1389 1414
1390 1415 Returns a list with decompressed data for each requested revision.
1391 1416 """
1392 1417 if not revs:
1393 1418 return []
1394 1419 start = self.start
1395 1420 length = self.length
1396 1421 inline = self._inline
1397 1422 iosize = self._io.size
1398 1423 buffer = util.buffer
1399 1424
1400 1425 l = []
1401 1426 ladd = l.append
1402 1427
1403 1428 if not self._withsparseread:
1404 1429 slicedchunks = (revs,)
1405 1430 else:
1406 1431 slicedchunks = _slicechunk(self, revs)
1407 1432
1408 1433 for revschunk in slicedchunks:
1409 1434 firstrev = revschunk[0]
1410 1435 # Skip trailing revisions with empty diff
1411 1436 for lastrev in revschunk[::-1]:
1412 1437 if length(lastrev) != 0:
1413 1438 break
1414 1439
1415 1440 try:
1416 1441 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1417 1442 except OverflowError:
1418 1443 # issue4215 - we can't cache a run of chunks greater than
1419 1444 # 2G on Windows
1420 1445 return [self._chunk(rev, df=df) for rev in revschunk]
1421 1446
1422 1447 decomp = self.decompress
1423 1448 for rev in revschunk:
1424 1449 chunkstart = start(rev)
1425 1450 if inline:
1426 1451 chunkstart += (rev + 1) * iosize
1427 1452 chunklength = length(rev)
1428 1453 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1429 1454
1430 1455 return l
1431 1456
1432 1457 def _chunkclear(self):
1433 1458 """Clear the raw chunk cache."""
1434 1459 self._chunkcache = (0, '')
1435 1460
1436 1461 def deltaparent(self, rev):
1437 1462 """return deltaparent of the given revision"""
1438 1463 base = self.index[rev][3]
1439 1464 if base == rev:
1440 1465 return nullrev
1441 1466 elif self._generaldelta:
1442 1467 return base
1443 1468 else:
1444 1469 return rev - 1
1445 1470
1446 1471 def revdiff(self, rev1, rev2):
1447 1472 """return or calculate a delta between two revisions
1448 1473
1449 1474 The delta calculated is in binary form and is intended to be written to
1450 1475 revlog data directly. So this function needs raw revision data.
1451 1476 """
1452 1477 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1453 1478 return bytes(self._chunk(rev2))
1454 1479
1455 1480 return mdiff.textdiff(self.revision(rev1, raw=True),
1456 1481 self.revision(rev2, raw=True))
1457 1482
1458 1483 def revision(self, nodeorrev, _df=None, raw=False):
1459 1484 """return an uncompressed revision of a given node or revision
1460 1485 number.
1461 1486
1462 1487 _df - an existing file handle to read from. (internal-only)
1463 1488 raw - an optional argument specifying if the revision data is to be
1464 1489 treated as raw data when applying flag transforms. 'raw' should be set
1465 1490 to True when generating changegroups or in debug commands.
1466 1491 """
1467 1492 if isinstance(nodeorrev, int):
1468 1493 rev = nodeorrev
1469 1494 node = self.node(rev)
1470 1495 else:
1471 1496 node = nodeorrev
1472 1497 rev = None
1473 1498
1474 1499 cachedrev = None
1475 1500 flags = None
1476 1501 rawtext = None
1477 1502 if node == nullid:
1478 1503 return ""
1479 1504 if self._cache:
1480 1505 if self._cache[0] == node:
1481 1506 # _cache only stores rawtext
1482 1507 if raw:
1483 1508 return self._cache[2]
1484 1509 # duplicated, but good for perf
1485 1510 if rev is None:
1486 1511 rev = self.rev(node)
1487 1512 if flags is None:
1488 1513 flags = self.flags(rev)
1489 1514 # no extra flags set, no flag processor runs, text = rawtext
1490 1515 if flags == REVIDX_DEFAULT_FLAGS:
1491 1516 return self._cache[2]
1492 1517 # rawtext is reusable. need to run flag processor
1493 1518 rawtext = self._cache[2]
1494 1519
1495 1520 cachedrev = self._cache[1]
1496 1521
1497 1522 # look up what we need to read
1498 1523 if rawtext is None:
1499 1524 if rev is None:
1500 1525 rev = self.rev(node)
1501 1526
1502 1527 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1503 1528 if stopped:
1504 1529 rawtext = self._cache[2]
1505 1530
1506 1531 # drop cache to save memory
1507 1532 self._cache = None
1508 1533
1509 1534 bins = self._chunks(chain, df=_df)
1510 1535 if rawtext is None:
1511 1536 rawtext = bytes(bins[0])
1512 1537 bins = bins[1:]
1513 1538
1514 1539 rawtext = mdiff.patches(rawtext, bins)
1515 1540 self._cache = (node, rev, rawtext)
1516 1541
1517 1542 if flags is None:
1518 1543 if rev is None:
1519 1544 rev = self.rev(node)
1520 1545 flags = self.flags(rev)
1521 1546
1522 1547 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1523 1548 if validatehash:
1524 1549 self.checkhash(text, node, rev=rev)
1525 1550
1526 1551 return text
1527 1552
1528 1553 def hash(self, text, p1, p2):
1529 1554 """Compute a node hash.
1530 1555
1531 1556 Available as a function so that subclasses can replace the hash
1532 1557 as needed.
1533 1558 """
1534 1559 return hash(text, p1, p2)
1535 1560
1536 1561 def _processflags(self, text, flags, operation, raw=False):
1537 1562 """Inspect revision data flags and applies transforms defined by
1538 1563 registered flag processors.
1539 1564
1540 1565 ``text`` - the revision data to process
1541 1566 ``flags`` - the revision flags
1542 1567 ``operation`` - the operation being performed (read or write)
1543 1568 ``raw`` - an optional argument describing if the raw transform should be
1544 1569 applied.
1545 1570
1546 1571 This method processes the flags in the order (or reverse order if
1547 1572 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1548 1573 flag processors registered for present flags. The order of flags defined
1549 1574 in REVIDX_FLAGS_ORDER needs to be stable because the transforms are not commutative.
1550 1575
1551 1576 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1552 1577 processed text and ``validatehash`` is a bool indicating whether the
1553 1578 returned text should be checked for hash integrity.
1554 1579
1555 1580 Note: If the ``raw`` argument is set, it has precedence over the
1556 1581 operation and will only update the value of ``validatehash``.
1557 1582 """
1558 1583 # fast path: no flag processors will run
1559 1584 if flags == 0:
1560 1585 return text, True
1561 1586 if not operation in ('read', 'write'):
1562 1587 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1563 1588 # Check all flags are known.
1564 1589 if flags & ~REVIDX_KNOWN_FLAGS:
1565 1590 raise RevlogError(_("incompatible revision flag '%#x'") %
1566 1591 (flags & ~REVIDX_KNOWN_FLAGS))
1567 1592 validatehash = True
1568 1593 # Depending on the operation (read or write), the order might be
1569 1594 # reversed due to non-commutative transforms.
1570 1595 orderedflags = REVIDX_FLAGS_ORDER
1571 1596 if operation == 'write':
1572 1597 orderedflags = reversed(orderedflags)
1573 1598
1574 1599 for flag in orderedflags:
1575 1600 # If a flagprocessor has been registered for a known flag, apply the
1576 1601 # related operation transform and update result tuple.
1577 1602 if flag & flags:
1578 1603 vhash = True
1579 1604
1580 1605 if flag not in _flagprocessors:
1581 1606 message = _("missing processor for flag '%#x'") % (flag)
1582 1607 raise RevlogError(message)
1583 1608
1584 1609 processor = _flagprocessors[flag]
1585 1610 if processor is not None:
1586 1611 readtransform, writetransform, rawtransform = processor
1587 1612
1588 1613 if raw:
1589 1614 vhash = rawtransform(self, text)
1590 1615 elif operation == 'read':
1591 1616 text, vhash = readtransform(self, text)
1592 1617 else: # write operation
1593 1618 text, vhash = writetransform(self, text)
1594 1619 validatehash = validatehash and vhash
1595 1620
1596 1621 return text, validatehash
1597 1622
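    # Registration sketch (hypothetical helpers): an entry in _flagprocessors
    # (normally registered via addflagprocessor()) is a 3-tuple of
    # (readtransform, writetransform, rawtransform), unpacked above. A
    # processor that keeps the real content out of the revlog might look like:
    #
    #   def _readext(rl, rawtext):
    #       return fetchexternal(rawtext), False   # text differs from rawtext
    #   def _writeext(rl, text):
    #       return storeexternal(text), False
    #   def _rawext(rl, rawtext):
    #       return False                           # rawtext hash not checkable
    #
    #   addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))
    #
    # ``fetchexternal``/``storeexternal`` are placeholders; returning False
    # makes ``validatehash`` False, so callers skip checkhash() on the result.
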
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, pycompat.bytestr(revornode)))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True,
                         checkambig=self._checkambig)
        self.version &= ~FLAG_INLINE_DATA
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

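    # Layout note (sketch): an inline revlog interleaves each revision's data
    # with its index entry in the .i file. After the conversion above, the .i
    # file holds only fixed-size entries (``self._io.size`` bytes each) and
    # all chunks live in the .d file, which is why the transaction's
    # truncation offset is replaced with ``trindex * self._io.size``.
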
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

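    # Header convention produced above (illustrative byte values):
    #
    #   ('', '\x78\x9c...')  - compressed; the engine's header leads the data
    #   ('', '\x00...')      - stored as-is; a leading NUL is self-marking
    #   ('u', 'some text')   - stored as-is; 'u' flags uncompressed content
    #
    # decompress() below dispatches on exactly this first byte.
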
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') % str(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d

        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = dist # ensure the conditional below passes
        maxdist = max(maxdist, defaultmax)
        if (dist > maxdist or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True

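    # Worked example (illustrative numbers): for a fulltext of textlen = 1000
    # bytes, a candidate delta is rejected when its own size l exceeds 1000
    # (a fulltext would be cheaper to store), when the chain's
    # compresseddeltalen exceeds 2000 (2x the fulltext, too much CPU to
    # apply), when a configured _maxchainlen is exceeded, or when dist
    # exceeds max(_maxdeltachainspan, 4000). If no span limit is configured,
    # the distance check is disabled by seeding maxdist with dist itself.
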
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        btext = [rawtext]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw when
            # they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                if self._inline:
                    fh = ifh
                else:
                    fh = dfh
                basetext = self.revision(baserev, _df=fh, raw=True)
                btext[0] = mdiff.patch(basetext, delta)

            try:
                res = self._processflags(btext[0], flags, 'read', raw=True)
                btext[0], validatehash = res
                if validatehash:
                    self.checkhash(btext[0], node, p1=p1, p2=p2)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    if self._inline:
                        fh = ifh
                    else:
                        fh = dfh
                    ptext = self.revision(rev, _df=fh, raw=True)
                    delta = mdiff.textdiff(ptext, t)
            header, data = self.compress(delta)
            deltalen = len(header) + len(data)
            chainbase = self.chainbase(rev)
            dist = deltalen + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += deltalen
            return (dist, deltalen, (header, data), base,
                    chainbase, chainlen, compresseddeltalen)

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        delta = None
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        # should we try to build a delta?
        if prev != nullrev and self.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and self._generaldelta and self._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # builddelta will reuse the cache
                candidatedelta = builddelta(cachedelta[0])
                tested.add(cachedelta[0])
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
            if delta is None and self._generaldelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not self._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                pdeltas = []
                for p in parents:
                    pd = builddelta(p)
                    if self._isgooddelta(pd, textlen):
                        pdeltas.append(pd)
                if pdeltas:
                    delta = min(pdeltas, key=lambda x: x[1])
            if delta is None and prev not in tested:
                # the other approaches failed; try against prev to hopefully
                # save us a fulltext.
                candidatedelta = builddelta(prev)
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
        if delta is not None:
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
        else:
            rawtext = buildtext()
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = buildtext()

        if type(rawtext) == str: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

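    # Index entry sketch: the 8-tuple inserted above is, in order,
    # (offset_type(offset, flags), compressed length, fulltext length,
    # delta base rev, linkrev, p1 rev, p2 rev, node) - the same fields
    # clone() reads back from self.index further down.
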
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb))

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+",
                                      checkambig=self._checkambig)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

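    # Input sketch: each element of ``deltas`` is the 7-tuple unpacked in the
    # loop above, e.g. (hypothetical values):
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags)
    #
    # where ``delta`` is a binary patch against the revision named by
    # ``deltabase``, which must already exist in this revlog (or be an
    # earlier revision added by the same group).
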
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

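    # Worked example (hypothetical two-headed filelog): revs 0, 1, 2 with
    # linkrevs [0, 5, 3], where revs 1 and 2 are both children of rev 0.
    # getstrippoint(4) returns (1, {2}): rev 1 is the lowest rev whose
    # linkrev (5) is >= minlink, and rev 2 is reported broken because it
    # sits above the strip point even though its own linkrev (3) is below
    # minlink, so the caller must save and re-add it after truncation.
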
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEALL = {'always', 'samerevs', 'never'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is the
           fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        If not set, the current default is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, str(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                        checkambig=False)
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                try:
                    destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
                                            flags, cachedelta, ifh, dfh)
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd
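
    # Usage sketch (hypothetical names): recompute every delta while copying
    # a revlog, e.g. when upgrading a non-generaldelta revlog to generaldelta:
    #
    #   with repo.transaction('upgrade') as tr:
    #       src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
    #
    # ``src`` and ``dst`` are revlog instances opened by the caller; per the
    # checks above, the destination must be empty and neither side may have
    # filtered revisions.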