sparse-read: move from a recursive-based approach to a heap-based one...
Boris Feld
r34881:9e18ab7f default
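
In summary: the old _slicechunk recursed, splitting each span of revisions at its single largest hole until every span was dense enough or no larger than _srminblocksize. The new version makes a single pass instead: it pushes every inter-revision gap onto a min-heap keyed by negated size (so the largest gap pops first), pops gaps off until the projected read density reaches _srdensitythreshold, and then cuts the revision list at the collected gap indices. A standalone sketch of the algorithm appears after the _slicechunk hunk below.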
@@ -1,2338 +1,2349 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import errno
19 19 import hashlib
20 import heapq
20 21 import os
21 22 import struct
22 23 import zlib
23 24
24 25 # import stuff from node for others to import from revlog
25 26 from .node import (
26 27 bin,
27 28 hex,
28 29 nullid,
29 30 nullrev,
30 31 wdirhex,
31 32 wdirid,
32 33 wdirrev,
33 34 )
34 35 from .i18n import _
35 36 from . import (
36 37 ancestor,
37 38 error,
38 39 mdiff,
39 40 policy,
40 41 pycompat,
41 42 templatefilters,
42 43 util,
43 44 )
44 45
45 46 parsers = policy.importmod(r'parsers')
46 47
47 48 # Aliased for performance.
48 49 _zlibdecompress = zlib.decompress
49 50
50 51 # revlog header flags
51 52 REVLOGV0 = 0
52 53 REVLOGV1 = 1
53 54 # Dummy value until file format is finalized.
54 55 # Reminder: change the bounds check in revlog.__init__ when this is changed.
55 56 REVLOGV2 = 0xDEAD
56 57 FLAG_INLINE_DATA = (1 << 16)
57 58 FLAG_GENERALDELTA = (1 << 17)
58 59 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
59 60 REVLOG_DEFAULT_FORMAT = REVLOGV1
60 61 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
61 62 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
62 63 REVLOGV2_FLAGS = REVLOGV1_FLAGS
63 64
64 65 # revlog index flags
65 66 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
66 67 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
67 68 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
68 69 REVIDX_DEFAULT_FLAGS = 0
69 70 # stable order in which flags need to be processed and their processors applied
70 71 REVIDX_FLAGS_ORDER = [
71 72 REVIDX_ISCENSORED,
72 73 REVIDX_ELLIPSIS,
73 74 REVIDX_EXTSTORED,
74 75 ]
75 76 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
76 77
77 78 # max size of revlog with inline data
78 79 _maxinline = 131072
79 80 _chunksize = 1048576
80 81
81 82 RevlogError = error.RevlogError
82 83 LookupError = error.LookupError
83 84 CensoredNodeError = error.CensoredNodeError
84 85 ProgrammingError = error.ProgrammingError
85 86
86 87 # Store flag processors (cf. 'addflagprocessor()' to register)
87 88 _flagprocessors = {
88 89 REVIDX_ISCENSORED: None,
89 90 }
90 91
91 92 def addflagprocessor(flag, processor):
92 93 """Register a flag processor on a revision data flag.
93 94
94 95 Invariant:
95 96 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
96 97 - Only one flag processor can be registered on a specific flag.
97 98 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
98 99 following signatures:
99 100 - (read) f(self, rawtext) -> text, bool
100 101 - (write) f(self, text) -> rawtext, bool
101 102 - (raw) f(self, rawtext) -> bool
102 103 "text" is presented to the user. "rawtext" is stored in revlog data, not
103 104 directly visible to the user.
104 105 The boolean returned by these transforms is used to determine whether
105 106 the returned text can be used for hash integrity checking. For example,
106 107 if "write" returns False, then "text" is used to generate hash. If
107 108 "write" returns True, that basically means "rawtext" returned by "write"
108 109 should be used to generate hash. Usually, "write" and "read" return
109 110 different booleans. And "raw" returns the same boolean as "write".
110 111
111 112 Note: The 'raw' transform is used for changegroup generation and in some
112 113 debug commands. In this case the transform only indicates whether the
113 114 contents can be used for hash integrity checks.
114 115 """
115 116 if not flag & REVIDX_KNOWN_FLAGS:
116 117 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
117 118 raise ProgrammingError(msg)
118 119 if flag not in REVIDX_FLAGS_ORDER:
119 120 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
120 121 raise ProgrammingError(msg)
121 122 if flag in _flagprocessors:
122 123 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
123 124 raise error.Abort(msg)
124 125 _flagprocessors[flag] = processor
125 126
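
As a concrete (hypothetical) registration, the sketch below attaches pass-through transforms to REVIDX_EXTSTORED. Per the docstring above, each transform takes the revlog as its first argument; since text == rawtext for an identity transform, every boolean can be True. This is purely illustrative, not how extensions actually handle externally stored revisions:

    # hypothetical pass-through transforms; text == rawtext, so the
    # result stays usable for hash integrity checking (True)
    def readidentity(self, rawtext):
        return rawtext, True

    def writeidentity(self, text):
        return text, True

    def rawidentity(self, rawtext):
        return True

    # a second registration on the same flag would raise error.Abort
    addflagprocessor(REVIDX_EXTSTORED,
                     (readidentity, writeidentity, rawidentity))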
126 127 def getoffset(q):
127 128 return int(q >> 16)
128 129
129 130 def gettype(q):
130 131 return int(q & 0xFFFF)
131 132
132 133 def offset_type(offset, type):
133 134 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
134 135 raise ValueError('unknown revlog index flags')
135 136 return int(int(offset) << 16 | type)
136 137
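
offset_type packs a byte offset into the high 48 bits of the first 64-bit index entry field and a flags value into the low 16 bits; getoffset and gettype invert it. A quick round-trip check, runnable against the definitions above:

    packed = offset_type(1048576, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1048576
    assert gettype(packed) == REVIDX_ISCENSORED
    # flags outside REVIDX_KNOWN_FLAGS make offset_type raise ValueError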
137 138 _nullhash = hashlib.sha1(nullid)
138 139
139 140 def hash(text, p1, p2):
140 141 """generate a hash from the given text and its parent hashes
141 142
142 143 This hash combines both the current file contents and its history
143 144 in a manner that makes it easy to distinguish nodes with the same
144 145 content in the revision graph.
145 146 """
146 147 # As of now, if one of the parent nodes is null, p2 is null
147 148 if p2 == nullid:
148 149 # deep copy of a hash is faster than creating one
149 150 s = _nullhash.copy()
150 151 s.update(p1)
151 152 else:
152 153 # none of the parent nodes are nullid
153 154 if p1 < p2:
154 155 a = p1
155 156 b = p2
156 157 else:
157 158 a = p2
158 159 b = p1
159 160 s = hashlib.sha1(a)
160 161 s.update(b)
161 162 s.update(text)
162 163 return s.digest()
163 164
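
Because the two parents are sorted before hashing, the node id does not depend on which parent is p1 and which is p2. A quick sketch, where the 20-byte values stand in for real node ids and ``hash`` is the module-level helper above (not the builtin):

    p1 = b'\x11' * 20
    p2 = b'\x22' * 20
    assert hash(b'some text', p1, p2) == hash(b'some text', p2, p1)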
164 165 def _slicechunk(revlog, revs):
165 166 """slice revs to reduce the amount of unrelated data to be read from disk.
166 167
167 168 ``revs`` is sliced into groups that should be read in one go.
168 169 Assume that revs are sorted.
169 170 """
170 171 start = revlog.start
171 172 length = revlog.length
172 173
173 chunkqueue = collections.deque()
174 chunkqueue.append((revs, 0))
174 if len(revs) <= 1:
175 yield revs
176 return
175 177
176 while chunkqueue:
177 revs, depth = chunkqueue.popleft()
178 startbyte = start(revs[0])
179 endbyte = start(revs[-1]) + length(revs[-1])
180 readdata = deltachainspan = endbyte - startbyte
181
182 chainpayload = sum(length(r) for r in revs)
178 183
179 startbyte = start(revs[0])
180 endbyte = start(revs[-1]) + length(revs[-1])
181 deltachainspan = endbyte - startbyte
184 if deltachainspan:
185 density = chainpayload / float(deltachainspan)
186 else:
187 density = 1.0
182 188
183 if deltachainspan <= revlog._srminblocksize or len(revs) <= 1:
184 yield revs
185 continue
189 # Store the gaps in a heap to have them sorted by decreasing size
190 gapsheap = []
191 heapq.heapify(gapsheap)
192 prevend = None
193 for i, rev in enumerate(revs):
194 revstart = start(rev)
195 revlen = length(rev)
186 196
187 # Find where is the largest hole (this is where we would split) and
188 # sum up the lengths of useful data to compute the density of the span
189 textlen = 0
190 prevend = None
191 largesthole = 0
192 idxlargesthole = -1
193 for i, rev in enumerate(revs):
194 revstart = start(rev)
195 revlen = length(rev)
197 if prevend is not None:
198 gapsize = revstart - prevend
199 if gapsize:
200 heapq.heappush(gapsheap, (-gapsize, i))
201
202 prevend = revstart + revlen
203
204 # Collect the indices of the largest holes until the density is acceptable
205 indicesheap = []
206 heapq.heapify(indicesheap)
207 while gapsheap and density < revlog._srdensitythreshold:
208 oppgapsize, gapidx = heapq.heappop(gapsheap)
209
210 heapq.heappush(indicesheap, gapidx)
196 211
197 if prevend is not None:
198 hole = revstart - prevend
199 if hole > largesthole:
200 largesthole = hole
201 idxlargesthole = i
202
203 textlen += revlen
204 prevend = revstart + revlen
212 # gap sizes are stored as negatives so that the min-heap
213 # yields the largest gap first
214 readdata -= (-oppgapsize)
215 if readdata > 0:
216 density = chainpayload / float(readdata)
217 else:
218 density = 1.0
205 219
206 density = textlen / float(deltachainspan) if deltachainspan > 0 else 1.0
207
208 if density > revlog._srdensitythreshold:
209 yield revs
210 continue
211
212 # Add the left and right parts so that they will be sliced
213 # recursively too
214 chunkqueue.append((revs[:idxlargesthole], depth + 1))
215 chunkqueue.append((revs[idxlargesthole:], depth + 1))
220 # Cut the revs at collected indices
221 previdx = 0
222 while indicesheap:
223 idx = heapq.heappop(indicesheap)
224 yield revs[previdx:idx]
225 previdx = idx
226 yield revs[previdx:]
216 227
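
For experimenting with the new strategy outside a repository, here is a self-contained sketch of the same heap-based slicing over plain (start, length) pairs. ``slicechunk``, ``chunks`` and ``targetdensity`` are illustrative names, not part of revlog.py:

    import heapq

    def slicechunk(chunks, revs, targetdensity=0.25):
        # ``chunks`` maps rev -> (start, length) and ``revs`` is sorted,
        # standing in for the revlog's start()/length() accessors
        start = lambda r: chunks[r][0]
        length = lambda r: chunks[r][1]
        if len(revs) <= 1:
            yield revs
            return
        startbyte = start(revs[0])
        endbyte = start(revs[-1]) + length(revs[-1])
        readdata = deltachainspan = endbyte - startbyte
        chainpayload = sum(length(r) for r in revs)
        density = (chainpayload / float(deltachainspan)
                   if deltachainspan else 1.0)
        # push every gap as (-size, index): the min-heap pops the
        # largest gap first
        gapsheap = []
        prevend = None
        for i, rev in enumerate(revs):
            if prevend is not None and start(rev) - prevend > 0:
                heapq.heappush(gapsheap, (prevend - start(rev), i))
            prevend = start(rev) + length(rev)
        # drop the largest gaps from the read until density is acceptable
        indicesheap = []
        while gapsheap and density < targetdensity:
            oppgapsize, gapidx = heapq.heappop(gapsheap)
            heapq.heappush(indicesheap, gapidx)
            readdata += oppgapsize  # oppgapsize is negative
            density = (chainpayload / float(readdata)
                       if readdata > 0 else 1.0)
        # cut the revs at the collected indices, in ascending order
        previdx = 0
        while indicesheap:
            idx = heapq.heappop(indicesheap)
            yield revs[previdx:idx]
            previdx = idx
        yield revs[previdx:]

    # two dense runs separated by a big hole split into two reads
    chunks = {0: (0, 10), 1: (10, 10), 2: (1000, 10), 3: (1010, 10)}
    print(list(slicechunk(chunks, [0, 1, 2, 3])))  # -> [[0, 1], [2, 3]]

Note that the slices come out in index order: ``indicesheap`` is itself a min-heap over the cut positions, so popping it walks the cuts left to right.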
217 228 # index v0:
218 229 # 4 bytes: offset
219 230 # 4 bytes: compressed length
220 231 # 4 bytes: base rev
221 232 # 4 bytes: link rev
222 233 # 20 bytes: parent 1 nodeid
223 234 # 20 bytes: parent 2 nodeid
224 235 # 20 bytes: nodeid
225 236 indexformatv0 = struct.Struct(">4l20s20s20s")
226 237 indexformatv0_pack = indexformatv0.pack
227 238 indexformatv0_unpack = indexformatv0.unpack
228 239
229 240 class revlogoldio(object):
230 241 def __init__(self):
231 242 self.size = indexformatv0.size
232 243
233 244 def parseindex(self, data, inline):
234 245 s = self.size
235 246 index = []
236 247 nodemap = {nullid: nullrev}
237 248 n = off = 0
238 249 l = len(data)
239 250 while off + s <= l:
240 251 cur = data[off:off + s]
241 252 off += s
242 253 e = indexformatv0_unpack(cur)
243 254 # transform to revlogv1 format
244 255 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
245 256 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
246 257 index.append(e2)
247 258 nodemap[e[6]] = n
248 259 n += 1
249 260
250 261 # add the magic null revision at -1
251 262 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
252 263
253 264 return index, nodemap, None
254 265
255 266 def packentry(self, entry, node, version, rev):
256 267 if gettype(entry[0]):
257 268 raise RevlogError(_('index entry flags need revlog version 1'))
258 269 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
259 270 node(entry[5]), node(entry[6]), entry[7])
260 271 return indexformatv0_pack(*e2)
261 272
262 273 # index ng:
263 274 # 6 bytes: offset
264 275 # 2 bytes: flags
265 276 # 4 bytes: compressed length
266 277 # 4 bytes: uncompressed length
267 278 # 4 bytes: base rev
268 279 # 4 bytes: link rev
269 280 # 4 bytes: parent 1 rev
270 281 # 4 bytes: parent 2 rev
271 282 # 32 bytes: nodeid
272 283 indexformatng = struct.Struct(">Qiiiiii20s12x")
273 284 indexformatng_pack = indexformatng.pack
274 285 versionformat = struct.Struct(">I")
275 286 versionformat_pack = versionformat.pack
276 287 versionformat_unpack = versionformat.unpack
277 288
278 289 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
279 290 # signed integer)
280 291 _maxentrysize = 0x7fffffff
281 292
282 293 class revlogio(object):
283 294 def __init__(self):
284 295 self.size = indexformatng.size
285 296
286 297 def parseindex(self, data, inline):
287 298 # call the C implementation to parse the index data
288 299 index, cache = parsers.parse_index2(data, inline)
289 300 return index, getattr(index, 'nodemap', None), cache
290 301
291 302 def packentry(self, entry, node, version, rev):
292 303 p = indexformatng_pack(*entry)
293 304 if rev == 0:
294 305 p = versionformat_pack(version) + p[4:]
295 306 return p
296 307
297 308 class revlog(object):
298 309 """
299 310 the underlying revision storage object
300 311
301 312 A revlog consists of two parts, an index and the revision data.
302 313
303 314 The index is a file with a fixed record size containing
304 315 information on each revision, including its nodeid (hash), the
305 316 nodeids of its parents, the position and offset of its data within
306 317 the data file, and the revision it's based on. Finally, each entry
307 318 contains a linkrev entry that can serve as a pointer to external
308 319 data.
309 320
310 321 The revision data itself is a linear collection of data chunks.
311 322 Each chunk represents a revision and is usually represented as a
312 323 delta against the previous chunk. To bound lookup time, runs of
313 324 deltas are limited to about 2 times the length of the original
314 325 version data. This makes retrieval of a version proportional to
315 326 its size, or O(1) relative to the number of revisions.
316 327
317 328 Both pieces of the revlog are written to in an append-only
318 329 fashion, which means we never need to rewrite a file to insert or
319 330 remove data, and can use some simple techniques to avoid the need
320 331 for locking while reading.
321 332
322 333 If checkambig, indexfile is opened with checkambig=True at
323 334 writing, to avoid file stat ambiguity.
324 335
325 336 If mmaplargeindex is True, and an mmapindexthreshold is set, the
326 337 index will be mmapped rather than read if it is larger than the
327 338 configured threshold.
328 339 """
329 340 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
330 341 mmaplargeindex=False):
331 342 """
332 343 create a revlog object
333 344
334 345 opener is a function that abstracts the file opening operation
335 346 and can be used to implement COW semantics or the like.
336 347 """
337 348 self.indexfile = indexfile
338 349 self.datafile = datafile or (indexfile[:-2] + ".d")
339 350 self.opener = opener
340 351 # When True, indexfile is opened with checkambig=True at writing, to
341 352 # avoid file stat ambiguity.
342 353 self._checkambig = checkambig
343 354 # 3-tuple of (node, rev, text) for a raw revision.
344 355 self._cache = None
345 356 # Maps rev to chain base rev.
346 357 self._chainbasecache = util.lrucachedict(100)
347 358 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
348 359 self._chunkcache = (0, '')
349 360 # How much data to read and cache into the raw revlog data cache.
350 361 self._chunkcachesize = 65536
351 362 self._maxchainlen = None
352 363 self._aggressivemergedeltas = False
353 364 self.index = []
354 365 # Mapping of partial identifiers to full nodes.
355 366 self._pcache = {}
356 367 # Mapping of revision integer to full node.
357 368 self._nodecache = {nullid: nullrev}
358 369 self._nodepos = None
359 370 self._compengine = 'zlib'
360 371 self._maxdeltachainspan = -1
361 372 self._withsparseread = False
362 373 self._srdensitythreshold = 0.25
363 374 self._srminblocksize = 262144
364 375
365 376 mmapindexthreshold = None
366 377 v = REVLOG_DEFAULT_VERSION
367 378 opts = getattr(opener, 'options', None)
368 379 if opts is not None:
369 380 if 'revlogv2' in opts:
370 381 # version 2 revlogs always use generaldelta.
371 382 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
372 383 elif 'revlogv1' in opts:
373 384 if 'generaldelta' in opts:
374 385 v |= FLAG_GENERALDELTA
375 386 else:
376 387 v = 0
377 388 if 'chunkcachesize' in opts:
378 389 self._chunkcachesize = opts['chunkcachesize']
379 390 if 'maxchainlen' in opts:
380 391 self._maxchainlen = opts['maxchainlen']
381 392 if 'aggressivemergedeltas' in opts:
382 393 self._aggressivemergedeltas = opts['aggressivemergedeltas']
383 394 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
384 395 if 'compengine' in opts:
385 396 self._compengine = opts['compengine']
386 397 if 'maxdeltachainspan' in opts:
387 398 self._maxdeltachainspan = opts['maxdeltachainspan']
388 399 if mmaplargeindex and 'mmapindexthreshold' in opts:
389 400 mmapindexthreshold = opts['mmapindexthreshold']
390 401 self._withsparseread = bool(opts.get('with-sparse-read', False))
391 402 if 'sparse-read-density-threshold' in opts:
392 403 self._srdensitythreshold = opts['sparse-read-density-threshold']
393 404 if 'sparse-read-min-block-size' in opts:
394 405 self._srminblocksize = opts['sparse-read-min-block-size']
395 406
396 407 if self._chunkcachesize <= 0:
397 408 raise RevlogError(_('revlog chunk cache size %r is not greater '
398 409 'than 0') % self._chunkcachesize)
399 410 elif self._chunkcachesize & (self._chunkcachesize - 1):
400 411 raise RevlogError(_('revlog chunk cache size %r is not a power '
401 412 'of 2') % self._chunkcachesize)
402 413
403 414 indexdata = ''
404 415 self._initempty = True
405 416 try:
406 417 f = self.opener(self.indexfile)
407 418 if (mmapindexthreshold is not None and
408 419 self.opener.fstat(f).st_size >= mmapindexthreshold):
409 420 indexdata = util.buffer(util.mmapread(f))
410 421 else:
411 422 indexdata = f.read()
412 423 f.close()
413 424 if len(indexdata) > 0:
414 425 v = versionformat_unpack(indexdata[:4])[0]
415 426 self._initempty = False
416 427 except IOError as inst:
417 428 if inst.errno != errno.ENOENT:
418 429 raise
419 430
420 431 self.version = v
421 432 self._inline = v & FLAG_INLINE_DATA
422 433 self._generaldelta = v & FLAG_GENERALDELTA
423 434 flags = v & ~0xFFFF
424 435 fmt = v & 0xFFFF
425 436 if fmt == REVLOGV0:
426 437 if flags:
427 438 raise RevlogError(_('unknown flags (%#04x) in version %d '
428 439 'revlog %s') %
429 440 (flags >> 16, fmt, self.indexfile))
430 441 elif fmt == REVLOGV1:
431 442 if flags & ~REVLOGV1_FLAGS:
432 443 raise RevlogError(_('unknown flags (%#04x) in version %d '
433 444 'revlog %s') %
434 445 (flags >> 16, fmt, self.indexfile))
435 446 elif fmt == REVLOGV2:
436 447 if flags & ~REVLOGV2_FLAGS:
437 448 raise RevlogError(_('unknown flags (%#04x) in version %d '
438 449 'revlog %s') %
439 450 (flags >> 16, fmt, self.indexfile))
440 451 else:
441 452 raise RevlogError(_('unknown version (%d) in revlog %s') %
442 453 (fmt, self.indexfile))
443 454
444 455 self.storedeltachains = True
445 456
446 457 self._io = revlogio()
447 458 if self.version == REVLOGV0:
448 459 self._io = revlogoldio()
449 460 try:
450 461 d = self._io.parseindex(indexdata, self._inline)
451 462 except (ValueError, IndexError):
452 463 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
453 464 self.index, nodemap, self._chunkcache = d
454 465 if nodemap is not None:
455 466 self.nodemap = self._nodecache = nodemap
456 467 if not self._chunkcache:
457 468 self._chunkclear()
458 469 # revnum -> (chain-length, sum-delta-length)
459 470 self._chaininfocache = {}
460 471 # revlog header -> revlog compressor
461 472 self._decompressors = {}
462 473
463 474 @util.propertycache
464 475 def _compressor(self):
465 476 return util.compengines[self._compengine].revlogcompressor()
466 477
467 478 def tip(self):
468 479 return self.node(len(self.index) - 2)
469 480 def __contains__(self, rev):
470 481 return 0 <= rev < len(self)
471 482 def __len__(self):
472 483 return len(self.index) - 1
473 484 def __iter__(self):
474 485 return iter(xrange(len(self)))
475 486 def revs(self, start=0, stop=None):
476 487 """iterate over all rev in this revlog (from start to stop)"""
477 488 step = 1
478 489 if stop is not None:
479 490 if start > stop:
480 491 step = -1
481 492 stop += step
482 493 else:
483 494 stop = len(self)
484 495 return xrange(start, stop, step)
485 496
486 497 @util.propertycache
487 498 def nodemap(self):
488 499 self.rev(self.node(0))
489 500 return self._nodecache
490 501
491 502 def hasnode(self, node):
492 503 try:
493 504 self.rev(node)
494 505 return True
495 506 except KeyError:
496 507 return False
497 508
498 509 def clearcaches(self):
499 510 self._cache = None
500 511 self._chainbasecache.clear()
501 512 self._chunkcache = (0, '')
502 513 self._pcache = {}
503 514
504 515 try:
505 516 self._nodecache.clearcaches()
506 517 except AttributeError:
507 518 self._nodecache = {nullid: nullrev}
508 519 self._nodepos = None
509 520
510 521 def rev(self, node):
511 522 try:
512 523 return self._nodecache[node]
513 524 except TypeError:
514 525 raise
515 526 except RevlogError:
516 527 # parsers.c radix tree lookup failed
517 528 if node == wdirid:
518 529 raise error.WdirUnsupported
519 530 raise LookupError(node, self.indexfile, _('no node'))
520 531 except KeyError:
521 532 # pure python cache lookup failed
522 533 n = self._nodecache
523 534 i = self.index
524 535 p = self._nodepos
525 536 if p is None:
526 537 p = len(i) - 2
527 538 for r in xrange(p, -1, -1):
528 539 v = i[r][7]
529 540 n[v] = r
530 541 if v == node:
531 542 self._nodepos = r - 1
532 543 return r
533 544 if node == wdirid:
534 545 raise error.WdirUnsupported
535 546 raise LookupError(node, self.indexfile, _('no node'))
536 547
537 548 # Accessors for index entries.
538 549
539 550 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
540 551 # are flags.
541 552 def start(self, rev):
542 553 return int(self.index[rev][0] >> 16)
543 554
544 555 def flags(self, rev):
545 556 return self.index[rev][0] & 0xFFFF
546 557
547 558 def length(self, rev):
548 559 return self.index[rev][1]
549 560
550 561 def rawsize(self, rev):
551 562 """return the length of the uncompressed text for a given revision"""
552 563 l = self.index[rev][2]
553 564 if l >= 0:
554 565 return l
555 566
556 567 t = self.revision(rev, raw=True)
557 568 return len(t)
558 569
559 570 def size(self, rev):
560 571 """length of non-raw text (processed by a "read" flag processor)"""
561 572 # fast path: if no "read" flag processor could change the content,
562 573 # size is rawsize. note: ELLIPSIS is known to not change the content.
563 574 flags = self.flags(rev)
564 575 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
565 576 return self.rawsize(rev)
566 577
567 578 return len(self.revision(rev, raw=False))
568 579
569 580 def chainbase(self, rev):
570 581 base = self._chainbasecache.get(rev)
571 582 if base is not None:
572 583 return base
573 584
574 585 index = self.index
575 586 base = index[rev][3]
576 587 while base != rev:
577 588 rev = base
578 589 base = index[rev][3]
579 590
580 591 self._chainbasecache[rev] = base
581 592 return base
582 593
583 594 def linkrev(self, rev):
584 595 return self.index[rev][4]
585 596
586 597 def parentrevs(self, rev):
587 598 try:
588 599 return self.index[rev][5:7]
589 600 except IndexError:
590 601 if rev == wdirrev:
591 602 raise error.WdirUnsupported
592 603 raise
593 604
594 605 def node(self, rev):
595 606 try:
596 607 return self.index[rev][7]
597 608 except IndexError:
598 609 if rev == wdirrev:
599 610 raise error.WdirUnsupported
600 611 raise
601 612
602 613 # Derived from index values.
603 614
604 615 def end(self, rev):
605 616 return self.start(rev) + self.length(rev)
606 617
607 618 def parents(self, node):
608 619 i = self.index
609 620 d = i[self.rev(node)]
610 621 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
611 622
612 623 def chainlen(self, rev):
613 624 return self._chaininfo(rev)[0]
614 625
615 626 def _chaininfo(self, rev):
616 627 chaininfocache = self._chaininfocache
617 628 if rev in chaininfocache:
618 629 return chaininfocache[rev]
619 630 index = self.index
620 631 generaldelta = self._generaldelta
621 632 iterrev = rev
622 633 e = index[iterrev]
623 634 clen = 0
624 635 compresseddeltalen = 0
625 636 while iterrev != e[3]:
626 637 clen += 1
627 638 compresseddeltalen += e[1]
628 639 if generaldelta:
629 640 iterrev = e[3]
630 641 else:
631 642 iterrev -= 1
632 643 if iterrev in chaininfocache:
633 644 t = chaininfocache[iterrev]
634 645 clen += t[0]
635 646 compresseddeltalen += t[1]
636 647 break
637 648 e = index[iterrev]
638 649 else:
639 650 # Add text length of base since decompressing that also takes
640 651 # work. For cache hits the length is already included.
641 652 compresseddeltalen += e[1]
642 653 r = (clen, compresseddeltalen)
643 654 chaininfocache[rev] = r
644 655 return r
645 656
646 657 def _deltachain(self, rev, stoprev=None):
647 658 """Obtain the delta chain for a revision.
648 659
649 660 ``stoprev`` specifies a revision to stop at. If not specified, we
650 661 stop at the base of the chain.
651 662
652 663 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
653 664 revs in ascending order and ``stopped`` is a bool indicating whether
654 665 ``stoprev`` was hit.
655 666 """
656 667 # Try C implementation.
657 668 try:
658 669 return self.index.deltachain(rev, stoprev, self._generaldelta)
659 670 except AttributeError:
660 671 pass
661 672
662 673 chain = []
663 674
664 675 # Alias to prevent attribute lookup in tight loop.
665 676 index = self.index
666 677 generaldelta = self._generaldelta
667 678
668 679 iterrev = rev
669 680 e = index[iterrev]
670 681 while iterrev != e[3] and iterrev != stoprev:
671 682 chain.append(iterrev)
672 683 if generaldelta:
673 684 iterrev = e[3]
674 685 else:
675 686 iterrev -= 1
676 687 e = index[iterrev]
677 688
678 689 if iterrev == stoprev:
679 690 stopped = True
680 691 else:
681 692 chain.append(iterrev)
682 693 stopped = False
683 694
684 695 chain.reverse()
685 696 return chain, stopped
686 697
687 698 def ancestors(self, revs, stoprev=0, inclusive=False):
688 699 """Generate the ancestors of 'revs' in reverse topological order.
689 700 Does not generate revs lower than stoprev.
690 701
691 702 See the documentation for ancestor.lazyancestors for more details."""
692 703
693 704 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
694 705 inclusive=inclusive)
695 706
696 707 def descendants(self, revs):
697 708 """Generate the descendants of 'revs' in revision order.
698 709
699 710 Yield a sequence of revision numbers starting with a child of
700 711 some rev in revs, i.e., each revision is *not* considered a
701 712 descendant of itself. Results are ordered by revision number (a
702 713 topological sort)."""
703 714 first = min(revs)
704 715 if first == nullrev:
705 716 for i in self:
706 717 yield i
707 718 return
708 719
709 720 seen = set(revs)
710 721 for i in self.revs(start=first + 1):
711 722 for x in self.parentrevs(i):
712 723 if x != nullrev and x in seen:
713 724 seen.add(i)
714 725 yield i
715 726 break
716 727
717 728 def findcommonmissing(self, common=None, heads=None):
718 729 """Return a tuple of the ancestors of common and the ancestors of heads
719 730 that are not ancestors of common. In revset terminology, we return the
720 731 tuple:
721 732
722 733 ::common, (::heads) - (::common)
723 734
724 735 The list is sorted by revision number, meaning it is
725 736 topologically sorted.
726 737
727 738 'heads' and 'common' are both lists of node IDs. If heads is
728 739 not supplied, uses all of the revlog's heads. If common is not
729 740 supplied, uses nullid."""
730 741 if common is None:
731 742 common = [nullid]
732 743 if heads is None:
733 744 heads = self.heads()
734 745
735 746 common = [self.rev(n) for n in common]
736 747 heads = [self.rev(n) for n in heads]
737 748
738 749 # we want the ancestors, but inclusive
739 750 class lazyset(object):
740 751 def __init__(self, lazyvalues):
741 752 self.addedvalues = set()
742 753 self.lazyvalues = lazyvalues
743 754
744 755 def __contains__(self, value):
745 756 return value in self.addedvalues or value in self.lazyvalues
746 757
747 758 def __iter__(self):
748 759 added = self.addedvalues
749 760 for r in added:
750 761 yield r
751 762 for r in self.lazyvalues:
752 763 if r not in added:
753 764 yield r
754 765
755 766 def add(self, value):
756 767 self.addedvalues.add(value)
757 768
758 769 def update(self, values):
759 770 self.addedvalues.update(values)
760 771
761 772 has = lazyset(self.ancestors(common))
762 773 has.add(nullrev)
763 774 has.update(common)
764 775
765 776 # take all ancestors from heads that aren't in has
766 777 missing = set()
767 778 visit = collections.deque(r for r in heads if r not in has)
768 779 while visit:
769 780 r = visit.popleft()
770 781 if r in missing:
771 782 continue
772 783 else:
773 784 missing.add(r)
774 785 for p in self.parentrevs(r):
775 786 if p not in has:
776 787 visit.append(p)
777 788 missing = list(missing)
778 789 missing.sort()
779 790 return has, [self.node(miss) for miss in missing]
780 791
781 792 def incrementalmissingrevs(self, common=None):
782 793 """Return an object that can be used to incrementally compute the
783 794 revision numbers of the ancestors of arbitrary sets that are not
784 795 ancestors of common. This is an ancestor.incrementalmissingancestors
785 796 object.
786 797
787 798 'common' is a list of revision numbers. If common is not supplied, uses
788 799 nullrev.
789 800 """
790 801 if common is None:
791 802 common = [nullrev]
792 803
793 804 return ancestor.incrementalmissingancestors(self.parentrevs, common)
794 805
795 806 def findmissingrevs(self, common=None, heads=None):
796 807 """Return the revision numbers of the ancestors of heads that
797 808 are not ancestors of common.
798 809
799 810 More specifically, return a list of revision numbers corresponding to
800 811 nodes N such that every N satisfies the following constraints:
801 812
802 813 1. N is an ancestor of some node in 'heads'
803 814 2. N is not an ancestor of any node in 'common'
804 815
805 816 The list is sorted by revision number, meaning it is
806 817 topologically sorted.
807 818
808 819 'heads' and 'common' are both lists of revision numbers. If heads is
809 820 not supplied, uses all of the revlog's heads. If common is not
810 821 supplied, uses nullid."""
811 822 if common is None:
812 823 common = [nullrev]
813 824 if heads is None:
814 825 heads = self.headrevs()
815 826
816 827 inc = self.incrementalmissingrevs(common=common)
817 828 return inc.missingancestors(heads)
818 829
819 830 def findmissing(self, common=None, heads=None):
820 831 """Return the ancestors of heads that are not ancestors of common.
821 832
822 833 More specifically, return a list of nodes N such that every N
823 834 satisfies the following constraints:
824 835
825 836 1. N is an ancestor of some node in 'heads'
826 837 2. N is not an ancestor of any node in 'common'
827 838
828 839 The list is sorted by revision number, meaning it is
829 840 topologically sorted.
830 841
831 842 'heads' and 'common' are both lists of node IDs. If heads is
832 843 not supplied, uses all of the revlog's heads. If common is not
833 844 supplied, uses nullid."""
834 845 if common is None:
835 846 common = [nullid]
836 847 if heads is None:
837 848 heads = self.heads()
838 849
839 850 common = [self.rev(n) for n in common]
840 851 heads = [self.rev(n) for n in heads]
841 852
842 853 inc = self.incrementalmissingrevs(common=common)
843 854 return [self.node(r) for r in inc.missingancestors(heads)]
844 855
845 856 def nodesbetween(self, roots=None, heads=None):
846 857 """Return a topological path from 'roots' to 'heads'.
847 858
848 859 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
849 860 topologically sorted list of all nodes N that satisfy both of
850 861 these constraints:
851 862
852 863 1. N is a descendant of some node in 'roots'
853 864 2. N is an ancestor of some node in 'heads'
854 865
855 866 Every node is considered to be both a descendant and an ancestor
856 867 of itself, so every reachable node in 'roots' and 'heads' will be
857 868 included in 'nodes'.
858 869
859 870 'outroots' is the list of reachable nodes in 'roots', i.e., the
860 871 subset of 'roots' that is returned in 'nodes'. Likewise,
861 872 'outheads' is the subset of 'heads' that is also in 'nodes'.
862 873
863 874 'roots' and 'heads' are both lists of node IDs. If 'roots' is
864 875 unspecified, uses nullid as the only root. If 'heads' is
865 876 unspecified, uses list of all of the revlog's heads."""
866 877 nonodes = ([], [], [])
867 878 if roots is not None:
868 879 roots = list(roots)
869 880 if not roots:
870 881 return nonodes
871 882 lowestrev = min([self.rev(n) for n in roots])
872 883 else:
873 884 roots = [nullid] # Everybody's a descendant of nullid
874 885 lowestrev = nullrev
875 886 if (lowestrev == nullrev) and (heads is None):
876 887 # We want _all_ the nodes!
877 888 return ([self.node(r) for r in self], [nullid], list(self.heads()))
878 889 if heads is None:
879 890 # All nodes are ancestors, so the latest ancestor is the last
880 891 # node.
881 892 highestrev = len(self) - 1
882 893 # Set ancestors to None to signal that every node is an ancestor.
883 894 ancestors = None
884 895 # Set heads to an empty dictionary for later discovery of heads
885 896 heads = {}
886 897 else:
887 898 heads = list(heads)
888 899 if not heads:
889 900 return nonodes
890 901 ancestors = set()
891 902 # Turn heads into a dictionary so we can remove 'fake' heads.
892 903 # Also, later we will be using it to filter out the heads we can't
893 904 # find from roots.
894 905 heads = dict.fromkeys(heads, False)
895 906 # Start at the top and keep marking parents until we're done.
896 907 nodestotag = set(heads)
897 908 # Remember where the top was so we can use it as a limit later.
898 909 highestrev = max([self.rev(n) for n in nodestotag])
899 910 while nodestotag:
900 911 # grab a node to tag
901 912 n = nodestotag.pop()
902 913 # Never tag nullid
903 914 if n == nullid:
904 915 continue
905 916 # A node's revision number represents its place in a
906 917 # topologically sorted list of nodes.
907 918 r = self.rev(n)
908 919 if r >= lowestrev:
909 920 if n not in ancestors:
910 921 # If we are possibly a descendant of one of the roots
911 922 # and we haven't already been marked as an ancestor
912 923 ancestors.add(n) # Mark as ancestor
913 924 # Add non-nullid parents to list of nodes to tag.
914 925 nodestotag.update([p for p in self.parents(n) if
915 926 p != nullid])
916 927 elif n in heads: # We've seen it before, is it a fake head?
917 928 # So it is, real heads should not be the ancestors of
918 929 # any other heads.
919 930 heads.pop(n)
920 931 if not ancestors:
921 932 return nonodes
922 933 # Now that we have our set of ancestors, we want to remove any
923 934 # roots that are not ancestors.
924 935
925 936 # If one of the roots was nullid, everything is included anyway.
926 937 if lowestrev > nullrev:
927 938 # But, since we weren't, let's recompute the lowest rev to not
928 939 # include roots that aren't ancestors.
929 940
930 941 # Filter out roots that aren't ancestors of heads
931 942 roots = [root for root in roots if root in ancestors]
932 943 # Recompute the lowest revision
933 944 if roots:
934 945 lowestrev = min([self.rev(root) for root in roots])
935 946 else:
936 947 # No more roots? Return empty list
937 948 return nonodes
938 949 else:
939 950 # We are descending from nullid, and don't need to care about
940 951 # any other roots.
941 952 lowestrev = nullrev
942 953 roots = [nullid]
943 954 # Transform our roots list into a set.
944 955 descendants = set(roots)
945 956 # Also, keep the original roots so we can filter out roots that aren't
946 957 # 'real' roots (i.e. are descended from other roots).
947 958 roots = descendants.copy()
948 959 # Our topologically sorted list of output nodes.
949 960 orderedout = []
950 961 # Don't start at nullid since we don't want nullid in our output list,
951 962 # and if nullid shows up in descendants, empty parents will look like
952 963 # they're descendants.
953 964 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
954 965 n = self.node(r)
955 966 isdescendant = False
956 967 if lowestrev == nullrev: # Everybody is a descendant of nullid
957 968 isdescendant = True
958 969 elif n in descendants:
959 970 # n is already a descendant
960 971 isdescendant = True
961 972 # This check only needs to be done here because all the roots
962 973 # will start being marked as descendants before the loop.
963 974 if n in roots:
964 975 # If n was a root, check if it's a 'real' root.
965 976 p = tuple(self.parents(n))
966 977 # If any of its parents are descendants, it's not a root.
967 978 if (p[0] in descendants) or (p[1] in descendants):
968 979 roots.remove(n)
969 980 else:
970 981 p = tuple(self.parents(n))
971 982 # A node is a descendant if either of its parents are
972 983 # descendants. (We seeded the descendants set with the roots
973 984 # up there, remember?)
974 985 if (p[0] in descendants) or (p[1] in descendants):
975 986 descendants.add(n)
976 987 isdescendant = True
977 988 if isdescendant and ((ancestors is None) or (n in ancestors)):
978 989 # Only include nodes that are both descendants and ancestors.
979 990 orderedout.append(n)
980 991 if (ancestors is not None) and (n in heads):
981 992 # We're trying to figure out which heads are reachable
982 993 # from roots.
983 994 # Mark this head as having been reached
984 995 heads[n] = True
985 996 elif ancestors is None:
986 997 # Otherwise, we're trying to discover the heads.
987 998 # Assume this is a head because if it isn't, the next step
988 999 # will eventually remove it.
989 1000 heads[n] = True
990 1001 # But, obviously its parents aren't.
991 1002 for p in self.parents(n):
992 1003 heads.pop(p, None)
993 1004 heads = [head for head, flag in heads.iteritems() if flag]
994 1005 roots = list(roots)
995 1006 assert orderedout
996 1007 assert roots
997 1008 assert heads
998 1009 return (orderedout, roots, heads)
999 1010
1000 1011 def headrevs(self):
1001 1012 try:
1002 1013 return self.index.headrevs()
1003 1014 except AttributeError:
1004 1015 return self._headrevs()
1005 1016
1006 1017 def computephases(self, roots):
1007 1018 return self.index.computephasesmapsets(roots)
1008 1019
1009 1020 def _headrevs(self):
1010 1021 count = len(self)
1011 1022 if not count:
1012 1023 return [nullrev]
1013 1024 # we won't iter over filtered rev so nobody is a head at start
1014 1025 ishead = [0] * (count + 1)
1015 1026 index = self.index
1016 1027 for r in self:
1017 1028 ishead[r] = 1 # I may be a head
1018 1029 e = index[r]
1019 1030 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1020 1031 return [r for r, val in enumerate(ishead) if val]
1021 1032
1022 1033 def heads(self, start=None, stop=None):
1023 1034 """return the list of all nodes that have no children
1024 1035
1025 1036 if start is specified, only heads that are descendants of
1026 1037 start will be returned
1027 1038 if stop is specified, it will consider all the revs from stop
1028 1039 as if they had no children
1029 1040 """
1030 1041 if start is None and stop is None:
1031 1042 if not len(self):
1032 1043 return [nullid]
1033 1044 return [self.node(r) for r in self.headrevs()]
1034 1045
1035 1046 if start is None:
1036 1047 start = nullid
1037 1048 if stop is None:
1038 1049 stop = []
1039 1050 stoprevs = set([self.rev(n) for n in stop])
1040 1051 startrev = self.rev(start)
1041 1052 reachable = {startrev}
1042 1053 heads = {startrev}
1043 1054
1044 1055 parentrevs = self.parentrevs
1045 1056 for r in self.revs(start=startrev + 1):
1046 1057 for p in parentrevs(r):
1047 1058 if p in reachable:
1048 1059 if r not in stoprevs:
1049 1060 reachable.add(r)
1050 1061 heads.add(r)
1051 1062 if p in heads and p not in stoprevs:
1052 1063 heads.remove(p)
1053 1064
1054 1065 return [self.node(r) for r in heads]
1055 1066
1056 1067 def children(self, node):
1057 1068 """find the children of a given node"""
1058 1069 c = []
1059 1070 p = self.rev(node)
1060 1071 for r in self.revs(start=p + 1):
1061 1072 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1062 1073 if prevs:
1063 1074 for pr in prevs:
1064 1075 if pr == p:
1065 1076 c.append(self.node(r))
1066 1077 elif p == nullrev:
1067 1078 c.append(self.node(r))
1068 1079 return c
1069 1080
1070 1081 def descendant(self, start, end):
1071 1082 if start == nullrev:
1072 1083 return True
1073 1084 for i in self.descendants([start]):
1074 1085 if i == end:
1075 1086 return True
1076 1087 elif i > end:
1077 1088 break
1078 1089 return False
1079 1090
1080 1091 def commonancestorsheads(self, a, b):
1081 1092 """calculate all the heads of the common ancestors of nodes a and b"""
1082 1093 a, b = self.rev(a), self.rev(b)
1083 1094 try:
1084 1095 ancs = self.index.commonancestorsheads(a, b)
1085 1096 except (AttributeError, OverflowError): # C implementation failed
1086 1097 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1087 1098 return pycompat.maplist(self.node, ancs)
1088 1099
1089 1100 def isancestor(self, a, b):
1090 1101 """return True if node a is an ancestor of node b
1091 1102
1092 1103 The implementation of this is trivial but the use of
1093 1104 commonancestorsheads is not."""
1094 1105 return a in self.commonancestorsheads(a, b)
1095 1106
1096 1107 def ancestor(self, a, b):
1097 1108 """calculate the "best" common ancestor of nodes a and b"""
1098 1109
1099 1110 a, b = self.rev(a), self.rev(b)
1100 1111 try:
1101 1112 ancs = self.index.ancestors(a, b)
1102 1113 except (AttributeError, OverflowError):
1103 1114 ancs = ancestor.ancestors(self.parentrevs, a, b)
1104 1115 if ancs:
1105 1116 # choose a consistent winner when there's a tie
1106 1117 return min(map(self.node, ancs))
1107 1118 return nullid
1108 1119
1109 1120 def _match(self, id):
1110 1121 if isinstance(id, int):
1111 1122 # rev
1112 1123 return self.node(id)
1113 1124 if len(id) == 20:
1114 1125 # possibly a binary node
1115 1126 # odds of a binary node being all hex in ASCII are 1 in 10**25
1116 1127 try:
1117 1128 node = id
1118 1129 self.rev(node) # quick search the index
1119 1130 return node
1120 1131 except LookupError:
1121 1132 pass # may be partial hex id
1122 1133 try:
1123 1134 # str(rev)
1124 1135 rev = int(id)
1125 1136 if str(rev) != id:
1126 1137 raise ValueError
1127 1138 if rev < 0:
1128 1139 rev = len(self) + rev
1129 1140 if rev < 0 or rev >= len(self):
1130 1141 raise ValueError
1131 1142 return self.node(rev)
1132 1143 except (ValueError, OverflowError):
1133 1144 pass
1134 1145 if len(id) == 40:
1135 1146 try:
1136 1147 # a full hex nodeid?
1137 1148 node = bin(id)
1138 1149 self.rev(node)
1139 1150 return node
1140 1151 except (TypeError, LookupError):
1141 1152 pass
1142 1153
1143 1154 def _partialmatch(self, id):
1144 1155 maybewdir = wdirhex.startswith(id)
1145 1156 try:
1146 1157 partial = self.index.partialmatch(id)
1147 1158 if partial and self.hasnode(partial):
1148 1159 if maybewdir:
1149 1160 # single 'ff...' match in radix tree, ambiguous with wdir
1150 1161 raise RevlogError
1151 1162 return partial
1152 1163 if maybewdir:
1153 1164 # no 'ff...' match in radix tree, wdir identified
1154 1165 raise error.WdirUnsupported
1155 1166 return None
1156 1167 except RevlogError:
1157 1168 # parsers.c radix tree lookup gave multiple matches
1158 1169 # fast path: for unfiltered changelog, radix tree is accurate
1159 1170 if not getattr(self, 'filteredrevs', None):
1160 1171 raise LookupError(id, self.indexfile,
1161 1172 _('ambiguous identifier'))
1162 1173 # fall through to slow path that filters hidden revisions
1163 1174 except (AttributeError, ValueError):
1164 1175 # we are pure python, or key was too short to search radix tree
1165 1176 pass
1166 1177
1167 1178 if id in self._pcache:
1168 1179 return self._pcache[id]
1169 1180
1170 1181 if len(id) < 40:
1171 1182 try:
1172 1183 # hex(node)[:...]
1173 1184 l = len(id) // 2 # grab an even number of digits
1174 1185 prefix = bin(id[:l * 2])
1175 1186 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1176 1187 nl = [n for n in nl if hex(n).startswith(id) and
1177 1188 self.hasnode(n)]
1178 1189 if len(nl) > 0:
1179 1190 if len(nl) == 1 and not maybewdir:
1180 1191 self._pcache[id] = nl[0]
1181 1192 return nl[0]
1182 1193 raise LookupError(id, self.indexfile,
1183 1194 _('ambiguous identifier'))
1184 1195 if maybewdir:
1185 1196 raise error.WdirUnsupported
1186 1197 return None
1187 1198 except (TypeError, binascii.Error):
1188 1199 pass
1189 1200
1190 1201 def lookup(self, id):
1191 1202 """locate a node based on:
1192 1203 - revision number or str(revision number)
1193 1204 - nodeid or subset of hex nodeid
1194 1205 """
1195 1206 n = self._match(id)
1196 1207 if n is not None:
1197 1208 return n
1198 1209 n = self._partialmatch(id)
1199 1210 if n:
1200 1211 return n
1201 1212
1202 1213 raise LookupError(id, self.indexfile, _('no match found'))
1203 1214
1204 1215 def shortest(self, hexnode, minlength=1):
1205 1216 """Find the shortest unambiguous prefix that matches hexnode."""
1206 1217 def isvalid(test):
1207 1218 try:
1208 1219 if self._partialmatch(test) is None:
1209 1220 return False
1210 1221
1211 1222 try:
1212 1223 i = int(test)
1213 1224 # if we are a pure int, then starting with zero will not be
1214 1225 # confused as a rev; or, obviously, if the int is larger
1215 1226 # than the value of the tip rev
1216 1227 if test[0] == '0' or i > len(self):
1217 1228 return True
1218 1229 return False
1219 1230 except ValueError:
1220 1231 return True
1221 1232 except error.RevlogError:
1222 1233 return False
1223 1234 except error.WdirUnsupported:
1224 1235 # single 'ff...' match
1225 1236 return True
1226 1237
1227 1238 shortest = hexnode
1228 1239 startlength = max(6, minlength)
1229 1240 length = startlength
1230 1241 while True:
1231 1242 test = hexnode[:length]
1232 1243 if isvalid(test):
1233 1244 shortest = test
1234 1245 if length == minlength or length > startlength:
1235 1246 return shortest
1236 1247 length -= 1
1237 1248 else:
1238 1249 length += 1
1239 1250 if len(shortest) <= length:
1240 1251 return shortest
1241 1252
1242 1253 def cmp(self, node, text):
1243 1254 """compare text with a given file revision
1244 1255
1245 1256 returns True if text is different than what is stored.
1246 1257 """
1247 1258 p1, p2 = self.parents(node)
1248 1259 return hash(text, p1, p2) != node
1249 1260
1250 1261 def _cachesegment(self, offset, data):
1251 1262 """Add a segment to the revlog cache.
1252 1263
1253 1264 Accepts an absolute offset and the data that is at that location.
1254 1265 """
1255 1266 o, d = self._chunkcache
1256 1267 # try to add to existing cache
1257 1268 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1258 1269 self._chunkcache = o, d + data
1259 1270 else:
1260 1271 self._chunkcache = offset, data
1261 1272
1262 1273 def _readsegment(self, offset, length, df=None):
1263 1274 """Load a segment of raw data from the revlog.
1264 1275
1265 1276 Accepts an absolute offset, length to read, and an optional existing
1266 1277 file handle to read from.
1267 1278
1268 1279 If an existing file handle is passed, it will be seeked and the
1269 1280 original seek position will NOT be restored.
1270 1281
1271 1282 Returns a str or buffer of raw byte data.
1272 1283 """
1273 1284 if df is not None:
1274 1285 closehandle = False
1275 1286 else:
1276 1287 if self._inline:
1277 1288 df = self.opener(self.indexfile)
1278 1289 else:
1279 1290 df = self.opener(self.datafile)
1280 1291 closehandle = True
1281 1292
1282 1293 # Cache data both forward and backward around the requested
1283 1294 # data, in a fixed size window. This helps speed up operations
1284 1295 # involving reading the revlog backwards.
1285 1296 cachesize = self._chunkcachesize
1286 1297 realoffset = offset & ~(cachesize - 1)
1287 1298 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1288 1299 - realoffset)
1289 1300 df.seek(realoffset)
1290 1301 d = df.read(reallength)
1291 1302 if closehandle:
1292 1303 df.close()
1293 1304 self._cachesegment(realoffset, d)
1294 1305 if offset != realoffset or reallength != length:
1295 1306 return util.buffer(d, offset - realoffset, length)
1296 1307 return d
1297 1308
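
The window arithmetic above rounds every request down and up to multiples of the cache size, so walks over the revlog (notably backward ones) hit the cache. A worked example with a toy 64-byte window (the real default for self._chunkcachesize is 65536):

    cachesize = 64  # toy value; the real default is 65536
    offset, length = 100, 50
    realoffset = offset & ~(cachesize - 1)
    reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                  - realoffset)
    print(realoffset, reallength)  # 64 128: bytes 64..191 read and cached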
1298 1309 def _getsegment(self, offset, length, df=None):
1299 1310 """Obtain a segment of raw data from the revlog.
1300 1311
1301 1312 Accepts an absolute offset, length of bytes to obtain, and an
1302 1313 optional file handle to the already-opened revlog. If the file
1303 1314 handle is used, its original seek position will not be preserved.
1304 1315
1305 1316 Requests for data may be returned from a cache.
1306 1317
1307 1318 Returns a str or a buffer instance of raw byte data.
1308 1319 """
1309 1320 o, d = self._chunkcache
1310 1321 l = len(d)
1311 1322
1312 1323 # is it in the cache?
1313 1324 cachestart = offset - o
1314 1325 cacheend = cachestart + length
1315 1326 if cachestart >= 0 and cacheend <= l:
1316 1327 if cachestart == 0 and cacheend == l:
1317 1328 return d # avoid a copy
1318 1329 return util.buffer(d, cachestart, cacheend - cachestart)
1319 1330
1320 1331 return self._readsegment(offset, length, df=df)
1321 1332
1322 1333 def _getsegmentforrevs(self, startrev, endrev, df=None):
1323 1334 """Obtain a segment of raw data corresponding to a range of revisions.
1324 1335
1325 1336 Accepts the start and end revisions and an optional already-open
1326 1337 file handle to be used for reading. If the file handle is read, its
1327 1338 seek position will not be preserved.
1328 1339
1329 1340 Requests for data may be satisfied by a cache.
1330 1341
1331 1342 Returns a 2-tuple of (offset, data) for the requested range of
1332 1343 revisions. Offset is the integer offset from the beginning of the
1333 1344 revlog and data is a str or buffer of the raw byte data.
1334 1345
1335 1346 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1336 1347 to determine where each revision's data begins and ends.
1337 1348 """
1338 1349 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1339 1350 # (functions are expensive).
1340 1351 index = self.index
1341 1352 istart = index[startrev]
1342 1353 start = int(istart[0] >> 16)
1343 1354 if startrev == endrev:
1344 1355 end = start + istart[1]
1345 1356 else:
1346 1357 iend = index[endrev]
1347 1358 end = int(iend[0] >> 16) + iend[1]
1348 1359
1349 1360 if self._inline:
1350 1361 start += (startrev + 1) * self._io.size
1351 1362 end += (endrev + 1) * self._io.size
1352 1363 length = end - start
1353 1364
1354 1365 return start, self._getsegment(start, length, df=df)
1355 1366
1356 1367 def _chunk(self, rev, df=None):
1357 1368 """Obtain a single decompressed chunk for a revision.
1358 1369
1359 1370 Accepts an integer revision and an optional already-open file handle
1360 1371 to be used for reading. If used, the seek position of the file will not
1361 1372 be preserved.
1362 1373
1363 1374 Returns a str holding uncompressed data for the requested revision.
1364 1375 """
1365 1376 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1366 1377
1367 1378 def _chunks(self, revs, df=None):
1368 1379 """Obtain decompressed chunks for the specified revisions.
1369 1380
1370 1381 Accepts an iterable of numeric revisions that are assumed to be in
1371 1382 ascending order. Also accepts an optional already-open file handle
1372 1383 to be used for reading. If used, the seek position of the file will
1373 1384 not be preserved.
1374 1385
1375 1386 This function is similar to calling ``self._chunk()`` multiple times,
1376 1387 but is faster.
1377 1388
1378 1389 Returns a list with decompressed data for each requested revision.
1379 1390 """
1380 1391 if not revs:
1381 1392 return []
1382 1393 start = self.start
1383 1394 length = self.length
1384 1395 inline = self._inline
1385 1396 iosize = self._io.size
1386 1397 buffer = util.buffer
1387 1398
1388 1399 l = []
1389 1400 ladd = l.append
1390 1401
1391 1402 if not self._withsparseread:
1392 1403 slicedchunks = (revs,)
1393 1404 else:
1394 1405 slicedchunks = _slicechunk(self, revs)
1395 1406
1396 1407 for revschunk in slicedchunks:
1397 1408 firstrev = revschunk[0]
1398 1409 # Skip trailing revisions with empty diff
1399 1410 for lastrev in revschunk[::-1]:
1400 1411 if length(lastrev) != 0:
1401 1412 break
1402 1413
1403 1414 try:
1404 1415 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1405 1416 except OverflowError:
1406 1417 # issue4215 - we can't cache a run of chunks greater than
1407 1418 # 2G on Windows
1408 1419 return [self._chunk(rev, df=df) for rev in revschunk]
1409 1420
1410 1421 decomp = self.decompress
1411 1422 for rev in revschunk:
1412 1423 chunkstart = start(rev)
1413 1424 if inline:
1414 1425 chunkstart += (rev + 1) * iosize
1415 1426 chunklength = length(rev)
1416 1427 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1417 1428
1418 1429 return l
1419 1430
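
This is where the sparse-read work pays off: with _withsparseread set, _slicechunk splits ``revs`` into dense groups, and each group is fetched with a single _getsegmentforrevs call, so the large holes between interesting deltas are skipped instead of being read and thrown away.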
1420 1431 def _chunkclear(self):
1421 1432 """Clear the raw chunk cache."""
1422 1433 self._chunkcache = (0, '')
1423 1434
1424 1435 def deltaparent(self, rev):
1425 1436 """return deltaparent of the given revision"""
1426 1437 base = self.index[rev][3]
1427 1438 if base == rev:
1428 1439 return nullrev
1429 1440 elif self._generaldelta:
1430 1441 return base
1431 1442 else:
1432 1443 return rev - 1
1433 1444
1434 1445 def revdiff(self, rev1, rev2):
1435 1446 """return or calculate a delta between two revisions
1436 1447
1437 1448 The delta calculated is in binary form and is intended to be written to
1438 1449 revlog data directly. So this function needs raw revision data.
1439 1450 """
1440 1451 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1441 1452 return bytes(self._chunk(rev2))
1442 1453
1443 1454 return mdiff.textdiff(self.revision(rev1, raw=True),
1444 1455 self.revision(rev2, raw=True))
1445 1456
1446 1457 def revision(self, nodeorrev, _df=None, raw=False):
1447 1458 """return an uncompressed revision of a given node or revision
1448 1459 number.
1449 1460
1450 1461 _df - an existing file handle to read from. (internal-only)
1451 1462 raw - an optional argument specifying if the revision data is to be
1452 1463 treated as raw data when applying flag transforms. 'raw' should be set
1453 1464 to True when generating changegroups or in debug commands.
1454 1465 """
1455 1466 if isinstance(nodeorrev, int):
1456 1467 rev = nodeorrev
1457 1468 node = self.node(rev)
1458 1469 else:
1459 1470 node = nodeorrev
1460 1471 rev = None
1461 1472
1462 1473 cachedrev = None
1463 1474 flags = None
1464 1475 rawtext = None
1465 1476 if node == nullid:
1466 1477 return ""
1467 1478 if self._cache:
1468 1479 if self._cache[0] == node:
1469 1480 # _cache only stores rawtext
1470 1481 if raw:
1471 1482 return self._cache[2]
1472 1483 # duplicated, but good for perf
1473 1484 if rev is None:
1474 1485 rev = self.rev(node)
1475 1486 if flags is None:
1476 1487 flags = self.flags(rev)
1477 1488 # no extra flags set, no flag processor runs, text = rawtext
1478 1489 if flags == REVIDX_DEFAULT_FLAGS:
1479 1490 return self._cache[2]
1480 1491 # rawtext is reusable. need to run flag processor
1481 1492 rawtext = self._cache[2]
1482 1493
1483 1494 cachedrev = self._cache[1]
1484 1495
1485 1496 # look up what we need to read
1486 1497 if rawtext is None:
1487 1498 if rev is None:
1488 1499 rev = self.rev(node)
1489 1500
1490 1501 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1491 1502 if stopped:
1492 1503 rawtext = self._cache[2]
1493 1504
1494 1505 # drop cache to save memory
1495 1506 self._cache = None
1496 1507
1497 1508 bins = self._chunks(chain, df=_df)
1498 1509 if rawtext is None:
1499 1510 rawtext = bytes(bins[0])
1500 1511 bins = bins[1:]
1501 1512
1502 1513 rawtext = mdiff.patches(rawtext, bins)
1503 1514 self._cache = (node, rev, rawtext)
1504 1515
1505 1516 if flags is None:
1506 1517 if rev is None:
1507 1518 rev = self.rev(node)
1508 1519 flags = self.flags(rev)
1509 1520
1510 1521 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1511 1522 if validatehash:
1512 1523 self.checkhash(text, node, rev=rev)
1513 1524
1514 1525 return text
1515 1526
1516 1527 def hash(self, text, p1, p2):
1517 1528 """Compute a node hash.
1518 1529
1519 1530 Available as a function so that subclasses can replace the hash
1520 1531 as needed.
1521 1532 """
1522 1533 return hash(text, p1, p2)
1523 1534
1524 1535 def _processflags(self, text, flags, operation, raw=False):
1525 1536 """Inspect revision data flags and applies transforms defined by
1526 1537 registered flag processors.
1527 1538
1528 1539 ``text`` - the revision data to process
1529 1540 ``flags`` - the revision flags
1530 1541 ``operation`` - the operation being performed (read or write)
1531 1542 ``raw`` - an optional argument describing if the raw transform should be
1532 1543 applied.
1533 1544
1534 1545 This method processes the flags in the order (or reverse order if
1535 1546 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1536 1547 flag processors registered for present flags. The order of flags defined
1537 1548 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1538 1549
1539 1550 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1540 1551 processed text and ``validatehash`` is a bool indicating whether the
1541 1552 returned text should be checked for hash integrity.
1542 1553
1543 1554 Note: If the ``raw`` argument is set, it has precedence over the
1544 1555 operation and will only update the value of ``validatehash``.
1545 1556 """
1546 1557 # fast path: no flag processors will run
1547 1558 if flags == 0:
1548 1559 return text, True
1549 1560 if operation not in ('read', 'write'):
1550 1561 raise ProgrammingError(_("invalid '%s' operation") % (operation))
1551 1562 # Check all flags are known.
1552 1563 if flags & ~REVIDX_KNOWN_FLAGS:
1553 1564 raise RevlogError(_("incompatible revision flag '%#x'") %
1554 1565 (flags & ~REVIDX_KNOWN_FLAGS))
1555 1566 validatehash = True
1556 1567 # Depending on the operation (read or write), the order might be
1557 1568 # reversed due to non-commutative transforms.
1558 1569 orderedflags = REVIDX_FLAGS_ORDER
1559 1570 if operation == 'write':
1560 1571 orderedflags = reversed(orderedflags)
1561 1572
1562 1573 for flag in orderedflags:
1563 1574 # If a flagprocessor has been registered for a known flag, apply the
1564 1575 # related operation transform and update result tuple.
1565 1576 if flag & flags:
1566 1577 vhash = True
1567 1578
1568 1579 if flag not in _flagprocessors:
1569 1580 message = _("missing processor for flag '%#x'") % (flag)
1570 1581 raise RevlogError(message)
1571 1582
1572 1583 processor = _flagprocessors[flag]
1573 1584 if processor is not None:
1574 1585 readtransform, writetransform, rawtransform = processor
1575 1586
1576 1587 if raw:
1577 1588 vhash = rawtransform(self, text)
1578 1589 elif operation == 'read':
1579 1590 text, vhash = readtransform(self, text)
1580 1591 else: # write operation
1581 1592 text, vhash = writetransform(self, text)
1582 1593 validatehash = validatehash and vhash
1583 1594
1584 1595 return text, validatehash
1585 1596
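    # Illustrative sketch (not in the original source): registering a flag
    # processor. The base64 pairing with REVIDX_EXTSTORED is hypothetical;
    # only the (read, write, raw) signatures documented in
    # addflagprocessor() matter.
    #
    #   import base64
    #
    #   def b64read(self, rawtext):
    #       # stored form -> user-visible form; True: verify hash of result
    #       return base64.b64decode(rawtext), True
    #
    #   def b64write(self, text):
    #       # user-visible form -> stored form; False: the stored text is
    #       # not expected to hash to the node
    #       return base64.b64encode(text), False
    #
    #   def b64raw(self, rawtext):
    #       # rawtext already is the stored form; it does not hash to the node
    #       return False
    #
    #   addflagprocessor(REVIDX_EXTSTORED, (b64read, b64write, b64raw))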
1586 1597 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1587 1598 """Check node hash integrity.
1588 1599
1589 1600 Available as a function so that subclasses can extend hash mismatch
1590 1601 behaviors as needed.
1591 1602 """
1592 1603 if p1 is None and p2 is None:
1593 1604 p1, p2 = self.parents(node)
1594 1605 if node != self.hash(text, p1, p2):
1595 1606 revornode = rev
1596 1607 if revornode is None:
1597 1608 revornode = templatefilters.short(hex(node))
1598 1609 raise RevlogError(_("integrity check failed on %s:%s")
1599 1610 % (self.indexfile, pycompat.bytestr(revornode)))
1600 1611
1601 1612 def checkinlinesize(self, tr, fp=None):
1602 1613 """Check if the revlog is too big for inline and convert if so.
1603 1614
1604 1615 This should be called after revisions are added to the revlog. If the
1605 1616 revlog has grown too large to be an inline revlog, it will convert it
1606 1617 to use multiple index and data files.
1607 1618 """
1608 1619 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1609 1620 return
1610 1621
1611 1622 trinfo = tr.find(self.indexfile)
1612 1623 if trinfo is None:
1613 1624 raise RevlogError(_("%s not found in the transaction")
1614 1625 % self.indexfile)
1615 1626
1616 1627 trindex = trinfo[2]
1617 1628 if trindex is not None:
1618 1629 dataoff = self.start(trindex)
1619 1630 else:
1620 1631 # revlog was stripped at start of transaction, use all leftover data
1621 1632 trindex = len(self) - 1
1622 1633 dataoff = self.end(-2)
1623 1634
1624 1635 tr.add(self.datafile, dataoff)
1625 1636
1626 1637 if fp:
1627 1638 fp.flush()
1628 1639 fp.close()
1629 1640
1630 1641 df = self.opener(self.datafile, 'w')
1631 1642 try:
1632 1643 for r in self:
1633 1644 df.write(self._getsegmentforrevs(r, r)[1])
1634 1645 finally:
1635 1646 df.close()
1636 1647
1637 1648 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1638 1649 checkambig=self._checkambig)
1639 1650 self.version &= ~FLAG_INLINE_DATA
1640 1651 self._inline = False
1641 1652 for i in self:
1642 1653 e = self._io.packentry(self.index[i], self.node, self.version, i)
1643 1654 fp.write(e)
1644 1655
1645 1656 # if we don't call close, the temp file will never replace the
1646 1657 # real index
1647 1658 fp.close()
1648 1659
1649 1660 tr.replace(self.indexfile, trindex * self._io.size)
1650 1661 self._chunkclear()
1651 1662
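    # For reference (not in the original source): _writeentry() calls
    # checkinlinesize() after every inline write, so an inline revlog is
    # split into separate index and data files once its data crosses
    # _maxinline (128 KiB):
    #
    #   rl.checkinlinesize(tr)
    #   # afterwards the revlog is either still inline and small enough,
    #   # or it has been converted:
    #   assert not rl._inline or rl.start(-2) + rl.length(-2) < _maxinline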
1652 1663 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1653 1664 node=None, flags=REVIDX_DEFAULT_FLAGS):
1654 1665 """add a revision to the log
1655 1666
1656 1667 text - the revision data to add
1657 1668 transaction - the transaction object used for rollback
1658 1669 link - the linkrev data to add
1659 1670 p1, p2 - the parent nodeids of the revision
1660 1671 cachedelta - an optional precomputed delta
1661 1672 node - nodeid of revision; typically node is not specified, and it is
1662 1673             computed by default as hash(text, p1, p2); however, subclasses might
1663 1674             use a different hashing method (and override checkhash() in that case)
1664 1675 flags - the known flags to set on the revision
1665 1676 """
1666 1677 if link == nullrev:
1667 1678 raise RevlogError(_("attempted to add linkrev -1 to %s")
1668 1679 % self.indexfile)
1669 1680
1670 1681 if flags:
1671 1682 node = node or self.hash(text, p1, p2)
1672 1683
1673 1684 rawtext, validatehash = self._processflags(text, flags, 'write')
1674 1685
1675 1686 # If the flag processor modifies the revision data, ignore any provided
1676 1687 # cachedelta.
1677 1688 if rawtext != text:
1678 1689 cachedelta = None
1679 1690
1680 1691 if len(rawtext) > _maxentrysize:
1681 1692 raise RevlogError(
1682 1693 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1683 1694 % (self.indexfile, len(rawtext)))
1684 1695
1685 1696 node = node or self.hash(rawtext, p1, p2)
1686 1697 if node in self.nodemap:
1687 1698 return node
1688 1699
1689 1700 if validatehash:
1690 1701 self.checkhash(rawtext, node, p1=p1, p2=p2)
1691 1702
1692 1703 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1693 1704 flags, cachedelta=cachedelta)
1694 1705
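    # Illustrative usage (assumed caller setup, not in the original source):
    #
    #   with repo.transaction('example') as tr:
    #       node = rl.addrevision(text, tr, linkrev, p1node, p2node)
    #   # re-adding the same (text, p1, p2) is a no-op that returns the
    #   # existing node (see the nodemap check above)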
1695 1706 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1696 1707 cachedelta=None):
1697 1708 """add a raw revision with known flags, node and parents
1698 1709         useful when reusing a revision not stored in this revlog (e.g. received
1699 1710         over the wire, or read from an external bundle).
1700 1711 """
1701 1712 dfh = None
1702 1713 if not self._inline:
1703 1714 dfh = self.opener(self.datafile, "a+")
1704 1715 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1705 1716 try:
1706 1717 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1707 1718 flags, cachedelta, ifh, dfh)
1708 1719 finally:
1709 1720 if dfh:
1710 1721 dfh.close()
1711 1722 ifh.close()
1712 1723
1713 1724 def compress(self, data):
1714 1725 """Generate a possibly-compressed representation of data."""
1715 1726 if not data:
1716 1727 return '', data
1717 1728
1718 1729 compressed = self._compressor.compress(data)
1719 1730
1720 1731 if compressed:
1721 1732 # The revlog compressor added the header in the returned data.
1722 1733 return '', compressed
1723 1734
1724 1735 if data[0:1] == '\0':
1725 1736 return '', data
1726 1737 return 'u', data
1727 1738
1728 1739 def decompress(self, data):
1729 1740 """Decompress a revlog chunk.
1730 1741
1731 1742 The chunk is expected to begin with a header identifying the
1732 1743 format type so it can be routed to an appropriate decompressor.
1733 1744 """
1734 1745 if not data:
1735 1746 return data
1736 1747
1737 1748 # Revlogs are read much more frequently than they are written and many
1738 1749 # chunks only take microseconds to decompress, so performance is
1739 1750 # important here.
1740 1751 #
1741 1752 # We can make a few assumptions about revlogs:
1742 1753 #
1743 1754 # 1) the majority of chunks will be compressed (as opposed to inline
1744 1755 # raw data).
1745 1756         # 2) decompressing *any* data will likely be at least 10x slower than
1746 1757 # returning raw inline data.
1747 1758 # 3) we want to prioritize common and officially supported compression
1748 1759 # engines
1749 1760 #
1750 1761 # It follows that we want to optimize for "decompress compressed data
1751 1762 # when encoded with common and officially supported compression engines"
1752 1763 # case over "raw data" and "data encoded by less common or non-official
1753 1764 # compression engines." That is why we have the inline lookup first
1754 1765 # followed by the compengines lookup.
1755 1766 #
1756 1767 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1757 1768 # compressed chunks. And this matters for changelog and manifest reads.
1758 1769 t = data[0:1]
1759 1770
1760 1771 if t == 'x':
1761 1772 try:
1762 1773 return _zlibdecompress(data)
1763 1774 except zlib.error as e:
1764 1775 raise RevlogError(_('revlog decompress error: %s') % str(e))
1765 1776 # '\0' is more common than 'u' so it goes first.
1766 1777 elif t == '\0':
1767 1778 return data
1768 1779 elif t == 'u':
1769 1780 return util.buffer(data, 1)
1770 1781
1771 1782 try:
1772 1783 compressor = self._decompressors[t]
1773 1784 except KeyError:
1774 1785 try:
1775 1786 engine = util.compengines.forrevlogheader(t)
1776 1787 compressor = engine.revlogcompressor()
1777 1788 self._decompressors[t] = compressor
1778 1789 except KeyError:
1779 1790 raise RevlogError(_('unknown compression type %r') % t)
1780 1791
1781 1792 return compressor.decompress(data)
1782 1793
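    # Illustrative round trip (not in the original source): compress()
    # returns a (header, data) pair; decompress() routes their
    # concatenation by its first byte ('\0' data stored literally, 'u'
    # uncompressed with marker, 'x' zlib, or a registered engine header):
    #
    #   header, data = rl.compress(chunk)
    #   assert bytes(rl.decompress(header + data)) == chunk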
1783 1794 def _isgooddelta(self, d, textlen):
1784 1795 """Returns True if the given delta is good. Good means that it is within
1785 1796 the disk span, disk size, and chain length bounds that we know to be
1786 1797 performant."""
1787 1798 if d is None:
1788 1799 return False
1789 1800
1790 1801 # - 'dist' is the distance from the base revision -- bounding it limits
1791 1802 # the amount of I/O we need to do.
1792 1803 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1793 1804 # to apply -- bounding it limits the amount of CPU we consume.
1794 1805 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1795 1806
1796 1807 defaultmax = textlen * 4
1797 1808 maxdist = self._maxdeltachainspan
1798 1809 if not maxdist:
1799 1810             maxdist = dist # ensure the conditional below passes (no span limit)
1800 1811 maxdist = max(maxdist, defaultmax)
1801 1812 if (dist > maxdist or l > textlen or
1802 1813 compresseddeltalen > textlen * 2 or
1803 1814 (self._maxchainlen and chainlen > self._maxchainlen)):
1804 1815 return False
1805 1816
1806 1817 return True
1807 1818
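    # Worked example (not in the original source): for textlen = 1000 and
    # _maxdeltachainspan = 2000, maxdist = max(2000, 4 * 1000) = 4000, so a
    # candidate delta is rejected when:
    #
    #   dist > 4000                    # too much I/O to read the span
    #   l > 1000                       # delta bigger than the fulltext
    #   compresseddeltalen > 2000      # too much CPU to apply the chain
    #   chainlen > self._maxchainlen   # chain too long (when configured)
    #
    # With _maxdeltachainspan unset, maxdist collapses to dist itself and
    # the distance check never fires.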
1808 1819 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1809 1820 cachedelta, ifh, dfh, alwayscache=False):
1810 1821 """internal function to add revisions to the log
1811 1822
1812 1823 see addrevision for argument descriptions.
1813 1824
1814 1825 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1815 1826
1816 1827 invariants:
1817 1828 - rawtext is optional (can be None); if not set, cachedelta must be set.
1818 1829 if both are set, they must correspond to each other.
1819 1830 """
1820 1831 if node == nullid:
1821 1832 raise RevlogError(_("%s: attempt to add null revision") %
1822 1833 (self.indexfile))
1823 1834 if node == wdirid:
1824 1835 raise RevlogError(_("%s: attempt to add wdir revision") %
1825 1836 (self.indexfile))
1826 1837
1827 1838 btext = [rawtext]
1828 1839 def buildtext():
1829 1840 if btext[0] is not None:
1830 1841 return btext[0]
1831 1842 baserev = cachedelta[0]
1832 1843 delta = cachedelta[1]
1833 1844 # special case deltas which replace entire base; no need to decode
1834 1845 # base revision. this neatly avoids censored bases, which throw when
1835 1846 # they're decoded.
1836 1847 hlen = struct.calcsize(">lll")
1837 1848 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1838 1849 len(delta) - hlen):
1839 1850 btext[0] = delta[hlen:]
1840 1851 else:
1841 1852 if self._inline:
1842 1853 fh = ifh
1843 1854 else:
1844 1855 fh = dfh
1845 1856 basetext = self.revision(baserev, _df=fh, raw=True)
1846 1857 btext[0] = mdiff.patch(basetext, delta)
1847 1858
1848 1859 try:
1849 1860 res = self._processflags(btext[0], flags, 'read', raw=True)
1850 1861 btext[0], validatehash = res
1851 1862 if validatehash:
1852 1863 self.checkhash(btext[0], node, p1=p1, p2=p2)
1853 1864 if flags & REVIDX_ISCENSORED:
1854 1865 raise RevlogError(_('node %s is not censored') % node)
1855 1866 except CensoredNodeError:
1856 1867 # must pass the censored index flag to add censored revisions
1857 1868 if not flags & REVIDX_ISCENSORED:
1858 1869 raise
1859 1870 return btext[0]
1860 1871
1861 1872 def builddelta(rev):
1862 1873 # can we use the cached delta?
1863 1874 if cachedelta and cachedelta[0] == rev:
1864 1875 delta = cachedelta[1]
1865 1876 else:
1866 1877 t = buildtext()
1867 1878 if self.iscensored(rev):
1868 1879 # deltas based on a censored revision must replace the
1869 1880 # full content in one patch, so delta works everywhere
1870 1881 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1871 1882 delta = header + t
1872 1883 else:
1873 1884 if self._inline:
1874 1885 fh = ifh
1875 1886 else:
1876 1887 fh = dfh
1877 1888 ptext = self.revision(rev, _df=fh, raw=True)
1878 1889 delta = mdiff.textdiff(ptext, t)
1879 1890 header, data = self.compress(delta)
1880 1891 deltalen = len(header) + len(data)
1881 1892 chainbase = self.chainbase(rev)
1882 1893 dist = deltalen + offset - self.start(chainbase)
1883 1894 if self._generaldelta:
1884 1895 base = rev
1885 1896 else:
1886 1897 base = chainbase
1887 1898 chainlen, compresseddeltalen = self._chaininfo(rev)
1888 1899 chainlen += 1
1889 1900 compresseddeltalen += deltalen
1890 1901 return (dist, deltalen, (header, data), base,
1891 1902 chainbase, chainlen, compresseddeltalen)
1892 1903
1893 1904 curr = len(self)
1894 1905 prev = curr - 1
1895 1906 offset = self.end(prev)
1896 1907 delta = None
1897 1908 p1r, p2r = self.rev(p1), self.rev(p2)
1898 1909
1899 1910 # full versions are inserted when the needed deltas
1900 1911 # become comparable to the uncompressed text
1901 1912 if rawtext is None:
1902 1913 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1903 1914 cachedelta[1])
1904 1915 else:
1905 1916 textlen = len(rawtext)
1906 1917
1907 1918 # should we try to build a delta?
1908 1919 if prev != nullrev and self.storedeltachains:
1909 1920 tested = set()
1910 1921 # This condition is true most of the time when processing
1911 1922 # changegroup data into a generaldelta repo. The only time it
1912 1923 # isn't true is if this is the first revision in a delta chain
1913 1924 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1914 1925 if cachedelta and self._generaldelta and self._lazydeltabase:
1915 1926 # Assume what we received from the server is a good choice
1916 1927 # build delta will reuse the cache
1917 1928 candidatedelta = builddelta(cachedelta[0])
1918 1929 tested.add(cachedelta[0])
1919 1930 if self._isgooddelta(candidatedelta, textlen):
1920 1931 delta = candidatedelta
1921 1932 if delta is None and self._generaldelta:
1922 1933 # exclude already lazy tested base if any
1923 1934 parents = [p for p in (p1r, p2r)
1924 1935 if p != nullrev and p not in tested]
1925 1936 if parents and not self._aggressivemergedeltas:
1926 1937 # Pick whichever parent is closer to us (to minimize the
1927 1938 # chance of having to build a fulltext).
1928 1939 parents = [max(parents)]
1929 1940 tested.update(parents)
1930 1941 pdeltas = []
1931 1942 for p in parents:
1932 1943 pd = builddelta(p)
1933 1944 if self._isgooddelta(pd, textlen):
1934 1945 pdeltas.append(pd)
1935 1946 if pdeltas:
1936 1947 delta = min(pdeltas, key=lambda x: x[1])
1937 1948 if delta is None and prev not in tested:
1938 1949                 # the other approaches failed; try a delta against prev in the
1939 1950                 # hope of saving us a fulltext.
1940 1951 candidatedelta = builddelta(prev)
1941 1952 if self._isgooddelta(candidatedelta, textlen):
1942 1953 delta = candidatedelta
1943 1954 if delta is not None:
1944 1955 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1945 1956 else:
1946 1957 rawtext = buildtext()
1947 1958 data = self.compress(rawtext)
1948 1959 l = len(data[1]) + len(data[0])
1949 1960 base = chainbase = curr
1950 1961
1951 1962 e = (offset_type(offset, flags), l, textlen,
1952 1963 base, link, p1r, p2r, node)
1953 1964 self.index.insert(-1, e)
1954 1965 self.nodemap[node] = curr
1955 1966
1956 1967 entry = self._io.packentry(e, self.node, self.version, curr)
1957 1968 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1958 1969
1959 1970 if alwayscache and rawtext is None:
1960 1971 rawtext = buildtext()
1961 1972
1962 1973 if type(rawtext) == str: # only accept immutable objects
1963 1974 self._cache = (node, curr, rawtext)
1964 1975 self._chainbasecache[curr] = chainbase
1965 1976 return node
1966 1977
1967 1978 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1968 1979 # Files opened in a+ mode have inconsistent behavior on various
1969 1980 # platforms. Windows requires that a file positioning call be made
1970 1981 # when the file handle transitions between reads and writes. See
1971 1982 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1972 1983 # platforms, Python or the platform itself can be buggy. Some versions
1973 1984 # of Solaris have been observed to not append at the end of the file
1974 1985         # if the file had been seeked to a position before the end. See issue4943.
1975 1986 #
1976 1987 # We work around this issue by inserting a seek() before writing.
1977 1988 # Note: This is likely not necessary on Python 3.
1978 1989 ifh.seek(0, os.SEEK_END)
1979 1990 if dfh:
1980 1991 dfh.seek(0, os.SEEK_END)
1981 1992
1982 1993 curr = len(self) - 1
1983 1994 if not self._inline:
1984 1995 transaction.add(self.datafile, offset)
1985 1996 transaction.add(self.indexfile, curr * len(entry))
1986 1997 if data[0]:
1987 1998 dfh.write(data[0])
1988 1999 dfh.write(data[1])
1989 2000 ifh.write(entry)
1990 2001 else:
1991 2002 offset += curr * self._io.size
1992 2003 transaction.add(self.indexfile, offset, curr)
1993 2004 ifh.write(entry)
1994 2005 ifh.write(data[0])
1995 2006 ifh.write(data[1])
1996 2007 self.checkinlinesize(transaction, ifh)
1997 2008
1998 2009 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
1999 2010 """
2000 2011 add a delta group
2001 2012
2002 2013 given a set of deltas, add them to the revision log. the
2003 2014 first delta is against its parent, which should be in our
2004 2015 log, the rest are against the previous delta.
2005 2016
2006 2017 If ``addrevisioncb`` is defined, it will be called with arguments of
2007 2018 this revlog and the node that was added.
2008 2019 """
2009 2020
2010 2021 nodes = []
2011 2022
2012 2023 r = len(self)
2013 2024 end = 0
2014 2025 if r:
2015 2026 end = self.end(r - 1)
2016 2027 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
2017 2028 isize = r * self._io.size
2018 2029 if self._inline:
2019 2030 transaction.add(self.indexfile, end + isize, r)
2020 2031 dfh = None
2021 2032 else:
2022 2033 transaction.add(self.indexfile, isize, r)
2023 2034 transaction.add(self.datafile, end)
2024 2035 dfh = self.opener(self.datafile, "a+")
2025 2036 def flush():
2026 2037 if dfh:
2027 2038 dfh.flush()
2028 2039 ifh.flush()
2029 2040 try:
2030 2041 # loop through our set of deltas
2031 2042 for data in deltas:
2032 2043 node, p1, p2, linknode, deltabase, delta, flags = data
2033 2044 link = linkmapper(linknode)
2034 2045 flags = flags or REVIDX_DEFAULT_FLAGS
2035 2046
2036 2047 nodes.append(node)
2037 2048
2038 2049 if node in self.nodemap:
2039 2050 # this can happen if two branches make the same change
2040 2051 continue
2041 2052
2042 2053 for p in (p1, p2):
2043 2054 if p not in self.nodemap:
2044 2055 raise LookupError(p, self.indexfile,
2045 2056 _('unknown parent'))
2046 2057
2047 2058 if deltabase not in self.nodemap:
2048 2059 raise LookupError(deltabase, self.indexfile,
2049 2060 _('unknown delta base'))
2050 2061
2051 2062 baserev = self.rev(deltabase)
2052 2063
2053 2064 if baserev != nullrev and self.iscensored(baserev):
2054 2065 # if base is censored, delta must be full replacement in a
2055 2066 # single patch operation
2056 2067 hlen = struct.calcsize(">lll")
2057 2068 oldlen = self.rawsize(baserev)
2058 2069 newlen = len(delta) - hlen
2059 2070 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2060 2071 raise error.CensoredBaseError(self.indexfile,
2061 2072 self.node(baserev))
2062 2073
2063 2074 if not flags and self._peek_iscensored(baserev, delta, flush):
2064 2075 flags |= REVIDX_ISCENSORED
2065 2076
2066 2077 # We assume consumers of addrevisioncb will want to retrieve
2067 2078 # the added revision, which will require a call to
2068 2079 # revision(). revision() will fast path if there is a cache
2069 2080 # hit. So, we tell _addrevision() to always cache in this case.
2070 2081 # We're only using addgroup() in the context of changegroup
2071 2082 # generation so the revision data can always be handled as raw
2072 2083 # by the flagprocessor.
2073 2084 self._addrevision(node, None, transaction, link,
2074 2085 p1, p2, flags, (baserev, delta),
2075 2086 ifh, dfh,
2076 2087 alwayscache=bool(addrevisioncb))
2077 2088
2078 2089 if addrevisioncb:
2079 2090 addrevisioncb(self, node)
2080 2091
2081 2092 if not dfh and not self._inline:
2082 2093 # addrevision switched from inline to conventional
2083 2094 # reopen the index
2084 2095 ifh.close()
2085 2096 dfh = self.opener(self.datafile, "a+")
2086 2097 ifh = self.opener(self.indexfile, "a+",
2087 2098 checkambig=self._checkambig)
2088 2099 finally:
2089 2100 if dfh:
2090 2101 dfh.close()
2091 2102 ifh.close()
2092 2103
2093 2104 return nodes
2094 2105
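    # Illustrative shape of the ``deltas`` iterable (not in the original
    # source; names assumed). The linkmapper turns a changelog node into a
    # linkrev, so the changelog's rev() works:
    #
    #   deltas = [
    #       # (node, p1, p2, linknode, deltabase, delta, flags)
    #       (node1, p1node, nullid, clnode1, p1node, delta1, 0),
    #       (node2, node1, nullid, clnode2, node1, delta2, 0),
    #   ]
    #   added = rl.addgroup(deltas, cl.rev, tr)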
2095 2106 def iscensored(self, rev):
2096 2107 """Check if a file revision is censored."""
2097 2108 return False
2098 2109
2099 2110 def _peek_iscensored(self, baserev, delta, flush):
2100 2111 """Quickly check if a delta produces a censored revision."""
2101 2112 return False
2102 2113
2103 2114 def getstrippoint(self, minlink):
2104 2115 """find the minimum rev that must be stripped to strip the linkrev
2105 2116
2106 2117 Returns a tuple containing the minimum rev and a set of all revs that
2107 2118 have linkrevs that will be broken by this strip.
2108 2119 """
2109 2120 brokenrevs = set()
2110 2121 strippoint = len(self)
2111 2122
2112 2123 heads = {}
2113 2124 futurelargelinkrevs = set()
2114 2125 for head in self.headrevs():
2115 2126 headlinkrev = self.linkrev(head)
2116 2127 heads[head] = headlinkrev
2117 2128 if headlinkrev >= minlink:
2118 2129 futurelargelinkrevs.add(headlinkrev)
2119 2130
2120 2131 # This algorithm involves walking down the rev graph, starting at the
2121 2132 # heads. Since the revs are topologically sorted according to linkrev,
2122 2133 # once all head linkrevs are below the minlink, we know there are
2123 2134 # no more revs that could have a linkrev greater than minlink.
2124 2135 # So we can stop walking.
2125 2136 while futurelargelinkrevs:
2126 2137 strippoint -= 1
2127 2138 linkrev = heads.pop(strippoint)
2128 2139
2129 2140 if linkrev < minlink:
2130 2141 brokenrevs.add(strippoint)
2131 2142 else:
2132 2143 futurelargelinkrevs.remove(linkrev)
2133 2144
2134 2145 for p in self.parentrevs(strippoint):
2135 2146 if p != nullrev:
2136 2147 plinkrev = self.linkrev(p)
2137 2148 heads[p] = plinkrev
2138 2149 if plinkrev >= minlink:
2139 2150 futurelargelinkrevs.add(plinkrev)
2140 2151
2141 2152 return strippoint, brokenrevs
2142 2153
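    # Worked example (not in the original source): in a revlog where rev 1
    # and rev 2 are both children of rev 0 and the linkrevs are [0, 2, 1],
    # getstrippoint(2) returns (1, {2}): truncating at rev 1 removes the
    # offending linkrev 2, but also takes out rev 2, whose linkrev 1 < 2 is
    # reported in the broken set so the caller can re-add it afterwards.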
2143 2154 def strip(self, minlink, transaction):
2144 2155 """truncate the revlog on the first revision with a linkrev >= minlink
2145 2156
2146 2157 This function is called when we're stripping revision minlink and
2147 2158 its descendants from the repository.
2148 2159
2149 2160 We have to remove all revisions with linkrev >= minlink, because
2150 2161 the equivalent changelog revisions will be renumbered after the
2151 2162 strip.
2152 2163
2153 2164 So we truncate the revlog on the first of these revisions, and
2154 2165 trust that the caller has saved the revisions that shouldn't be
2155 2166 removed and that it'll re-add them after this truncation.
2156 2167 """
2157 2168 if len(self) == 0:
2158 2169 return
2159 2170
2160 2171 rev, _ = self.getstrippoint(minlink)
2161 2172 if rev == len(self):
2162 2173 return
2163 2174
2164 2175 # first truncate the files on disk
2165 2176 end = self.start(rev)
2166 2177 if not self._inline:
2167 2178 transaction.add(self.datafile, end)
2168 2179 end = rev * self._io.size
2169 2180 else:
2170 2181 end += rev * self._io.size
2171 2182
2172 2183 transaction.add(self.indexfile, end)
2173 2184
2174 2185 # then reset internal state in memory to forget those revisions
2175 2186 self._cache = None
2176 2187 self._chaininfocache = {}
2177 2188 self._chunkclear()
2178 2189 for x in xrange(rev, len(self)):
2179 2190 del self.nodemap[self.node(x)]
2180 2191
2181 2192 del self.index[rev:-1]
2182 2193
2183 2194 def checksize(self):
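        """Check the on-disk file sizes against what the index implies.

        Returns a ``(dd, di)`` pair: ``dd`` is the number of bytes the data
        file deviates from its expected size and ``di`` the number of
        unexpected trailing bytes in the index file. Both are 0 for a
        consistent revlog; missing files are treated as empty.
        """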
2184 2195 expected = 0
2185 2196 if len(self):
2186 2197 expected = max(0, self.end(len(self) - 1))
2187 2198
2188 2199 try:
2189 2200 f = self.opener(self.datafile)
2190 2201 f.seek(0, 2)
2191 2202 actual = f.tell()
2192 2203 f.close()
2193 2204 dd = actual - expected
2194 2205 except IOError as inst:
2195 2206 if inst.errno != errno.ENOENT:
2196 2207 raise
2197 2208 dd = 0
2198 2209
2199 2210 try:
2200 2211 f = self.opener(self.indexfile)
2201 2212 f.seek(0, 2)
2202 2213 actual = f.tell()
2203 2214 f.close()
2204 2215 s = self._io.size
2205 2216 i = max(0, actual // s)
2206 2217 di = actual - (i * s)
2207 2218 if self._inline:
2208 2219 databytes = 0
2209 2220 for r in self:
2210 2221 databytes += max(0, self.length(r))
2211 2222 dd = 0
2212 2223 di = actual - len(self) * s - databytes
2213 2224 except IOError as inst:
2214 2225 if inst.errno != errno.ENOENT:
2215 2226 raise
2216 2227 di = 0
2217 2228
2218 2229 return (dd, di)
2219 2230
2220 2231 def files(self):
2221 2232 res = [self.indexfile]
2222 2233 if not self._inline:
2223 2234 res.append(self.datafile)
2224 2235 return res
2225 2236
2226 2237 DELTAREUSEALWAYS = 'always'
2227 2238 DELTAREUSESAMEREVS = 'samerevs'
2228 2239 DELTAREUSENEVER = 'never'
2229 2240
2230 2241 DELTAREUSEALL = {'always', 'samerevs', 'never'}
2231 2242
2232 2243 def clone(self, tr, destrevlog, addrevisioncb=None,
2233 2244 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2234 2245 """Copy this revlog to another, possibly with format changes.
2235 2246
2236 2247 The destination revlog will contain the same revisions and nodes.
2237 2248 However, it may not be bit-for-bit identical due to e.g. delta encoding
2238 2249 differences.
2239 2250
2240 2251         The ``deltareuse`` argument controls how deltas from the existing revlog
2241 2252 are preserved in the destination revlog. The argument can have the
2242 2253 following values:
2243 2254
2244 2255 DELTAREUSEALWAYS
2245 2256 Deltas will always be reused (if possible), even if the destination
2246 2257 revlog would not select the same revisions for the delta. This is the
2247 2258 fastest mode of operation.
2248 2259 DELTAREUSESAMEREVS
2249 2260 Deltas will be reused if the destination revlog would pick the same
2250 2261 revisions for the delta. This mode strikes a balance between speed
2251 2262 and optimization.
2252 2263 DELTAREUSENEVER
2253 2264 Deltas will never be reused. This is the slowest mode of execution.
2254 2265 This mode can be used to recompute deltas (e.g. if the diff/delta
2255 2266 algorithm changes).
2256 2267
2257 2268 Delta computation can be slow, so the choice of delta reuse policy can
2258 2269 significantly affect run time.
2259 2270
2260 2271 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2261 2272 two extremes. Deltas will be reused if they are appropriate. But if the
2262 2273         two extremes. Deltas will be reused when appropriate, but if the
2263 2274         destination revlog would pick a better base, they are recomputed. This means if you
2264 2275 deltas will be recomputed if the delta's parent isn't a parent of the
2265 2276 revision.
2266 2277
2267 2278 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2268 2279 controls whether to compute deltas against both parents for merges.
2269 2280         If unset, the destination revlog's current setting is kept.
2270 2281 """
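        # Illustrative usage (assumed caller setup, not in the original
        # source):
        #
        #   with repo.transaction('upgrade') as tr:
        #       oldrl.clone(tr, newrl, deltareuse=oldrl.DELTAREUSENEVER)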
2271 2282 if deltareuse not in self.DELTAREUSEALL:
2272 2283 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2273 2284
2274 2285 if len(destrevlog):
2275 2286 raise ValueError(_('destination revlog is not empty'))
2276 2287
2277 2288 if getattr(self, 'filteredrevs', None):
2278 2289 raise ValueError(_('source revlog has filtered revisions'))
2279 2290 if getattr(destrevlog, 'filteredrevs', None):
2280 2291 raise ValueError(_('destination revlog has filtered revisions'))
2281 2292
2282 2293 # lazydeltabase controls whether to reuse a cached delta, if possible.
2283 2294 oldlazydeltabase = destrevlog._lazydeltabase
2284 2295 oldamd = destrevlog._aggressivemergedeltas
2285 2296
2286 2297 try:
2287 2298 if deltareuse == self.DELTAREUSEALWAYS:
2288 2299 destrevlog._lazydeltabase = True
2289 2300 elif deltareuse == self.DELTAREUSESAMEREVS:
2290 2301 destrevlog._lazydeltabase = False
2291 2302
2292 2303 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2293 2304
2294 2305 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2295 2306 self.DELTAREUSESAMEREVS)
2296 2307
2297 2308 index = self.index
2298 2309 for rev in self:
2299 2310 entry = index[rev]
2300 2311
2301 2312 # Some classes override linkrev to take filtered revs into
2302 2313 # account. Use raw entry from index.
2303 2314 flags = entry[0] & 0xffff
2304 2315 linkrev = entry[4]
2305 2316 p1 = index[entry[5]][7]
2306 2317 p2 = index[entry[6]][7]
2307 2318 node = entry[7]
2308 2319
2309 2320 # (Possibly) reuse the delta from the revlog if allowed and
2310 2321 # the revlog chunk is a delta.
2311 2322 cachedelta = None
2312 2323 rawtext = None
2313 2324 if populatecachedelta:
2314 2325 dp = self.deltaparent(rev)
2315 2326 if dp != nullrev:
2316 2327 cachedelta = (dp, str(self._chunk(rev)))
2317 2328
2318 2329 if not cachedelta:
2319 2330 rawtext = self.revision(rev, raw=True)
2320 2331
2321 2332 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2322 2333 checkambig=False)
2323 2334 dfh = None
2324 2335 if not destrevlog._inline:
2325 2336 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2326 2337 try:
2327 2338 destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
2328 2339 flags, cachedelta, ifh, dfh)
2329 2340 finally:
2330 2341 if dfh:
2331 2342 dfh.close()
2332 2343 ifh.close()
2333 2344
2334 2345 if addrevisioncb:
2335 2346 addrevisioncb(self, rev, node)
2336 2347 finally:
2337 2348 destrevlog._lazydeltabase = oldlazydeltabase
2338 2349 destrevlog._aggressivemergedeltas = oldamd