revlog: tweak wording and logic for flags validation...
Gregory Szorc
r32391:36d3559c default
@@ -1,2152 +1,2156 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import errno
18 18 import hashlib
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullid,
28 28 nullrev,
29 29 )
30 30 from .i18n import _
31 31 from . import (
32 32 ancestor,
33 33 error,
34 34 mdiff,
35 35 policy,
36 36 pycompat,
37 37 templatefilters,
38 38 util,
39 39 )
40 40
41 41 parsers = policy.importmod(r'parsers')
42 42
43 43 _pack = struct.pack
44 44 _unpack = struct.unpack
45 45 # Aliased for performance.
46 46 _zlibdecompress = zlib.decompress
47 47
48 48 # revlog header flags
49 49 REVLOGV0 = 0
50 50 REVLOGV1 = 1
51 51 FLAG_INLINE_DATA = (1 << 16)
52 52 FLAG_GENERALDELTA = (1 << 17)
53 53 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
54 54 REVLOG_DEFAULT_FORMAT = REVLOGV1
55 55 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
56 56 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
57 57
58 58 # revlog index flags
59 59 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
60 60 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
61 61 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
62 62 REVIDX_DEFAULT_FLAGS = 0
63 63 # stable order in which flags need to be processed and their processors applied
64 64 REVIDX_FLAGS_ORDER = [
65 65 REVIDX_ISCENSORED,
66 66 REVIDX_ELLIPSIS,
67 67 REVIDX_EXTSTORED,
68 68 ]
69 69 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
70 70
71 71 # max size of revlog with inline data
72 72 _maxinline = 131072
73 73 _chunksize = 1048576
74 74
75 75 RevlogError = error.RevlogError
76 76 LookupError = error.LookupError
77 77 CensoredNodeError = error.CensoredNodeError
78 78 ProgrammingError = error.ProgrammingError
79 79
80 80 # Store flag processors (cf. 'addflagprocessor()' to register)
81 81 _flagprocessors = {
82 82 REVIDX_ISCENSORED: None,
83 83 }
84 84
85 85 def addflagprocessor(flag, processor):
86 86 """Register a flag processor on a revision data flag.
87 87
88 88 Invariant:
89 89 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
90 90 - Only one flag processor can be registered on a specific flag.
91 91 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
92 92 following signatures:
93 93 - (read) f(self, rawtext) -> text, bool
94 94 - (write) f(self, text) -> rawtext, bool
95 95 - (raw) f(self, rawtext) -> bool
96 96 "text" is presented to the user. "rawtext" is stored in revlog data, not
97 97 directly visible to the user.
98 98 The boolean returned by these transforms is used to determine whether
99 99 the returned text can be used for hash integrity checking. For example,
100 100 if "write" returns False, then "text" is used to generate hash. If
101 101 "write" returns True, that basically means "rawtext" returned by "write"
102 102 should be used to generate hash. Usually, "write" and "read" return
103 103 different booleans. And "raw" returns a same boolean as "write".
104 104
105 105 Note: The 'raw' transform is used for changegroup generation and in some
106 106 debug commands. In this case the transform only indicates whether the
107 107 contents can be used for hash integrity checks.
108 108 """
109 109 if not flag & REVIDX_KNOWN_FLAGS:
110 110 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
111 111 raise ProgrammingError(msg)
112 112 if flag not in REVIDX_FLAGS_ORDER:
113 113 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
114 114 raise ProgrammingError(msg)
115 115 if flag in _flagprocessors:
116 116 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
117 117 raise error.Abort(msg)
118 118 _flagprocessors[flag] = processor
119 119
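# Illustrative sketch (not part of revlog.py): registering a processor on
# REVIDX_EXTSTORED, with hypothetical fetchexternal/storeexternal helpers.
# Per the docstring above, "read" recovers real content (safe to hash, so
# True), while "write" stores only a pointer (the hash must come from
# "text", so False), and "raw" returns the same boolean as "write".
def _registerextstored(fetchexternal, storeexternal):
    def readext(rl, rawtext):
        # resolve the stored pointer back into the user-visible text
        return fetchexternal(rawtext), True
    def writeext(rl, text):
        # store the text externally; keep only a pointer in the revlog
        return storeexternal(text), False
    def rawext(rl, rawtext):
        # pointer data cannot be checked against the node hash
        return False
    addflagprocessor(REVIDX_EXTSTORED, (readext, writeext, rawext))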
120 120 def getoffset(q):
121 121 return int(q >> 16)
122 122
123 123 def gettype(q):
124 124 return int(q & 0xFFFF)
125 125
126 126 def offset_type(offset, type):
127 127 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
128 128 raise ValueError('unknown revlog index flags')
129 129 return int(int(offset) << 16 | type)
130 130
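# Illustrative sketch: the helpers above round-trip the packed first index
# field, which stores a 48-bit byte offset in the high bits and 16 flag
# bits in the low bits.
def _demooffsettype():
    q = offset_type(1024, REVIDX_ISCENSORED)
    assert getoffset(q) == 1024
    assert gettype(q) == REVIDX_ISCENSORED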
131 131 _nullhash = hashlib.sha1(nullid)
132 132
133 133 def hash(text, p1, p2):
134 134 """generate a hash from the given text and its parent hashes
135 135
136 136 This hash combines both the current file contents and its history
137 137 in a manner that makes it easy to distinguish nodes with the same
138 138 content in the revision graph.
139 139 """
140 140 # As of now, if one of the parent nodes is null, p2 is null
141 141 if p2 == nullid:
142 142 # deep copy of a hash is faster than creating one
143 143 s = _nullhash.copy()
144 144 s.update(p1)
145 145 else:
146 146 # neither of the parent nodes is nullid
147 147 l = [p1, p2]
148 148 l.sort()
149 149 s = hashlib.sha1(l[0])
150 150 s.update(l[1])
151 151 s.update(text)
152 152 return s.digest()
153 153
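# Illustrative sketch: the _nullhash fast path above is only an
# optimization; hash() is always SHA-1 over the two parent nodes in sorted
# order followed by the text.
def _checkhashdefinition(text, p1, p2):
    expected = hashlib.sha1(min(p1, p2) + max(p1, p2) + text).digest()
    assert expected == hash(text, p1, p2)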
154 154 # index v0:
155 155 # 4 bytes: offset
156 156 # 4 bytes: compressed length
157 157 # 4 bytes: base rev
158 158 # 4 bytes: link rev
159 159 # 20 bytes: parent 1 nodeid
160 160 # 20 bytes: parent 2 nodeid
161 161 # 20 bytes: nodeid
162 162 indexformatv0 = ">4l20s20s20s"
163 163
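# Illustrative sketch: decoding one v0 index record with the format string
# above. Each record is 76 bytes: four 32-bit big-endian ints plus three
# 20-byte node ids.
def _unpackv0entry(record):
    assert struct.calcsize(indexformatv0) == 76
    return _unpack(indexformatv0, record[:76])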
164 164 class revlogoldio(object):
165 165 def __init__(self):
166 166 self.size = struct.calcsize(indexformatv0)
167 167
168 168 def parseindex(self, data, inline):
169 169 s = self.size
170 170 index = []
171 171 nodemap = {nullid: nullrev}
172 172 n = off = 0
173 173 l = len(data)
174 174 while off + s <= l:
175 175 cur = data[off:off + s]
176 176 off += s
177 177 e = _unpack(indexformatv0, cur)
178 178 # transform to revlogv1 format
179 179 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
180 180 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
181 181 index.append(e2)
182 182 nodemap[e[6]] = n
183 183 n += 1
184 184
185 185 # add the magic null revision at -1
186 186 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
187 187
188 188 return index, nodemap, None
189 189
190 190 def packentry(self, entry, node, version, rev):
191 191 if gettype(entry[0]):
192 192 raise RevlogError(_("index entry flags need RevlogNG"))
193 193 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
194 194 node(entry[5]), node(entry[6]), entry[7])
195 195 return _pack(indexformatv0, *e2)
196 196
197 197 # index ng:
198 198 # 6 bytes: offset
199 199 # 2 bytes: flags
200 200 # 4 bytes: compressed length
201 201 # 4 bytes: uncompressed length
202 202 # 4 bytes: base rev
203 203 # 4 bytes: link rev
204 204 # 4 bytes: parent 1 rev
205 205 # 4 bytes: parent 2 rev
206 206 # 32 bytes: nodeid
207 207 indexformatng = ">Qiiiiii20s12x"
208 208 versionformat = ">I"
209 209
210 210 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
211 211 # signed integer)
212 212 _maxentrysize = 0x7fffffff
213 213
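# Illustrative sketch: decoding one 64-byte RevlogNG record. The leading Q
# holds the offset/flags pair built by offset_type(), and the trailing 12x
# pads the 20-byte node out to 32 bytes.
def _unpackngentry(record):
    assert struct.calcsize(indexformatng) == 64
    offset_flags, clen, ulen, base, link, p1, p2, node = _unpack(
        indexformatng, record[:64])
    return getoffset(offset_flags), gettype(offset_flags), clen, ulen, node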
214 214 class revlogio(object):
215 215 def __init__(self):
216 216 self.size = struct.calcsize(indexformatng)
217 217
218 218 def parseindex(self, data, inline):
219 219 # call the C implementation to parse the index data
220 220 index, cache = parsers.parse_index2(data, inline)
221 221 return index, getattr(index, 'nodemap', None), cache
222 222
223 223 def packentry(self, entry, node, version, rev):
224 224 p = _pack(indexformatng, *entry)
225 225 if rev == 0:
226 226 p = _pack(versionformat, version) + p[4:]
227 227 return p
228 228
229 229 class revlog(object):
230 230 """
231 231 the underlying revision storage object
232 232
233 233 A revlog consists of two parts, an index and the revision data.
234 234
235 235 The index is a file with a fixed record size containing
236 236 information on each revision, including its nodeid (hash), the
237 237 nodeids of its parents, the position and offset of its data within
238 238 the data file, and the revision it's based on. Finally, each entry
239 239 contains a linkrev entry that can serve as a pointer to external
240 240 data.
241 241
242 242 The revision data itself is a linear collection of data chunks.
243 243 Each chunk represents a revision and is usually represented as a
244 244 delta against the previous chunk. To bound lookup time, runs of
245 245 deltas are limited to about 2 times the length of the original
246 246 version data. This makes retrieval of a version proportional to
247 247 its size, or O(1) relative to the number of revisions.
248 248
249 249 Both pieces of the revlog are written to in an append-only
250 250 fashion, which means we never need to rewrite a file to insert or
251 251 remove data, and can use some simple techniques to avoid the need
252 252 for locking while reading.
253 253
254 254 If checkambig, indexfile is opened with checkambig=True at
255 255 writing, to avoid file stat ambiguity.
256 256 """
257 257 def __init__(self, opener, indexfile, datafile=None, checkambig=False):
258 258 """
259 259 create a revlog object
260 260
261 261 opener is a function that abstracts the file opening operation
262 262 and can be used to implement COW semantics or the like.
263 263 """
264 264 self.indexfile = indexfile
265 265 self.datafile = datafile or (indexfile[:-2] + ".d")
266 266 self.opener = opener
267 267 # When True, indexfile is opened with checkambig=True at writing, to
268 268 # avoid file stat ambiguity.
269 269 self._checkambig = checkambig
270 270 # 3-tuple of (node, rev, text) for a raw revision.
271 271 self._cache = None
272 272 # Maps rev to chain base rev.
273 273 self._chainbasecache = util.lrucachedict(100)
274 274 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
275 275 self._chunkcache = (0, '')
276 276 # How much data to read and cache into the raw revlog data cache.
277 277 self._chunkcachesize = 65536
278 278 self._maxchainlen = None
279 279 self._aggressivemergedeltas = False
280 280 self.index = []
281 281 # Mapping of partial identifiers to full nodes.
282 282 self._pcache = {}
283 283 # Mapping of revision integer to full node.
284 284 self._nodecache = {nullid: nullrev}
285 285 self._nodepos = None
286 286 self._compengine = 'zlib'
287 287
288 288 v = REVLOG_DEFAULT_VERSION
289 289 opts = getattr(opener, 'options', None)
290 290 if opts is not None:
291 291 if 'revlogv1' in opts:
292 292 if 'generaldelta' in opts:
293 293 v |= FLAG_GENERALDELTA
294 294 else:
295 295 v = 0
296 296 if 'chunkcachesize' in opts:
297 297 self._chunkcachesize = opts['chunkcachesize']
298 298 if 'maxchainlen' in opts:
299 299 self._maxchainlen = opts['maxchainlen']
300 300 if 'aggressivemergedeltas' in opts:
301 301 self._aggressivemergedeltas = opts['aggressivemergedeltas']
302 302 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
303 303 if 'compengine' in opts:
304 304 self._compengine = opts['compengine']
305 305
306 306 if self._chunkcachesize <= 0:
307 307 raise RevlogError(_('revlog chunk cache size %r is not greater '
308 308 'than 0') % self._chunkcachesize)
309 309 elif self._chunkcachesize & (self._chunkcachesize - 1):
310 310 raise RevlogError(_('revlog chunk cache size %r is not a power '
311 311 'of 2') % self._chunkcachesize)
312 312
313 313 indexdata = ''
314 314 self._initempty = True
315 315 try:
316 316 f = self.opener(self.indexfile)
317 317 indexdata = f.read()
318 318 f.close()
319 319 if len(indexdata) > 0:
320 320 v = struct.unpack(versionformat, indexdata[:4])[0]
321 321 self._initempty = False
322 322 except IOError as inst:
323 323 if inst.errno != errno.ENOENT:
324 324 raise
325 325
326 326 self.version = v
327 327 self._inline = v & FLAG_INLINE_DATA
328 328 self._generaldelta = v & FLAG_GENERALDELTA
329 329 flags = v & ~0xFFFF
330 330 fmt = v & 0xFFFF
331 if fmt == REVLOGV0 and flags:
332 raise RevlogError(_("index %s unknown flags %#04x for format v0")
333 % (self.indexfile, flags >> 16))
334 elif fmt == REVLOGV1 and flags & ~REVLOGV1_FLAGS:
335 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
336 % (self.indexfile, flags >> 16))
337 elif fmt > REVLOGV1:
338 raise RevlogError(_("index %s unknown format %d")
339 % (self.indexfile, fmt))
331 if fmt == REVLOGV0:
332 if flags:
333 raise RevlogError(_('unknown flags (%#04x) in version %d '
334 'revlog %s') %
335 (flags >> 16, fmt, self.indexfile))
336 elif fmt == REVLOGV1:
337 if flags & ~REVLOGV1_FLAGS:
338 raise RevlogError(_('unknown flags (%#04x) in version %d '
339 'revlog %s') %
340 (flags >> 16, fmt, self.indexfile))
341 else:
342 raise RevlogError(_('unknown version (%d) in revlog %s') %
343 (fmt, self.indexfile))
340 344
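# Worked example of the header split above: REVLOG_DEFAULT_VERSION is
# REVLOGV1 | FLAG_INLINE_DATA = 0x10001, so fmt = 0x10001 & 0xFFFF = 1
# (REVLOGV1) and flags = 0x10001 & ~0xFFFF = 0x10000 (FLAG_INLINE_DATA),
# which is within REVLOGV1_FLAGS and therefore raises no error.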
341 345 self.storedeltachains = True
342 346
343 347 self._io = revlogio()
344 348 if self.version == REVLOGV0:
345 349 self._io = revlogoldio()
346 350 try:
347 351 d = self._io.parseindex(indexdata, self._inline)
348 352 except (ValueError, IndexError):
349 353 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
350 354 self.index, nodemap, self._chunkcache = d
351 355 if nodemap is not None:
352 356 self.nodemap = self._nodecache = nodemap
353 357 if not self._chunkcache:
354 358 self._chunkclear()
355 359 # revnum -> (chain-length, sum-delta-length)
356 360 self._chaininfocache = {}
357 361 # revlog header -> revlog compressor
358 362 self._decompressors = {}
359 363
360 364 @util.propertycache
361 365 def _compressor(self):
362 366 return util.compengines[self._compengine].revlogcompressor()
363 367
364 368 def tip(self):
365 369 return self.node(len(self.index) - 2)
366 370 def __contains__(self, rev):
367 371 return 0 <= rev < len(self)
368 372 def __len__(self):
369 373 return len(self.index) - 1
370 374 def __iter__(self):
371 375 return iter(xrange(len(self)))
372 376 def revs(self, start=0, stop=None):
373 377 """iterate over all rev in this revlog (from start to stop)"""
374 378 step = 1
375 379 if stop is not None:
376 380 if start > stop:
377 381 step = -1
378 382 stop += step
379 383 else:
380 384 stop = len(self)
381 385 return xrange(start, stop, step)
382 386
383 387 @util.propertycache
384 388 def nodemap(self):
385 389 self.rev(self.node(0))
386 390 return self._nodecache
387 391
388 392 def hasnode(self, node):
389 393 try:
390 394 self.rev(node)
391 395 return True
392 396 except KeyError:
393 397 return False
394 398
395 399 def clearcaches(self):
396 400 self._cache = None
397 401 self._chainbasecache.clear()
398 402 self._chunkcache = (0, '')
399 403 self._pcache = {}
400 404
401 405 try:
402 406 self._nodecache.clearcaches()
403 407 except AttributeError:
404 408 self._nodecache = {nullid: nullrev}
405 409 self._nodepos = None
406 410
407 411 def rev(self, node):
408 412 try:
409 413 return self._nodecache[node]
410 414 except TypeError:
411 415 raise
412 416 except RevlogError:
413 417 # parsers.c radix tree lookup failed
414 418 raise LookupError(node, self.indexfile, _('no node'))
415 419 except KeyError:
416 420 # pure python cache lookup failed
417 421 n = self._nodecache
418 422 i = self.index
419 423 p = self._nodepos
420 424 if p is None:
421 425 p = len(i) - 2
422 426 for r in xrange(p, -1, -1):
423 427 v = i[r][7]
424 428 n[v] = r
425 429 if v == node:
426 430 self._nodepos = r - 1
427 431 return r
428 432 raise LookupError(node, self.indexfile, _('no node'))
429 433
430 434 # Accessors for index entries.
431 435
432 436 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
433 437 # are flags.
434 438 def start(self, rev):
435 439 return int(self.index[rev][0] >> 16)
436 440
437 441 def flags(self, rev):
438 442 return self.index[rev][0] & 0xFFFF
439 443
440 444 def length(self, rev):
441 445 return self.index[rev][1]
442 446
443 447 def rawsize(self, rev):
444 448 """return the length of the uncompressed text for a given revision"""
445 449 l = self.index[rev][2]
446 450 if l >= 0:
447 451 return l
448 452
449 453 t = self.revision(rev, raw=True)
450 454 return len(t)
451 455
452 456 def size(self, rev):
453 457 """length of non-raw text (processed by a "read" flag processor)"""
454 458 # fast path: if no "read" flag processor could change the content,
455 459 # size is rawsize. note: ELLIPSIS is known to not change the content.
456 460 flags = self.flags(rev)
457 461 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
458 462 return self.rawsize(rev)
459 463
460 464 return len(self.revision(rev, raw=False))
461 465
462 466 def chainbase(self, rev):
463 467 base = self._chainbasecache.get(rev)
464 468 if base is not None:
465 469 return base
466 470
467 471 index = self.index
468 472 base = index[rev][3]
469 473 while base != rev:
470 474 rev = base
471 475 base = index[rev][3]
472 476
473 477 self._chainbasecache[rev] = base
474 478 return base
475 479
476 480 def linkrev(self, rev):
477 481 return self.index[rev][4]
478 482
479 483 def parentrevs(self, rev):
480 484 return self.index[rev][5:7]
481 485
482 486 def node(self, rev):
483 487 return self.index[rev][7]
484 488
485 489 # Derived from index values.
486 490
487 491 def end(self, rev):
488 492 return self.start(rev) + self.length(rev)
489 493
490 494 def parents(self, node):
491 495 i = self.index
492 496 d = i[self.rev(node)]
493 497 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
494 498
495 499 def chainlen(self, rev):
496 500 return self._chaininfo(rev)[0]
497 501
498 502 def _chaininfo(self, rev):
499 503 chaininfocache = self._chaininfocache
500 504 if rev in chaininfocache:
501 505 return chaininfocache[rev]
502 506 index = self.index
503 507 generaldelta = self._generaldelta
504 508 iterrev = rev
505 509 e = index[iterrev]
506 510 clen = 0
507 511 compresseddeltalen = 0
508 512 while iterrev != e[3]:
509 513 clen += 1
510 514 compresseddeltalen += e[1]
511 515 if generaldelta:
512 516 iterrev = e[3]
513 517 else:
514 518 iterrev -= 1
515 519 if iterrev in chaininfocache:
516 520 t = chaininfocache[iterrev]
517 521 clen += t[0]
518 522 compresseddeltalen += t[1]
519 523 break
520 524 e = index[iterrev]
521 525 else:
522 526 # Add text length of base since decompressing that also takes
523 527 # work. For cache hits the length is already included.
524 528 compresseddeltalen += e[1]
525 529 r = (clen, compresseddeltalen)
526 530 chaininfocache[rev] = r
527 531 return r
528 532
529 533 def _deltachain(self, rev, stoprev=None):
530 534 """Obtain the delta chain for a revision.
531 535
532 536 ``stoprev`` specifies a revision to stop at. If not specified, we
533 537 stop at the base of the chain.
534 538
535 539 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
536 540 revs in ascending order and ``stopped`` is a bool indicating whether
537 541 ``stoprev`` was hit.
538 542 """
539 543 chain = []
540 544
541 545 # Alias to prevent attribute lookup in tight loop.
542 546 index = self.index
543 547 generaldelta = self._generaldelta
544 548
545 549 iterrev = rev
546 550 e = index[iterrev]
547 551 while iterrev != e[3] and iterrev != stoprev:
548 552 chain.append(iterrev)
549 553 if generaldelta:
550 554 iterrev = e[3]
551 555 else:
552 556 iterrev -= 1
553 557 e = index[iterrev]
554 558
555 559 if iterrev == stoprev:
556 560 stopped = True
557 561 else:
558 562 chain.append(iterrev)
559 563 stopped = False
560 564
561 565 chain.reverse()
562 566 return chain, stopped
563 567
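# Illustrative note: with generaldelta, each entry's base field (e[3])
# names its delta parent, so the chain for rev 5 might be e.g. [2, 3, 5];
# without generaldelta a chain is a consecutive run of revs down to the
# chain base, e.g. [3, 4, 5].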
564 568 def ancestors(self, revs, stoprev=0, inclusive=False):
565 569 """Generate the ancestors of 'revs' in reverse topological order.
566 570 Does not generate revs lower than stoprev.
567 571
568 572 See the documentation for ancestor.lazyancestors for more details."""
569 573
570 574 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
571 575 inclusive=inclusive)
572 576
573 577 def descendants(self, revs):
574 578 """Generate the descendants of 'revs' in revision order.
575 579
576 580 Yield a sequence of revision numbers starting with a child of
577 581 some rev in revs, i.e., each revision is *not* considered a
578 582 descendant of itself. Results are ordered by revision number (a
579 583 topological sort)."""
580 584 first = min(revs)
581 585 if first == nullrev:
582 586 for i in self:
583 587 yield i
584 588 return
585 589
586 590 seen = set(revs)
587 591 for i in self.revs(start=first + 1):
588 592 for x in self.parentrevs(i):
589 593 if x != nullrev and x in seen:
590 594 seen.add(i)
591 595 yield i
592 596 break
593 597
594 598 def findcommonmissing(self, common=None, heads=None):
595 599 """Return a tuple of the ancestors of common and the ancestors of heads
596 600 that are not ancestors of common. In revset terminology, we return the
597 601 tuple:
598 602
599 603 ::common, (::heads) - (::common)
600 604
601 605 The list is sorted by revision number, meaning it is
602 606 topologically sorted.
603 607
604 608 'heads' and 'common' are both lists of node IDs. If heads is
605 609 not supplied, uses all of the revlog's heads. If common is not
606 610 supplied, uses nullid."""
607 611 if common is None:
608 612 common = [nullid]
609 613 if heads is None:
610 614 heads = self.heads()
611 615
612 616 common = [self.rev(n) for n in common]
613 617 heads = [self.rev(n) for n in heads]
614 618
615 619 # we want the ancestors, but inclusive
616 620 class lazyset(object):
617 621 def __init__(self, lazyvalues):
618 622 self.addedvalues = set()
619 623 self.lazyvalues = lazyvalues
620 624
621 625 def __contains__(self, value):
622 626 return value in self.addedvalues or value in self.lazyvalues
623 627
624 628 def __iter__(self):
625 629 added = self.addedvalues
626 630 for r in added:
627 631 yield r
628 632 for r in self.lazyvalues:
629 633 if r not in added:
630 634 yield r
631 635
632 636 def add(self, value):
633 637 self.addedvalues.add(value)
634 638
635 639 def update(self, values):
636 640 self.addedvalues.update(values)
637 641
638 642 has = lazyset(self.ancestors(common))
639 643 has.add(nullrev)
640 644 has.update(common)
641 645
642 646 # take all ancestors from heads that aren't in has
643 647 missing = set()
644 648 visit = collections.deque(r for r in heads if r not in has)
645 649 while visit:
646 650 r = visit.popleft()
647 651 if r in missing:
648 652 continue
649 653 else:
650 654 missing.add(r)
651 655 for p in self.parentrevs(r):
652 656 if p not in has:
653 657 visit.append(p)
654 658 missing = list(missing)
655 659 missing.sort()
656 660 return has, [self.node(miss) for miss in missing]
657 661
658 662 def incrementalmissingrevs(self, common=None):
659 663 """Return an object that can be used to incrementally compute the
660 664 revision numbers of the ancestors of arbitrary sets that are not
661 665 ancestors of common. This is an ancestor.incrementalmissingancestors
662 666 object.
663 667
664 668 'common' is a list of revision numbers. If common is not supplied, uses
665 669 nullrev.
666 670 """
667 671 if common is None:
668 672 common = [nullrev]
669 673
670 674 return ancestor.incrementalmissingancestors(self.parentrevs, common)
671 675
672 676 def findmissingrevs(self, common=None, heads=None):
673 677 """Return the revision numbers of the ancestors of heads that
674 678 are not ancestors of common.
675 679
676 680 More specifically, return a list of revision numbers corresponding to
677 681 nodes N such that every N satisfies the following constraints:
678 682
679 683 1. N is an ancestor of some node in 'heads'
680 684 2. N is not an ancestor of any node in 'common'
681 685
682 686 The list is sorted by revision number, meaning it is
683 687 topologically sorted.
684 688
685 689 'heads' and 'common' are both lists of revision numbers. If heads is
686 690 not supplied, uses all of the revlog's heads. If common is not
687 691 supplied, uses nullid."""
688 692 if common is None:
689 693 common = [nullrev]
690 694 if heads is None:
691 695 heads = self.headrevs()
692 696
693 697 inc = self.incrementalmissingrevs(common=common)
694 698 return inc.missingancestors(heads)
695 699
696 700 def findmissing(self, common=None, heads=None):
697 701 """Return the ancestors of heads that are not ancestors of common.
698 702
699 703 More specifically, return a list of nodes N such that every N
700 704 satisfies the following constraints:
701 705
702 706 1. N is an ancestor of some node in 'heads'
703 707 2. N is not an ancestor of any node in 'common'
704 708
705 709 The list is sorted by revision number, meaning it is
706 710 topologically sorted.
707 711
708 712 'heads' and 'common' are both lists of node IDs. If heads is
709 713 not supplied, uses all of the revlog's heads. If common is not
710 714 supplied, uses nullid."""
711 715 if common is None:
712 716 common = [nullid]
713 717 if heads is None:
714 718 heads = self.heads()
715 719
716 720 common = [self.rev(n) for n in common]
717 721 heads = [self.rev(n) for n in heads]
718 722
719 723 inc = self.incrementalmissingrevs(common=common)
720 724 return [self.node(r) for r in inc.missingancestors(heads)]
721 725
722 726 def nodesbetween(self, roots=None, heads=None):
723 727 """Return a topological path from 'roots' to 'heads'.
724 728
725 729 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
726 730 topologically sorted list of all nodes N that satisfy both of
727 731 these constraints:
728 732
729 733 1. N is a descendant of some node in 'roots'
730 734 2. N is an ancestor of some node in 'heads'
731 735
732 736 Every node is considered to be both a descendant and an ancestor
733 737 of itself, so every reachable node in 'roots' and 'heads' will be
734 738 included in 'nodes'.
735 739
736 740 'outroots' is the list of reachable nodes in 'roots', i.e., the
737 741 subset of 'roots' that is returned in 'nodes'. Likewise,
738 742 'outheads' is the subset of 'heads' that is also in 'nodes'.
739 743
740 744 'roots' and 'heads' are both lists of node IDs. If 'roots' is
741 745 unspecified, uses nullid as the only root. If 'heads' is
742 746 unspecified, uses list of all of the revlog's heads."""
743 747 nonodes = ([], [], [])
744 748 if roots is not None:
745 749 roots = list(roots)
746 750 if not roots:
747 751 return nonodes
748 752 lowestrev = min([self.rev(n) for n in roots])
749 753 else:
750 754 roots = [nullid] # Everybody's a descendant of nullid
751 755 lowestrev = nullrev
752 756 if (lowestrev == nullrev) and (heads is None):
753 757 # We want _all_ the nodes!
754 758 return ([self.node(r) for r in self], [nullid], list(self.heads()))
755 759 if heads is None:
756 760 # All nodes are ancestors, so the latest ancestor is the last
757 761 # node.
758 762 highestrev = len(self) - 1
759 763 # Set ancestors to None to signal that every node is an ancestor.
760 764 ancestors = None
761 765 # Set heads to an empty dictionary for later discovery of heads
762 766 heads = {}
763 767 else:
764 768 heads = list(heads)
765 769 if not heads:
766 770 return nonodes
767 771 ancestors = set()
768 772 # Turn heads into a dictionary so we can remove 'fake' heads.
769 773 # Also, later we will be using it to filter out the heads we can't
770 774 # find from roots.
771 775 heads = dict.fromkeys(heads, False)
772 776 # Start at the top and keep marking parents until we're done.
773 777 nodestotag = set(heads)
774 778 # Remember where the top was so we can use it as a limit later.
775 779 highestrev = max([self.rev(n) for n in nodestotag])
776 780 while nodestotag:
777 781 # grab a node to tag
778 782 n = nodestotag.pop()
779 783 # Never tag nullid
780 784 if n == nullid:
781 785 continue
782 786 # A node's revision number represents its place in a
783 787 # topologically sorted list of nodes.
784 788 r = self.rev(n)
785 789 if r >= lowestrev:
786 790 if n not in ancestors:
787 791 # If we are possibly a descendant of one of the roots
788 792 # and we haven't already been marked as an ancestor
789 793 ancestors.add(n) # Mark as ancestor
790 794 # Add non-nullid parents to list of nodes to tag.
791 795 nodestotag.update([p for p in self.parents(n) if
792 796 p != nullid])
793 797 elif n in heads: # We've seen it before, is it a fake head?
794 798 # So it is, real heads should not be the ancestors of
795 799 # any other heads.
796 800 heads.pop(n)
797 801 if not ancestors:
798 802 return nonodes
799 803 # Now that we have our set of ancestors, we want to remove any
800 804 # roots that are not ancestors.
801 805
802 806 # If one of the roots was nullid, everything is included anyway.
803 807 if lowestrev > nullrev:
804 808 # But, since we weren't, let's recompute the lowest rev to not
805 809 # include roots that aren't ancestors.
806 810
807 811 # Filter out roots that aren't ancestors of heads
808 812 roots = [root for root in roots if root in ancestors]
809 813 # Recompute the lowest revision
810 814 if roots:
811 815 lowestrev = min([self.rev(root) for root in roots])
812 816 else:
813 817 # No more roots? Return empty list
814 818 return nonodes
815 819 else:
816 820 # We are descending from nullid, and don't need to care about
817 821 # any other roots.
818 822 lowestrev = nullrev
819 823 roots = [nullid]
820 824 # Transform our roots list into a set.
821 825 descendants = set(roots)
822 826 # Also, keep the original roots so we can filter out roots that aren't
823 827 # 'real' roots (i.e. are descended from other roots).
824 828 roots = descendants.copy()
825 829 # Our topologically sorted list of output nodes.
826 830 orderedout = []
827 831 # Don't start at nullid since we don't want nullid in our output list,
828 832 # and if nullid shows up in descendants, empty parents will look like
829 833 # they're descendants.
830 834 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
831 835 n = self.node(r)
832 836 isdescendant = False
833 837 if lowestrev == nullrev: # Everybody is a descendant of nullid
834 838 isdescendant = True
835 839 elif n in descendants:
836 840 # n is already a descendant
837 841 isdescendant = True
838 842 # This check only needs to be done here because all the roots
839 843 # will start being marked as descendants before the loop.
840 844 if n in roots:
841 845 # If n was a root, check if it's a 'real' root.
842 846 p = tuple(self.parents(n))
843 847 # If any of its parents are descendants, it's not a root.
844 848 if (p[0] in descendants) or (p[1] in descendants):
845 849 roots.remove(n)
846 850 else:
847 851 p = tuple(self.parents(n))
848 852 # A node is a descendant if either of its parents is a
849 853 # descendant. (We seeded the descendants set with the roots
850 854 # up there, remember?)
851 855 if (p[0] in descendants) or (p[1] in descendants):
852 856 descendants.add(n)
853 857 isdescendant = True
854 858 if isdescendant and ((ancestors is None) or (n in ancestors)):
855 859 # Only include nodes that are both descendants and ancestors.
856 860 orderedout.append(n)
857 861 if (ancestors is not None) and (n in heads):
858 862 # We're trying to figure out which heads are reachable
859 863 # from roots.
860 864 # Mark this head as having been reached
861 865 heads[n] = True
862 866 elif ancestors is None:
863 867 # Otherwise, we're trying to discover the heads.
864 868 # Assume this is a head because if it isn't, the next step
865 869 # will eventually remove it.
866 870 heads[n] = True
867 871 # But, obviously its parents aren't.
868 872 for p in self.parents(n):
869 873 heads.pop(p, None)
870 874 heads = [head for head, flag in heads.iteritems() if flag]
871 875 roots = list(roots)
872 876 assert orderedout
873 877 assert roots
874 878 assert heads
875 879 return (orderedout, roots, heads)
876 880
877 881 def headrevs(self):
878 882 try:
879 883 return self.index.headrevs()
880 884 except AttributeError:
881 885 return self._headrevs()
882 886
883 887 def computephases(self, roots):
884 888 return self.index.computephasesmapsets(roots)
885 889
886 890 def _headrevs(self):
887 891 count = len(self)
888 892 if not count:
889 893 return [nullrev]
890 894 # we won't iterate over filtered revs so nobody is a head at start
891 895 ishead = [0] * (count + 1)
892 896 index = self.index
893 897 for r in self:
894 898 ishead[r] = 1 # I may be a head
895 899 e = index[r]
896 900 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
897 901 return [r for r, val in enumerate(ishead) if val]
898 902
899 903 def heads(self, start=None, stop=None):
900 904 """return the list of all nodes that have no children
901 905
902 906 if start is specified, only heads that are descendants of
903 907 start will be returned
904 908 if stop is specified, it will consider all the revs from stop
905 909 as if they had no children
906 910 """
907 911 if start is None and stop is None:
908 912 if not len(self):
909 913 return [nullid]
910 914 return [self.node(r) for r in self.headrevs()]
911 915
912 916 if start is None:
913 917 start = nullid
914 918 if stop is None:
915 919 stop = []
916 920 stoprevs = set([self.rev(n) for n in stop])
917 921 startrev = self.rev(start)
918 922 reachable = {startrev}
919 923 heads = {startrev}
920 924
921 925 parentrevs = self.parentrevs
922 926 for r in self.revs(start=startrev + 1):
923 927 for p in parentrevs(r):
924 928 if p in reachable:
925 929 if r not in stoprevs:
926 930 reachable.add(r)
927 931 heads.add(r)
928 932 if p in heads and p not in stoprevs:
929 933 heads.remove(p)
930 934
931 935 return [self.node(r) for r in heads]
932 936
933 937 def children(self, node):
934 938 """find the children of a given node"""
935 939 c = []
936 940 p = self.rev(node)
937 941 for r in self.revs(start=p + 1):
938 942 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
939 943 if prevs:
940 944 for pr in prevs:
941 945 if pr == p:
942 946 c.append(self.node(r))
943 947 elif p == nullrev:
944 948 c.append(self.node(r))
945 949 return c
946 950
947 951 def descendant(self, start, end):
948 952 if start == nullrev:
949 953 return True
950 954 for i in self.descendants([start]):
951 955 if i == end:
952 956 return True
953 957 elif i > end:
954 958 break
955 959 return False
956 960
957 961 def commonancestorsheads(self, a, b):
958 962 """calculate all the heads of the common ancestors of nodes a and b"""
959 963 a, b = self.rev(a), self.rev(b)
960 964 try:
961 965 ancs = self.index.commonancestorsheads(a, b)
962 966 except (AttributeError, OverflowError): # C implementation failed
963 967 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
964 968 return pycompat.maplist(self.node, ancs)
965 969
966 970 def isancestor(self, a, b):
967 971 """return True if node a is an ancestor of node b
968 972
969 973 The implementation of this is trivial but the use of
970 974 commonancestorsheads is not."""
971 975 return a in self.commonancestorsheads(a, b)
972 976
973 977 def ancestor(self, a, b):
974 978 """calculate the "best" common ancestor of nodes a and b"""
975 979
976 980 a, b = self.rev(a), self.rev(b)
977 981 try:
978 982 ancs = self.index.ancestors(a, b)
979 983 except (AttributeError, OverflowError):
980 984 ancs = ancestor.ancestors(self.parentrevs, a, b)
981 985 if ancs:
982 986 # choose a consistent winner when there's a tie
983 987 return min(map(self.node, ancs))
984 988 return nullid
985 989
986 990 def _match(self, id):
987 991 if isinstance(id, int):
988 992 # rev
989 993 return self.node(id)
990 994 if len(id) == 20:
991 995 # possibly a binary node
992 996 # odds of a binary node being all hex in ASCII are 1 in 10**25
993 997 try:
994 998 node = id
995 999 self.rev(node) # quick search the index
996 1000 return node
997 1001 except LookupError:
998 1002 pass # may be partial hex id
999 1003 try:
1000 1004 # str(rev)
1001 1005 rev = int(id)
1002 1006 if str(rev) != id:
1003 1007 raise ValueError
1004 1008 if rev < 0:
1005 1009 rev = len(self) + rev
1006 1010 if rev < 0 or rev >= len(self):
1007 1011 raise ValueError
1008 1012 return self.node(rev)
1009 1013 except (ValueError, OverflowError):
1010 1014 pass
1011 1015 if len(id) == 40:
1012 1016 try:
1013 1017 # a full hex nodeid?
1014 1018 node = bin(id)
1015 1019 self.rev(node)
1016 1020 return node
1017 1021 except (TypeError, LookupError):
1018 1022 pass
1019 1023
1020 1024 def _partialmatch(self, id):
1021 1025 try:
1022 1026 partial = self.index.partialmatch(id)
1023 1027 if partial and self.hasnode(partial):
1024 1028 return partial
1025 1029 return None
1026 1030 except RevlogError:
1027 1031 # parsers.c radix tree lookup gave multiple matches
1028 1032 # fast path: for unfiltered changelog, radix tree is accurate
1029 1033 if not getattr(self, 'filteredrevs', None):
1030 1034 raise LookupError(id, self.indexfile,
1031 1035 _('ambiguous identifier'))
1032 1036 # fall through to slow path that filters hidden revisions
1033 1037 except (AttributeError, ValueError):
1034 1038 # we are pure python, or key was too short to search radix tree
1035 1039 pass
1036 1040
1037 1041 if id in self._pcache:
1038 1042 return self._pcache[id]
1039 1043
1040 1044 if len(id) < 40:
1041 1045 try:
1042 1046 # hex(node)[:...]
1043 1047 l = len(id) // 2 # grab an even number of digits
1044 1048 prefix = bin(id[:l * 2])
1045 1049 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1046 1050 nl = [n for n in nl if hex(n).startswith(id) and
1047 1051 self.hasnode(n)]
1048 1052 if len(nl) > 0:
1049 1053 if len(nl) == 1:
1050 1054 self._pcache[id] = nl[0]
1051 1055 return nl[0]
1052 1056 raise LookupError(id, self.indexfile,
1053 1057 _('ambiguous identifier'))
1054 1058 return None
1055 1059 except TypeError:
1056 1060 pass
1057 1061
1058 1062 def lookup(self, id):
1059 1063 """locate a node based on:
1060 1064 - revision number or str(revision number)
1061 1065 - nodeid or subset of hex nodeid
1062 1066 """
1063 1067 n = self._match(id)
1064 1068 if n is not None:
1065 1069 return n
1066 1070 n = self._partialmatch(id)
1067 1071 if n:
1068 1072 return n
1069 1073
1070 1074 raise LookupError(id, self.indexfile, _('no match found'))
1071 1075
1072 1076 def cmp(self, node, text):
1073 1077 """compare text with a given file revision
1074 1078
1075 1079 returns True if text is different than what is stored.
1076 1080 """
1077 1081 p1, p2 = self.parents(node)
1078 1082 return hash(text, p1, p2) != node
1079 1083
1080 1084 def _cachesegment(self, offset, data):
1081 1085 """Add a segment to the revlog cache.
1082 1086
1083 1087 Accepts an absolute offset and the data that is at that location.
1084 1088 """
1085 1089 o, d = self._chunkcache
1086 1090 # try to add to existing cache
1087 1091 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1088 1092 self._chunkcache = o, d + data
1089 1093 else:
1090 1094 self._chunkcache = offset, data
1091 1095
1092 1096 def _readsegment(self, offset, length, df=None):
1093 1097 """Load a segment of raw data from the revlog.
1094 1098
1095 1099 Accepts an absolute offset, length to read, and an optional existing
1096 1100 file handle to read from.
1097 1101
1098 1102 If an existing file handle is passed, it will be seeked, and its
1099 1103 original seek position will NOT be restored.
1100 1104
1101 1105 Returns a str or buffer of raw byte data.
1102 1106 """
1103 1107 if df is not None:
1104 1108 closehandle = False
1105 1109 else:
1106 1110 if self._inline:
1107 1111 df = self.opener(self.indexfile)
1108 1112 else:
1109 1113 df = self.opener(self.datafile)
1110 1114 closehandle = True
1111 1115
1112 1116 # Cache data both forward and backward around the requested
1113 1117 # data, in a fixed size window. This helps speed up operations
1114 1118 # involving reading the revlog backwards.
1115 1119 cachesize = self._chunkcachesize
1116 1120 realoffset = offset & ~(cachesize - 1)
1117 1121 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1118 1122 - realoffset)
1119 1123 df.seek(realoffset)
1120 1124 d = df.read(reallength)
1121 1125 if closehandle:
1122 1126 df.close()
1123 1127 self._cachesegment(realoffset, d)
1124 1128 if offset != realoffset or reallength != length:
1125 1129 return util.buffer(d, offset - realoffset, length)
1126 1130 return d
1127 1131
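# Worked example of the windowing above, assuming the default
# _chunkcachesize of 65536: a 100-byte read at offset 70000 aligns
# realoffset down to 65536, and reallength = ((70000 + 100 + 65536)
# & ~65535) - 65536 = 131072 - 65536 = 65536 bytes, caching a full
# aligned window around the request.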
1128 1132 def _getsegment(self, offset, length, df=None):
1129 1133 """Obtain a segment of raw data from the revlog.
1130 1134
1131 1135 Accepts an absolute offset, length of bytes to obtain, and an
1132 1136 optional file handle to the already-opened revlog. If the file
1133 1137 handle is used, its original seek position will not be preserved.
1134 1138
1135 1139 Requests for data may be returned from a cache.
1136 1140
1137 1141 Returns a str or a buffer instance of raw byte data.
1138 1142 """
1139 1143 o, d = self._chunkcache
1140 1144 l = len(d)
1141 1145
1142 1146 # is it in the cache?
1143 1147 cachestart = offset - o
1144 1148 cacheend = cachestart + length
1145 1149 if cachestart >= 0 and cacheend <= l:
1146 1150 if cachestart == 0 and cacheend == l:
1147 1151 return d # avoid a copy
1148 1152 return util.buffer(d, cachestart, cacheend - cachestart)
1149 1153
1150 1154 return self._readsegment(offset, length, df=df)
1151 1155
1152 1156 def _getsegmentforrevs(self, startrev, endrev, df=None):
1153 1157 """Obtain a segment of raw data corresponding to a range of revisions.
1154 1158
1155 1159 Accepts the start and end revisions and an optional already-open
1156 1160 file handle to be used for reading. If the file handle is read, its
1157 1161 seek position will not be preserved.
1158 1162
1159 1163 Requests for data may be satisfied by a cache.
1160 1164
1161 1165 Returns a 2-tuple of (offset, data) for the requested range of
1162 1166 revisions. Offset is the integer offset from the beginning of the
1163 1167 revlog and data is a str or buffer of the raw byte data.
1164 1168
1165 1169 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1166 1170 to determine where each revision's data begins and ends.
1167 1171 """
1168 1172 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1169 1173 # (functions are expensive).
1170 1174 index = self.index
1171 1175 istart = index[startrev]
1172 1176 start = int(istart[0] >> 16)
1173 1177 if startrev == endrev:
1174 1178 end = start + istart[1]
1175 1179 else:
1176 1180 iend = index[endrev]
1177 1181 end = int(iend[0] >> 16) + iend[1]
1178 1182
1179 1183 if self._inline:
1180 1184 start += (startrev + 1) * self._io.size
1181 1185 end += (endrev + 1) * self._io.size
1182 1186 length = end - start
1183 1187
1184 1188 return start, self._getsegment(start, length, df=df)
1185 1189
1186 1190 def _chunk(self, rev, df=None):
1187 1191 """Obtain a single decompressed chunk for a revision.
1188 1192
1189 1193 Accepts an integer revision and an optional already-open file handle
1190 1194 to be used for reading. If used, the seek position of the file will not
1191 1195 be preserved.
1192 1196
1193 1197 Returns a str holding uncompressed data for the requested revision.
1194 1198 """
1195 1199 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1196 1200
1197 1201 def _chunks(self, revs, df=None):
1198 1202 """Obtain decompressed chunks for the specified revisions.
1199 1203
1200 1204 Accepts an iterable of numeric revisions that are assumed to be in
1201 1205 ascending order. Also accepts an optional already-open file handle
1202 1206 to be used for reading. If used, the seek position of the file will
1203 1207 not be preserved.
1204 1208
1205 1209 This function is similar to calling ``self._chunk()`` multiple times,
1206 1210 but is faster.
1207 1211
1208 1212 Returns a list with decompressed data for each requested revision.
1209 1213 """
1210 1214 if not revs:
1211 1215 return []
1212 1216 start = self.start
1213 1217 length = self.length
1214 1218 inline = self._inline
1215 1219 iosize = self._io.size
1216 1220 buffer = util.buffer
1217 1221
1218 1222 l = []
1219 1223 ladd = l.append
1220 1224
1221 1225 try:
1222 1226 offset, data = self._getsegmentforrevs(revs[0], revs[-1], df=df)
1223 1227 except OverflowError:
1224 1228 # issue4215 - we can't cache a run of chunks greater than
1225 1229 # 2G on Windows
1226 1230 return [self._chunk(rev, df=df) for rev in revs]
1227 1231
1228 1232 decomp = self.decompress
1229 1233 for rev in revs:
1230 1234 chunkstart = start(rev)
1231 1235 if inline:
1232 1236 chunkstart += (rev + 1) * iosize
1233 1237 chunklength = length(rev)
1234 1238 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1235 1239
1236 1240 return l
1237 1241
1238 1242 def _chunkclear(self):
1239 1243 """Clear the raw chunk cache."""
1240 1244 self._chunkcache = (0, '')
1241 1245
1242 1246 def deltaparent(self, rev):
1243 1247 """return deltaparent of the given revision"""
1244 1248 base = self.index[rev][3]
1245 1249 if base == rev:
1246 1250 return nullrev
1247 1251 elif self._generaldelta:
1248 1252 return base
1249 1253 else:
1250 1254 return rev - 1
1251 1255
1252 1256 def revdiff(self, rev1, rev2):
1253 1257 """return or calculate a delta between two revisions
1254 1258
1255 1259 The delta calculated is in binary form and is intended to be written to
1256 1260 revlog data directly. So this function needs raw revision data.
1257 1261 """
1258 1262 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1259 1263 return bytes(self._chunk(rev2))
1260 1264
1261 1265 return mdiff.textdiff(self.revision(rev1, raw=True),
1262 1266 self.revision(rev2, raw=True))
1263 1267
1264 1268 def revision(self, nodeorrev, _df=None, raw=False):
1265 1269 """return an uncompressed revision of a given node or revision
1266 1270 number.
1267 1271
1268 1272 _df - an existing file handle to read from. (internal-only)
1269 1273 raw - an optional argument specifying if the revision data is to be
1270 1274 treated as raw data when applying flag transforms. 'raw' should be set
1271 1275 to True when generating changegroups or in debug commands.
1272 1276 """
1273 1277 if isinstance(nodeorrev, int):
1274 1278 rev = nodeorrev
1275 1279 node = self.node(rev)
1276 1280 else:
1277 1281 node = nodeorrev
1278 1282 rev = None
1279 1283
1280 1284 cachedrev = None
1281 1285 flags = None
1282 1286 rawtext = None
1283 1287 if node == nullid:
1284 1288 return ""
1285 1289 if self._cache:
1286 1290 if self._cache[0] == node:
1287 1291 # _cache only stores rawtext
1288 1292 if raw:
1289 1293 return self._cache[2]
1290 1294 # duplicated, but good for perf
1291 1295 if rev is None:
1292 1296 rev = self.rev(node)
1293 1297 if flags is None:
1294 1298 flags = self.flags(rev)
1295 1299 # no extra flags set, no flag processor runs, text = rawtext
1296 1300 if flags == REVIDX_DEFAULT_FLAGS:
1297 1301 return self._cache[2]
1298 1302 # rawtext is reusable. need to run flag processor
1299 1303 rawtext = self._cache[2]
1300 1304
1301 1305 cachedrev = self._cache[1]
1302 1306
1303 1307 # look up what we need to read
1304 1308 if rawtext is None:
1305 1309 if rev is None:
1306 1310 rev = self.rev(node)
1307 1311
1308 1312 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1309 1313 if stopped:
1310 1314 rawtext = self._cache[2]
1311 1315
1312 1316 # drop cache to save memory
1313 1317 self._cache = None
1314 1318
1315 1319 bins = self._chunks(chain, df=_df)
1316 1320 if rawtext is None:
1317 1321 rawtext = bytes(bins[0])
1318 1322 bins = bins[1:]
1319 1323
1320 1324 rawtext = mdiff.patches(rawtext, bins)
1321 1325 self._cache = (node, rev, rawtext)
1322 1326
1323 1327 if flags is None:
1324 1328 if rev is None:
1325 1329 rev = self.rev(node)
1326 1330 flags = self.flags(rev)
1327 1331
1328 1332 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1329 1333 if validatehash:
1330 1334 self.checkhash(text, node, rev=rev)
1331 1335
1332 1336 return text
1333 1337
1334 1338 def hash(self, text, p1, p2):
1335 1339 """Compute a node hash.
1336 1340
1337 1341 Available as a function so that subclasses can replace the hash
1338 1342 as needed.
1339 1343 """
1340 1344 return hash(text, p1, p2)
1341 1345
1342 1346 def _processflags(self, text, flags, operation, raw=False):
1343 1347 """Inspect revision data flags and applies transforms defined by
1344 1348 registered flag processors.
1345 1349
1346 1350 ``text`` - the revision data to process
1347 1351 ``flags`` - the revision flags
1348 1352 ``operation`` - the operation being performed (read or write)
1349 1353 ``raw`` - an optional argument describing if the raw transform should be
1350 1354 applied.
1351 1355
1352 1356 This method processes the flags in the order (or reverse order if
1353 1357 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1354 1358 flag processors registered for present flags. The order of flags defined
1355 1359 in REVIDX_FLAGS_ORDER needs to be stable because the transforms may not commute.
1356 1360
1357 1361 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1358 1362 processed text and ``validatehash`` is a bool indicating whether the
1359 1363 returned text should be checked for hash integrity.
1360 1364
1361 1365 Note: If the ``raw`` argument is set, it has precedence over the
1362 1366 operation and will only update the value of ``validatehash``.
1363 1367 """
1364 1368 # fast path: no flag processors will run
1365 1369 if flags == 0:
1366 1370 return text, True
1367 1371 if operation not in ('read', 'write'):
1368 1372 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1369 1373 # Check all flags are known.
1370 1374 if flags & ~REVIDX_KNOWN_FLAGS:
1371 1375 raise RevlogError(_("incompatible revision flag '%#x'") %
1372 1376 (flags & ~REVIDX_KNOWN_FLAGS))
1373 1377 validatehash = True
1374 1378 # Depending on the operation (read or write), the order might be
1375 1379 # reversed due to non-commutative transforms.
1376 1380 orderedflags = REVIDX_FLAGS_ORDER
1377 1381 if operation == 'write':
1378 1382 orderedflags = reversed(orderedflags)
1379 1383
1380 1384 for flag in orderedflags:
1381 1385 # If a flagprocessor has been registered for a known flag, apply the
1382 1386 # related operation transform and update result tuple.
1383 1387 if flag & flags:
1384 1388 vhash = True
1385 1389
1386 1390 if flag not in _flagprocessors:
1387 1391 message = _("missing processor for flag '%#x'") % (flag)
1388 1392 raise RevlogError(message)
1389 1393
1390 1394 processor = _flagprocessors[flag]
1391 1395 if processor is not None:
1392 1396 readtransform, writetransform, rawtransform = processor
1393 1397
1394 1398 if raw:
1395 1399 vhash = rawtransform(self, text)
1396 1400 elif operation == 'read':
1397 1401 text, vhash = readtransform(self, text)
1398 1402 else: # write operation
1399 1403 text, vhash = writetransform(self, text)
1400 1404 validatehash = validatehash and vhash
1401 1405
1402 1406 return text, validatehash
1403 1407
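# Illustrative note: with two flags set, a 'read' applies their processors
# in REVIDX_FLAGS_ORDER (e.g. ISCENSORED before EXTSTORED), while a
# 'write' applies them in reverse, so write followed by read round-trips
# even when the transforms do not commute.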
1404 1408 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1405 1409 """Check node hash integrity.
1406 1410
1407 1411 Available as a function so that subclasses can extend hash mismatch
1408 1412 behaviors as needed.
1409 1413 """
1410 1414 if p1 is None and p2 is None:
1411 1415 p1, p2 = self.parents(node)
1412 1416 if node != self.hash(text, p1, p2):
1413 1417 revornode = rev
1414 1418 if revornode is None:
1415 1419 revornode = templatefilters.short(hex(node))
1416 1420 raise RevlogError(_("integrity check failed on %s:%s")
1417 1421 % (self.indexfile, revornode))
1418 1422
1419 1423 def checkinlinesize(self, tr, fp=None):
1420 1424 """Check if the revlog is too big for inline and convert if so.
1421 1425
1422 1426 This should be called after revisions are added to the revlog. If the
1423 1427 revlog has grown too large to be an inline revlog, it will convert it
1424 1428 to use multiple index and data files.
1425 1429 """
1426 1430 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1427 1431 return
1428 1432
1429 1433 trinfo = tr.find(self.indexfile)
1430 1434 if trinfo is None:
1431 1435 raise RevlogError(_("%s not found in the transaction")
1432 1436 % self.indexfile)
1433 1437
1434 1438 trindex = trinfo[2]
1435 1439 if trindex is not None:
1436 1440 dataoff = self.start(trindex)
1437 1441 else:
1438 1442 # revlog was stripped at start of transaction, use all leftover data
1439 1443 trindex = len(self) - 1
1440 1444 dataoff = self.end(-2)
1441 1445
1442 1446 tr.add(self.datafile, dataoff)
1443 1447
1444 1448 if fp:
1445 1449 fp.flush()
1446 1450 fp.close()
1447 1451
1448 1452 df = self.opener(self.datafile, 'w')
1449 1453 try:
1450 1454 for r in self:
1451 1455 df.write(self._getsegmentforrevs(r, r)[1])
1452 1456 finally:
1453 1457 df.close()
1454 1458
1455 1459 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1456 1460 checkambig=self._checkambig)
1457 1461 self.version &= ~FLAG_INLINE_DATA
1458 1462 self._inline = False
1459 1463 for i in self:
1460 1464 e = self._io.packentry(self.index[i], self.node, self.version, i)
1461 1465 fp.write(e)
1462 1466
1463 1467 # if we don't call close, the temp file will never replace the
1464 1468 # real index
1465 1469 fp.close()
1466 1470
1467 1471 tr.replace(self.indexfile, trindex * self._io.size)
1468 1472 self._chunkclear()
1469 1473
1470 1474 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1471 1475 node=None, flags=REVIDX_DEFAULT_FLAGS):
1472 1476 """add a revision to the log
1473 1477
1474 1478 text - the revision data to add
1475 1479 transaction - the transaction object used for rollback
1476 1480 link - the linkrev data to add
1477 1481 p1, p2 - the parent nodeids of the revision
1478 1482 cachedelta - an optional precomputed delta
1479 1483 node - nodeid of revision; typically node is not specified, and it is
1480 1484 computed by default as hash(text, p1, p2), however subclasses might
1481 1485 use different hashing method (and override checkhash() in such case)
1482 1486 flags - the known flags to set on the revision
1483 1487 """
1484 1488 if link == nullrev:
1485 1489 raise RevlogError(_("attempted to add linkrev -1 to %s")
1486 1490 % self.indexfile)
1487 1491
1488 1492 if flags:
1489 1493 node = node or self.hash(text, p1, p2)
1490 1494
1491 1495 rawtext, validatehash = self._processflags(text, flags, 'write')
1492 1496
1493 1497 # If the flag processor modifies the revision data, ignore any provided
1494 1498 # cachedelta.
1495 1499 if rawtext != text:
1496 1500 cachedelta = None
1497 1501
1498 1502 if len(rawtext) > _maxentrysize:
1499 1503 raise RevlogError(
1500 1504 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1501 1505 % (self.indexfile, len(rawtext)))
1502 1506
1503 1507 node = node or self.hash(rawtext, p1, p2)
1504 1508 if node in self.nodemap:
1505 1509 return node
1506 1510
1507 1511 if validatehash:
1508 1512 self.checkhash(rawtext, node, p1=p1, p2=p2)
1509 1513
1510 1514 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1511 1515 flags, cachedelta=cachedelta)
1512 1516
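# Illustrative sketch (hypothetical caller): adding one revision, where
# `rl` is a revlog, `tr` an open transaction, and p1/p2 parent nodeids:
#
#     node = rl.addrevision(text, tr, linkrev, p1, p2)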
1513 1517 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1514 1518 cachedelta=None):
1515 1519 """add a raw revision with known flags, node and parents
1516 1520 useful when reusing a revision not stored in this revlog (ex: received
1517 1521 over wire, or read from an external bundle).
1518 1522 """
1519 1523 dfh = None
1520 1524 if not self._inline:
1521 1525 dfh = self.opener(self.datafile, "a+")
1522 1526 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1523 1527 try:
1524 1528 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1525 1529 flags, cachedelta, ifh, dfh)
1526 1530 finally:
1527 1531 if dfh:
1528 1532 dfh.close()
1529 1533 ifh.close()
1530 1534
1531 1535 def compress(self, data):
1532 1536 """Generate a possibly-compressed representation of data."""
1533 1537 if not data:
1534 1538 return '', data
1535 1539
1536 1540 compressed = self._compressor.compress(data)
1537 1541
1538 1542 if compressed:
1539 1543 # The revlog compressor added the header in the returned data.
1540 1544 return '', compressed
1541 1545
1542 1546 if data[0:1] == '\0':
1543 1547 return '', data
1544 1548 return 'u', data
1545 1549
1546 1550 def decompress(self, data):
1547 1551 """Decompress a revlog chunk.
1548 1552
1549 1553 The chunk is expected to begin with a header identifying the
1550 1554 format type so it can be routed to an appropriate decompressor.
1551 1555 """
1552 1556 if not data:
1553 1557 return data
1554 1558
1555 1559 # Revlogs are read much more frequently than they are written and many
1556 1560 # chunks only take microseconds to decompress, so performance is
1557 1561 # important here.
1558 1562 #
1559 1563 # We can make a few assumptions about revlogs:
1560 1564 #
1561 1565 # 1) the majority of chunks will be compressed (as opposed to inline
1562 1566 # raw data).
1563 1567 # 2) decompressing *any* data will likely be at least 10x slower than
1564 1568 # returning raw inline data.
1565 1569 # 3) we want to prioritize common and officially supported compression
1566 1570 # engines
1567 1571 #
1568 1572 # It follows that we want to optimize for "decompress compressed data
1569 1573 # when encoded with common and officially supported compression engines"
1570 1574 # case over "raw data" and "data encoded by less common or non-official
1571 1575 # compression engines." That is why we have the inline lookup first
1572 1576 # followed by the compengines lookup.
1573 1577 #
1574 1578 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1575 1579 # compressed chunks. And this matters for changelog and manifest reads.
1576 1580 t = data[0:1]
1577 1581
1578 1582 if t == 'x':
1579 1583 try:
1580 1584 return _zlibdecompress(data)
1581 1585 except zlib.error as e:
1582 1586 raise RevlogError(_('revlog decompress error: %s') % str(e))
1583 1587 # '\0' is more common than 'u' so it goes first.
1584 1588 elif t == '\0':
1585 1589 return data
1586 1590 elif t == 'u':
1587 1591 return util.buffer(data, 1)
1588 1592
1589 1593 try:
1590 1594 compressor = self._decompressors[t]
1591 1595 except KeyError:
1592 1596 try:
1593 1597 engine = util.compengines.forrevlogheader(t)
1594 1598 compressor = engine.revlogcompressor()
1595 1599 self._decompressors[t] = compressor
1596 1600 except KeyError:
1597 1601 raise RevlogError(_('unknown compression type %r') % t)
1598 1602
1599 1603 return compressor.decompress(data)
1600 1604
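# The same first-byte dispatch in isolation (zlib-only; the real method
# additionally falls back to util.compengines for other headers):
import zlib as _zlib

def sketchdecompress(data):
    if not data:
        return data
    t = data[0:1]
    if t == 'x':                          # zlib stream
        return _zlib.decompress(data)
    elif t == '\0':                       # headerless raw chunk
        return data
    elif t == 'u':                        # explicit uncompressed marker
        return data[1:]
    raise ValueError('unknown compression type %r' % t)

assert sketchdecompress(_zlib.compress('hello')) == 'hello'
assert sketchdecompress('uhello') == 'hello'
assert sketchdecompress('\0hello') == '\0hello'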
1601 1605 def _isgooddelta(self, d, textlen):
1602 1606 """Returns True if the given delta is good. Good means that it is within
1603 1607 the disk span, disk size, and chain length bounds that we know to be
1604 1608 performant."""
1605 1609 if d is None:
1606 1610 return False
1607 1611
1608 1612 # - 'dist' is the distance from the base revision -- bounding it limits
1609 1613 # the amount of I/O we need to do.
1610 1614 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1611 1615 # to apply -- bounding it limits the amount of CPU we consume.
1612 1616 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1613 1617 if (dist > textlen * 4 or l > textlen or
1614 1618 compresseddeltalen > textlen * 2 or
1615 1619 (self._maxchainlen and chainlen > self._maxchainlen)):
1616 1620 return False
1617 1621
1618 1622 return True
1619 1623
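# A worked example of those bounds with made-up numbers: for a 1000-byte
# text, a candidate is kept only while the read span stays under 4x the
# text size, the delta itself under 1x, and the accumulated chain under 2x:
textlen = 1000
dist, l, compresseddeltalen, chainlen, maxchainlen = 3500, 400, 1800, 5, 0
good = not (dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
            (maxchainlen and chainlen > maxchainlen))
assert good                               # all bounds hold; delta is kept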
1620 1624 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1621 1625 cachedelta, ifh, dfh, alwayscache=False):
1622 1626 """internal function to add revisions to the log
1623 1627
1624 1628 see addrevision for argument descriptions.
1625 1629
1626 1630 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1627 1631
1628 1632 invariants:
1629 1633 - rawtext is optional (can be None); if not set, cachedelta must be set.
1630 1634 if both are set, they must correspond to each other.
1631 1635 """
1632 1636 btext = [rawtext]
1633 1637 def buildtext():
1634 1638 if btext[0] is not None:
1635 1639 return btext[0]
1636 1640 baserev = cachedelta[0]
1637 1641 delta = cachedelta[1]
1638 1642 # special case deltas which replace entire base; no need to decode
1639 1643 # base revision. this neatly avoids censored bases, which throw when
1640 1644 # they're decoded.
1641 1645 hlen = struct.calcsize(">lll")
1642 1646 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1643 1647 len(delta) - hlen):
1644 1648 btext[0] = delta[hlen:]
1645 1649 else:
1646 1650 if self._inline:
1647 1651 fh = ifh
1648 1652 else:
1649 1653 fh = dfh
1650 1654 basetext = self.revision(baserev, _df=fh, raw=True)
1651 1655 btext[0] = mdiff.patch(basetext, delta)
1652 1656
1653 1657 try:
1654 1658 res = self._processflags(btext[0], flags, 'read', raw=True)
1655 1659 btext[0], validatehash = res
1656 1660 if validatehash:
1657 1661 self.checkhash(btext[0], node, p1=p1, p2=p2)
1658 1662 if flags & REVIDX_ISCENSORED:
1659 1663 raise RevlogError(_('node %s is not censored') % node)
1660 1664 except CensoredNodeError:
1661 1665 # must pass the censored index flag to add censored revisions
1662 1666 if not flags & REVIDX_ISCENSORED:
1663 1667 raise
1664 1668 return btext[0]
1665 1669
1666 1670 def builddelta(rev):
1667 1671 # can we use the cached delta?
1668 1672 if cachedelta and cachedelta[0] == rev:
1669 1673 delta = cachedelta[1]
1670 1674 else:
1671 1675 t = buildtext()
1672 1676 if self.iscensored(rev):
1673 1677 # deltas based on a censored revision must replace the
1674 1678 # full content in one patch, so delta works everywhere
1675 1679 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1676 1680 delta = header + t
1677 1681 else:
1678 1682 if self._inline:
1679 1683 fh = ifh
1680 1684 else:
1681 1685 fh = dfh
1682 1686 ptext = self.revision(rev, _df=fh, raw=True)
1683 1687 delta = mdiff.textdiff(ptext, t)
1684 1688 header, data = self.compress(delta)
1685 1689 deltalen = len(header) + len(data)
1686 1690 chainbase = self.chainbase(rev)
1687 1691 dist = deltalen + offset - self.start(chainbase)
1688 1692 if self._generaldelta:
1689 1693 base = rev
1690 1694 else:
1691 1695 base = chainbase
1692 1696 chainlen, compresseddeltalen = self._chaininfo(rev)
1693 1697 chainlen += 1
1694 1698 compresseddeltalen += deltalen
1695 1699 return (dist, deltalen, (header, data), base,
1696 1700 chainbase, chainlen, compresseddeltalen)
1697 1701
1698 1702 curr = len(self)
1699 1703 prev = curr - 1
1700 1704 offset = self.end(prev)
1701 1705 delta = None
1702 1706 p1r, p2r = self.rev(p1), self.rev(p2)
1703 1707
1704 1708 # full versions are inserted when the needed deltas
1705 1709 # become comparable to the uncompressed text
1706 1710 if rawtext is None:
1707 1711 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1708 1712 cachedelta[1])
1709 1713 else:
1710 1714 textlen = len(rawtext)
1711 1715
1712 1716 # should we try to build a delta?
1713 1717 if prev != nullrev and self.storedeltachains:
1714 1718 tested = set()
1715 1719 # This condition is true most of the time when processing
1716 1720 # changegroup data into a generaldelta repo. The only time it
1717 1721 # isn't true is if this is the first revision in a delta chain
1718 1722 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1719 1723 if cachedelta and self._generaldelta and self._lazydeltabase:
1720 1724 # Assume what we received from the server is a good choice
1721 1725 # build delta will reuse the cache
1722 1726 candidatedelta = builddelta(cachedelta[0])
1723 1727 tested.add(cachedelta[0])
1724 1728 if self._isgooddelta(candidatedelta, textlen):
1725 1729 delta = candidatedelta
1726 1730 if delta is None and self._generaldelta:
1727 1731 # exclude already lazy tested base if any
1728 1732 parents = [p for p in (p1r, p2r)
1729 1733 if p != nullrev and p not in tested]
1730 1734 if parents and not self._aggressivemergedeltas:
1731 1735 # Pick whichever parent is closer to us (to minimize the
1732 1736 # chance of having to build a fulltext).
1733 1737 parents = [max(parents)]
1734 1738 tested.update(parents)
1735 1739 pdeltas = []
1736 1740 for p in parents:
1737 1741 pd = builddelta(p)
1738 1742 if self._isgooddelta(pd, textlen):
1739 1743 pdeltas.append(pd)
1740 1744 if pdeltas:
1741 1745 delta = min(pdeltas, key=lambda x: x[1])
1742 1746 if delta is None and prev not in tested:
1743 1747 # other approaches failed, try against prev to hopefully save us a
1744 1748 # fulltext.
1745 1749 candidatedelta = builddelta(prev)
1746 1750 if self._isgooddelta(candidatedelta, textlen):
1747 1751 delta = candidatedelta
1748 1752 if delta is not None:
1749 1753 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1750 1754 else:
1751 1755 rawtext = buildtext()
1752 1756 data = self.compress(rawtext)
1753 1757 l = len(data[1]) + len(data[0])
1754 1758 base = chainbase = curr
1755 1759
1756 1760 e = (offset_type(offset, flags), l, textlen,
1757 1761 base, link, p1r, p2r, node)
1758 1762 self.index.insert(-1, e)
1759 1763 self.nodemap[node] = curr
1760 1764
1761 1765 entry = self._io.packentry(e, self.node, self.version, curr)
1762 1766 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1763 1767
1764 1768 if alwayscache and rawtext is None:
1765 1769 rawtext = buildtext()
1766 1770
1767 1771 if type(rawtext) == str: # only accept immutable objects
1768 1772 self._cache = (node, curr, rawtext)
1769 1773 self._chainbasecache[curr] = chainbase
1770 1774 return node
1771 1775
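# The delta search above, restated as a compact sketch in its trying
# order; builddelta() and isgood() stand in for the real helpers, and the
# aggressive-merge-deltas variant (which tries both parents) is omitted:
def choosedelta(cachedrev, p1r, p2r, prev, nullrev, builddelta, isgood):
    tested = set()
    if cachedrev is not None:             # 1) trust the server-sent base
        tested.add(cachedrev)
        d = builddelta(cachedrev)
        if isgood(d):
            return d
    parents = [p for p in (p1r, p2r) if p != nullrev and p not in tested]
    if parents:                           # 2) try the closer parent
        p = max(parents)
        tested.add(p)
        d = builddelta(p)
        if isgood(d):
            return d
    if prev not in tested:                # 3) last resort: delta vs. prev
        d = builddelta(prev)
        if isgood(d):
            return d
    return None                           # give up; store a full text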
1772 1776 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1773 1777 # Files opened in a+ mode have inconsistent behavior on various
1774 1778 # platforms. Windows requires that a file positioning call be made
1775 1779 # when the file handle transitions between reads and writes. See
1776 1780 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1777 1781 # platforms, Python or the platform itself can be buggy. Some versions
1778 1782 # of Solaris have been observed to not append at the end of the file
1779 1783 # if the file was seeked to before the end. See issue4943 for more.
1780 1784 #
1781 1785 # We work around this issue by inserting a seek() before writing.
1782 1786 # Note: This is likely not necessary on Python 3.
1783 1787 ifh.seek(0, os.SEEK_END)
1784 1788 if dfh:
1785 1789 dfh.seek(0, os.SEEK_END)
1786 1790
1787 1791 curr = len(self) - 1
1788 1792 if not self._inline:
1789 1793 transaction.add(self.datafile, offset)
1790 1794 transaction.add(self.indexfile, curr * len(entry))
1791 1795 if data[0]:
1792 1796 dfh.write(data[0])
1793 1797 dfh.write(data[1])
1794 1798 ifh.write(entry)
1795 1799 else:
1796 1800 offset += curr * self._io.size
1797 1801 transaction.add(self.indexfile, offset, curr)
1798 1802 ifh.write(entry)
1799 1803 ifh.write(data[0])
1800 1804 ifh.write(data[1])
1801 1805 self.checkinlinesize(transaction, ifh)
1802 1806
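# The workaround above in isolation: on an "a+" handle, always seek to
# EOF before writing so appends land at the end on every platform
# ('example.dat' is an illustrative file name):
import os

fh = open('example.dat', 'a+')
fh.seek(0, os.SEEK_END)                   # restore the append position
fh.write('entry')
fh.close()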
1803 1807 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1804 1808 """
1805 1809 add a delta group
1806 1810
1807 1811 given a set of deltas, add them to the revision log. the
1808 1812 first delta is against its parent, which should be in our
1809 1813 log, the rest are against the previous delta.
1810 1814
1811 1815 If ``addrevisioncb`` is defined, it will be called with arguments of
1812 1816 this revlog and the node that was added.
1813 1817 """
1814 1818
1815 1819 # track the base of the current delta log
1816 1820 content = []
1817 1821 node = None
1818 1822
1819 1823 r = len(self)
1820 1824 end = 0
1821 1825 if r:
1822 1826 end = self.end(r - 1)
1823 1827 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1824 1828 isize = r * self._io.size
1825 1829 if self._inline:
1826 1830 transaction.add(self.indexfile, end + isize, r)
1827 1831 dfh = None
1828 1832 else:
1829 1833 transaction.add(self.indexfile, isize, r)
1830 1834 transaction.add(self.datafile, end)
1831 1835 dfh = self.opener(self.datafile, "a+")
1832 1836 def flush():
1833 1837 if dfh:
1834 1838 dfh.flush()
1835 1839 ifh.flush()
1836 1840 try:
1837 1841 # loop through our set of deltas
1838 1842 chain = None
1839 1843 for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
1840 1844 node = chunkdata['node']
1841 1845 p1 = chunkdata['p1']
1842 1846 p2 = chunkdata['p2']
1843 1847 cs = chunkdata['cs']
1844 1848 deltabase = chunkdata['deltabase']
1845 1849 delta = chunkdata['delta']
1846 1850 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1847 1851
1848 1852 content.append(node)
1849 1853
1850 1854 link = linkmapper(cs)
1851 1855 if node in self.nodemap:
1852 1856 # this can happen if two branches make the same change
1853 1857 chain = node
1854 1858 continue
1855 1859
1856 1860 for p in (p1, p2):
1857 1861 if p not in self.nodemap:
1858 1862 raise LookupError(p, self.indexfile,
1859 1863 _('unknown parent'))
1860 1864
1861 1865 if deltabase not in self.nodemap:
1862 1866 raise LookupError(deltabase, self.indexfile,
1863 1867 _('unknown delta base'))
1864 1868
1865 1869 baserev = self.rev(deltabase)
1866 1870
1867 1871 if baserev != nullrev and self.iscensored(baserev):
1868 1872 # if base is censored, delta must be full replacement in a
1869 1873 # single patch operation
1870 1874 hlen = struct.calcsize(">lll")
1871 1875 oldlen = self.rawsize(baserev)
1872 1876 newlen = len(delta) - hlen
1873 1877 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1874 1878 raise error.CensoredBaseError(self.indexfile,
1875 1879 self.node(baserev))
1876 1880
1877 1881 if not flags and self._peek_iscensored(baserev, delta, flush):
1878 1882 flags |= REVIDX_ISCENSORED
1879 1883
1880 1884 # We assume consumers of addrevisioncb will want to retrieve
1881 1885 # the added revision, which will require a call to
1882 1886 # revision(). revision() will fast path if there is a cache
1883 1887 # hit. So, we tell _addrevision() to always cache in this case.
1884 1888 # We're only using addgroup() in the context of changegroup
1885 1889 # generation so the revision data can always be handled as raw
1886 1890 # by the flagprocessor.
1887 1891 chain = self._addrevision(node, None, transaction, link,
1888 1892 p1, p2, flags, (baserev, delta),
1889 1893 ifh, dfh,
1890 1894 alwayscache=bool(addrevisioncb))
1891 1895
1892 1896 if addrevisioncb:
1893 1897 addrevisioncb(self, chain)
1894 1898
1895 1899 if not dfh and not self._inline:
1896 1900 # addrevision switched from inline to conventional
1897 1901 # reopen the index
1898 1902 ifh.close()
1899 1903 dfh = self.opener(self.datafile, "a+")
1900 1904 ifh = self.opener(self.indexfile, "a+",
1901 1905 checkambig=self._checkambig)
1902 1906 finally:
1903 1907 if dfh:
1904 1908 dfh.close()
1905 1909 ifh.close()
1906 1910
1907 1911 return content
1908 1912
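# The loop above relies on the two-argument iter(callable, sentinel)
# idiom: keep calling cg.deltachunk(chain) until it returns the empty
# dict sentinel. The same idiom with a toy producer:
chunks = [{'node': 'n1'}, {'node': 'n2'}, {}]
def nextchunk():
    return chunks.pop(0)
seen = [c['node'] for c in iter(nextchunk, {})]
assert seen == ['n1', 'n2']               # iteration stopped at {}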
1909 1913 def iscensored(self, rev):
1910 1914 """Check if a file revision is censored."""
1911 1915 return False
1912 1916
1913 1917 def _peek_iscensored(self, baserev, delta, flush):
1914 1918 """Quickly check if a delta produces a censored revision."""
1915 1919 return False
1916 1920
1917 1921 def getstrippoint(self, minlink):
1918 1922 """find the minimum rev that must be stripped to strip the linkrev
1919 1923
1920 1924 Returns a tuple containing the minimum rev and a set of all revs that
1921 1925 have linkrevs that will be broken by this strip.
1922 1926 """
1923 1927 brokenrevs = set()
1924 1928 strippoint = len(self)
1925 1929
1926 1930 heads = {}
1927 1931 futurelargelinkrevs = set()
1928 1932 for head in self.headrevs():
1929 1933 headlinkrev = self.linkrev(head)
1930 1934 heads[head] = headlinkrev
1931 1935 if headlinkrev >= minlink:
1932 1936 futurelargelinkrevs.add(headlinkrev)
1933 1937
1934 1938 # This algorithm involves walking down the rev graph, starting at the
1935 1939 # heads. Since the revs are topologically sorted according to linkrev,
1936 1940 # once all head linkrevs are below the minlink, we know there are
1937 1941 # no more revs that could have a linkrev greater than minlink.
1938 1942 # So we can stop walking.
1939 1943 while futurelargelinkrevs:
1940 1944 strippoint -= 1
1941 1945 linkrev = heads.pop(strippoint)
1942 1946
1943 1947 if linkrev < minlink:
1944 1948 brokenrevs.add(strippoint)
1945 1949 else:
1946 1950 futurelargelinkrevs.remove(linkrev)
1947 1951
1948 1952 for p in self.parentrevs(strippoint):
1949 1953 if p != nullrev:
1950 1954 plinkrev = self.linkrev(p)
1951 1955 heads[p] = plinkrev
1952 1956 if plinkrev >= minlink:
1953 1957 futurelargelinkrevs.add(plinkrev)
1954 1958
1955 1959 return strippoint, brokenrevs
1956 1960
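# A toy run of that walk on a linear chain where rev i has the single
# parent i - 1 (made-up linkrevs; revs 3 and 4 belong to the changesets
# being stripped, so minlink is 5):
linkrevs = [0, 1, 2, 5, 6]
minlink = 5
strippoint, heads, futurelarge, broken = len(linkrevs), {4: 6}, set([6]), set()
while futurelarge:
    strippoint -= 1
    linkrev = heads.pop(strippoint)
    if linkrev < minlink:
        broken.add(strippoint)
    else:
        futurelarge.remove(linkrev)
    p = strippoint - 1                    # linear chain: one parent
    if p >= 0:
        heads[p] = linkrevs[p]
        if linkrevs[p] >= minlink:
            futurelarge.add(linkrevs[p])
assert strippoint == 3 and not broken     # truncate at rev 3, nothing broken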
1957 1961 def strip(self, minlink, transaction):
1958 1962 """truncate the revlog on the first revision with a linkrev >= minlink
1959 1963
1960 1964 This function is called when we're stripping revision minlink and
1961 1965 its descendants from the repository.
1962 1966
1963 1967 We have to remove all revisions with linkrev >= minlink, because
1964 1968 the equivalent changelog revisions will be renumbered after the
1965 1969 strip.
1966 1970
1967 1971 So we truncate the revlog on the first of these revisions, and
1968 1972 trust that the caller has saved the revisions that shouldn't be
1969 1973 removed and that it'll re-add them after this truncation.
1970 1974 """
1971 1975 if len(self) == 0:
1972 1976 return
1973 1977
1974 1978 rev, _ = self.getstrippoint(minlink)
1975 1979 if rev == len(self):
1976 1980 return
1977 1981
1978 1982 # first truncate the files on disk
1979 1983 end = self.start(rev)
1980 1984 if not self._inline:
1981 1985 transaction.add(self.datafile, end)
1982 1986 end = rev * self._io.size
1983 1987 else:
1984 1988 end += rev * self._io.size
1985 1989
1986 1990 transaction.add(self.indexfile, end)
1987 1991
1988 1992 # then reset internal state in memory to forget those revisions
1989 1993 self._cache = None
1990 1994 self._chaininfocache = {}
1991 1995 self._chunkclear()
1992 1996 for x in xrange(rev, len(self)):
1993 1997 del self.nodemap[self.node(x)]
1994 1998
1995 1999 del self.index[rev:-1]
1996 2000
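# The truncation offsets written out with illustrative numbers: a split
# revlog cuts the data file at start(rev) and the index after rev
# fixed-size entries; an inline revlog holds both in one file, so the
# two contributions add:
iosize = 64                               # bytes per index entry (v1)
rev, datastart = 10, 4096                 # first rev to drop, its data offset
split_index_end = rev * iosize            # 640
inline_index_end = datastart + rev * iosize   # 4736
assert (split_index_end, inline_index_end) == (640, 4736)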
1997 2001 def checksize(self):
1998 2002 expected = 0
1999 2003 if len(self):
2000 2004 expected = max(0, self.end(len(self) - 1))
2001 2005
2002 2006 try:
2003 2007 f = self.opener(self.datafile)
2004 2008 f.seek(0, 2)
2005 2009 actual = f.tell()
2006 2010 f.close()
2007 2011 dd = actual - expected
2008 2012 except IOError as inst:
2009 2013 if inst.errno != errno.ENOENT:
2010 2014 raise
2011 2015 dd = 0
2012 2016
2013 2017 try:
2014 2018 f = self.opener(self.indexfile)
2015 2019 f.seek(0, 2)
2016 2020 actual = f.tell()
2017 2021 f.close()
2018 2022 s = self._io.size
2019 2023 i = max(0, actual // s)
2020 2024 di = actual - (i * s)
2021 2025 if self._inline:
2022 2026 databytes = 0
2023 2027 for r in self:
2024 2028 databytes += max(0, self.length(r))
2025 2029 dd = 0
2026 2030 di = actual - len(self) * s - databytes
2027 2031 except IOError as inst:
2028 2032 if inst.errno != errno.ENOENT:
2029 2033 raise
2030 2034 di = 0
2031 2035
2032 2036 return (dd, di)
2033 2037
2034 2038 def files(self):
2035 2039 res = [self.indexfile]
2036 2040 if not self._inline:
2037 2041 res.append(self.datafile)
2038 2042 return res
2039 2043
2040 2044 DELTAREUSEALWAYS = 'always'
2041 2045 DELTAREUSESAMEREVS = 'samerevs'
2042 2046 DELTAREUSENEVER = 'never'
2043 2047
2044 2048 DELTAREUSEALL = {'always', 'samerevs', 'never'}
2045 2049
2046 2050 def clone(self, tr, destrevlog, addrevisioncb=None,
2047 2051 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2048 2052 """Copy this revlog to another, possibly with format changes.
2049 2053
2050 2054 The destination revlog will contain the same revisions and nodes.
2051 2055 However, it may not be bit-for-bit identical due to e.g. delta encoding
2052 2056 differences.
2053 2057
2054 2058 The ``deltareuse`` argument controls how deltas from the existing revlog
2055 2059 are preserved in the destination revlog. The argument can have the
2056 2060 following values:
2057 2061
2058 2062 DELTAREUSEALWAYS
2059 2063 Deltas will always be reused (if possible), even if the destination
2060 2064 revlog would not select the same revisions for the delta. This is the
2061 2065 fastest mode of operation.
2062 2066 DELTAREUSESAMEREVS
2063 2067 Deltas will be reused if the destination revlog would pick the same
2064 2068 revisions for the delta. This mode strikes a balance between speed
2065 2069 and optimization.
2066 2070 DELTAREUSENEVER
2067 2071 Deltas will never be reused. This is the slowest mode of execution.
2068 2072 This mode can be used to recompute deltas (e.g. if the diff/delta
2069 2073 algorithm changes).
2070 2074
2071 2075 Delta computation can be slow, so the choice of delta reuse policy can
2072 2076 significantly affect run time.
2073 2077
2074 2078 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2075 2079 two extremes. Deltas will be reused if they are appropriate. But if
2076 2080 a better delta base could be chosen, it will be. This means if you
2077 2081 are converting a non-generaldelta revlog to a generaldelta revlog,
2078 2082 deltas will be recomputed if the delta's parent isn't a parent of the
2079 2083 revision.
2080 2084
2081 2085 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2082 2086 controls whether to compute deltas against both parents for merges.
2083 2087 If unspecified, the destination revlog's current setting is used.
2084 2088 """
2085 2089 if deltareuse not in self.DELTAREUSEALL:
2086 2090 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2087 2091
2088 2092 if len(destrevlog):
2089 2093 raise ValueError(_('destination revlog is not empty'))
2090 2094
2091 2095 if getattr(self, 'filteredrevs', None):
2092 2096 raise ValueError(_('source revlog has filtered revisions'))
2093 2097 if getattr(destrevlog, 'filteredrevs', None):
2094 2098 raise ValueError(_('destination revlog has filtered revisions'))
2095 2099
2096 2100 # lazydeltabase controls whether to reuse a cached delta, if possible.
2097 2101 oldlazydeltabase = destrevlog._lazydeltabase
2098 2102 oldamd = destrevlog._aggressivemergedeltas
2099 2103
2100 2104 try:
2101 2105 if deltareuse == self.DELTAREUSEALWAYS:
2102 2106 destrevlog._lazydeltabase = True
2103 2107 elif deltareuse == self.DELTAREUSESAMEREVS:
2104 2108 destrevlog._lazydeltabase = False
2105 2109
2106 2110 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2107 2111
2108 2112 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2109 2113 self.DELTAREUSESAMEREVS)
2110 2114
2111 2115 index = self.index
2112 2116 for rev in self:
2113 2117 entry = index[rev]
2114 2118
2115 2119 # Some classes override linkrev to take filtered revs into
2116 2120 # account. Use raw entry from index.
2117 2121 flags = entry[0] & 0xffff
2118 2122 linkrev = entry[4]
2119 2123 p1 = index[entry[5]][7]
2120 2124 p2 = index[entry[6]][7]
2121 2125 node = entry[7]
2122 2126
2123 2127 # (Possibly) reuse the delta from the revlog if allowed and
2124 2128 # the revlog chunk is a delta.
2125 2129 cachedelta = None
2126 2130 rawtext = None
2127 2131 if populatecachedelta:
2128 2132 dp = self.deltaparent(rev)
2129 2133 if dp != nullrev:
2130 2134 cachedelta = (dp, str(self._chunk(rev)))
2131 2135
2132 2136 if not cachedelta:
2133 2137 rawtext = self.revision(rev, raw=True)
2134 2138
2135 2139 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2136 2140 checkambig=False)
2137 2141 dfh = None
2138 2142 if not destrevlog._inline:
2139 2143 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2140 2144 try:
2141 2145 destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
2142 2146 flags, cachedelta, ifh, dfh)
2143 2147 finally:
2144 2148 if dfh:
2145 2149 dfh.close()
2146 2150 ifh.close()
2147 2151
2148 2152 if addrevisioncb:
2149 2153 addrevisioncb(self, rev, node)
2150 2154 finally:
2151 2155 destrevlog._lazydeltabase = oldlazydeltabase
2152 2156 destrevlog._aggressivemergedeltas = oldamd
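# A hedged usage sketch: force every delta to be recomputed while
# copying one revlog into an empty one (the opener/transaction setup and
# the index file names here are hypothetical):
from mercurial import revlog as revlogmod

def recompute(opener, tr):
    src = revlogmod.revlog(opener, 'data/src.i')
    dst = revlogmod.revlog(opener, 'data/dst.i')
    src.clone(tr, dst, deltareuse=revlogmod.revlog.DELTAREUSENEVER,
              aggressivemergedeltas=True)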
@@ -1,72 +1,72 b''
1 1 $ hg init t
2 2 $ cd t
3 3 $ echo a > a
4 4 $ hg add a
5 5 $ hg commit -m test
6 6 $ rm .hg/requires
7 7 $ hg tip
8 abort: index 00changelog.i unknown format 2!
8 abort: unknown version (2) in revlog 00changelog.i!
9 9 [255]
10 10 $ echo indoor-pool > .hg/requires
11 11 $ hg tip
12 12 abort: repository requires features unknown to this Mercurial: indoor-pool!
13 13 (see https://mercurial-scm.org/wiki/MissingRequirement for more information)
14 14 [255]
15 15 $ echo outdoor-pool >> .hg/requires
16 16 $ hg tip
17 17 abort: repository requires features unknown to this Mercurial: indoor-pool outdoor-pool!
18 18 (see https://mercurial-scm.org/wiki/MissingRequirement for more information)
19 19 [255]
20 20 $ cd ..
21 21
22 22 Test checking between features supported locally and ones required in
23 23 another repository of push/pull/clone on localhost:
24 24
25 25 $ mkdir supported-locally
26 26 $ cd supported-locally
27 27
28 28 $ hg init supported
29 29 $ echo a > supported/a
30 30 $ hg -R supported commit -Am '#0 at supported'
31 31 adding a
32 32
33 33 $ echo 'featuresetup-test' >> supported/.hg/requires
34 34 $ cat > $TESTTMP/supported-locally/supportlocally.py <<EOF
35 35 > from mercurial import localrepo, extensions
36 36 > def featuresetup(ui, supported):
37 37 > for name, module in extensions.extensions(ui):
38 38 > if __name__ == module.__name__:
39 39 > # support specific feature locally
40 40 > supported |= {'featuresetup-test'}
41 41 > return
42 42 > def uisetup(ui):
43 43 > localrepo.localrepository.featuresetupfuncs.add(featuresetup)
44 44 > EOF
45 45 $ cat > supported/.hg/hgrc <<EOF
46 46 > [extensions]
47 47 > # enable extension locally
48 48 > supportlocally = $TESTTMP/supported-locally/supportlocally.py
49 49 > EOF
50 50 $ hg -R supported status
51 51
52 52 $ hg init push-dst
53 53 $ hg -R supported push push-dst
54 54 pushing to push-dst
55 55 abort: required features are not supported in the destination: featuresetup-test
56 56 [255]
57 57
58 58 $ hg init pull-src
59 59 $ hg -R pull-src pull supported
60 60 pulling from supported
61 61 abort: required features are not supported in the destination: featuresetup-test
62 62 [255]
63 63
64 64 $ hg clone supported clone-dst
65 65 abort: repository requires features unknown to this Mercurial: featuresetup-test!
66 66 (see https://mercurial-scm.org/wiki/MissingRequirement for more information)
67 67 [255]
68 68 $ hg clone --pull supported clone-dst
69 69 abort: required features are not supported in the destination: featuresetup-test
70 70 [255]
71 71
72 72 $ cd ..
@@ -1,47 +1,47 b''
1 1 $ hg init empty-repo
2 2 $ cd empty-repo
3 3
4 4 Flags on revlog version 0 are rejected
5 5
6 6 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
7 7 ... fh.write('\x00\x01\x00\x00')
8 8
9 9 $ hg log
10 abort: index 00changelog.i unknown flags 0x01 for format v0!
10 abort: unknown flags (0x01) in version 0 revlog 00changelog.i!
11 11 [255]
12 12
13 13 Unknown flags on revlog version 1 are rejected
14 14
15 15 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
16 16 ... fh.write('\x00\x04\x00\x01')
17 17
18 18 $ hg log
19 abort: index 00changelog.i unknown flags 0x04 for revlogng!
19 abort: unknown flags (0x04) in version 1 revlog 00changelog.i!
20 20 [255]
21 21
22 22 Unknown version is rejected
23 23
24 24 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
25 25 ... fh.write('\x00\x00\x00\x02')
26 26
27 27 $ hg log
28 abort: index 00changelog.i unknown format 2!
28 abort: unknown version (2) in revlog 00changelog.i!
29 29 [255]
30 30
31 31 $ cd ..
32 32
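The header bytes written above pack into a single big-endian 32-bit
integer as (flags << 16) | version, which is why each write trips a
different check (decoding sketch over the same bytes):

  >>> import struct
  >>> for raw in ['\x00\x01\x00\x00', '\x00\x04\x00\x01', '\x00\x00\x00\x02']:
  ...     v = struct.unpack('>I', raw)[0]
  ...     print hex(v >> 16), v & 0xffff
  0x1 0
  0x4 1
  0x0 2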
33 33 Test for CVE-2016-3630
34 34
35 35 $ hg init
36 36
37 37 >>> open("a.i", "w").write(
38 38 ... """eJxjYGZgZIAAYQYGxhgom+k/FMx8YKx9ZUaKSOyqo4cnuKb8mbqHV5cBCVTMWb1Cwqkhe4Gsg9AD
39 39 ... Joa3dYtcYYYBAQ8Qr4OqZAYRICPTSr5WKd/42rV36d+8/VmrNpv7NP1jQAXrQE4BqQUARngwVA=="""
40 40 ... .decode("base64").decode("zlib"))
41 41
42 42 $ hg debugindex a.i
43 43 rev offset length delta linkrev nodeid p1 p2
44 44 0 0 19 -1 2 99e0332bd498 000000000000 000000000000
45 45 1 19 12 0 3 6674f57a23d8 99e0332bd498 000000000000
46 46 $ hg debugdata a.i 1 2>&1 | egrep 'Error:.*decoded'
47 47 (mercurial\.\w+\.mpatch\.)?mpatchError: patch cannot be decoded (re)