changegroup: remove changegroup dependency from revlog.addgroup...
Durham Goode
r34147:c8b6ed51 default
@@ -1,980 +1,1005 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import tempfile
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 hex,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from . import (
23 23 dagutil,
24 24 error,
25 25 mdiff,
26 26 phases,
27 27 pycompat,
28 28 util,
29 29 )
30 30
31 31 _CHANGEGROUPV1_DELTA_HEADER = "20s20s20s20s"
32 32 _CHANGEGROUPV2_DELTA_HEADER = "20s20s20s20s20s"
33 33 _CHANGEGROUPV3_DELTA_HEADER = ">20s20s20s20s20sH"
34 34
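The three delta header formats above differ only in what each per-revision header carries: cg1 encodes node, p1, p2 and the linknode; cg2 adds an explicit delta base; cg3 appends a 16-bit flags field on top of that. A small illustrative sketch (not part of the original file) of the resulting fixed sizes, computed the same way the unpackers below derive deltaheadersize, using the constants defined just above:

import struct

# cg1: node, p1, p2, linknode (four 20-byte nodes)
assert struct.calcsize(_CHANGEGROUPV1_DELTA_HEADER) == 80
# cg2: adds an explicit deltabase node
assert struct.calcsize(_CHANGEGROUPV2_DELTA_HEADER) == 100
# cg3: adds a big-endian 16-bit flags field
assert struct.calcsize(_CHANGEGROUPV3_DELTA_HEADER) == 102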
35 35 def readexactly(stream, n):
36 36 '''read n bytes from stream.read and abort if less was available'''
37 37 s = stream.read(n)
38 38 if len(s) < n:
39 39 raise error.Abort(_("stream ended unexpectedly"
40 40 " (got %d bytes, expected %d)")
41 41 % (len(s), n))
42 42 return s
43 43
44 44 def getchunk(stream):
45 45 """return the next chunk from stream as a string"""
46 46 d = readexactly(stream, 4)
47 47 l = struct.unpack(">l", d)[0]
48 48 if l <= 4:
49 49 if l:
50 50 raise error.Abort(_("invalid chunk length %d") % l)
51 51 return ""
52 52 return readexactly(stream, l - 4)
53 53
54 54 def chunkheader(length):
55 55 """return a changegroup chunk header (string)"""
56 56 return struct.pack(">l", length + 4)
57 57
58 58 def closechunk():
59 59 """return a changegroup chunk header (string) for a zero-length chunk"""
60 60 return struct.pack(">l", 0)
61 61
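A hedged, standalone sketch of how this framing round-trips: every chunk is prefixed by a 4-byte big-endian length that counts the prefix itself, and a zero length closes a group. The payload below is made up; the helpers are the module-level functions defined above (Python 2, matching the rest of this file):

import io
from mercurial.changegroup import chunkheader, closechunk, getchunk

payload = b'example delta data'
stream = io.BytesIO(chunkheader(len(payload)) + payload + closechunk())
assert getchunk(stream) == payload  # length prefix is stripped off
assert getchunk(stream) == ""       # zero-length chunk: end of this group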
62 62 def writechunks(ui, chunks, filename, vfs=None):
63 63 """Write chunks to a file and return its filename.
64 64
65 65 The stream is assumed to be a bundle file.
66 66 Existing files will not be overwritten.
67 67 If no filename is specified, a temporary file is created.
68 68 """
69 69 fh = None
70 70 cleanup = None
71 71 try:
72 72 if filename:
73 73 if vfs:
74 74 fh = vfs.open(filename, "wb")
75 75 else:
76 76 # Increase default buffer size because default is usually
77 77 # small (4k is common on Linux).
78 78 fh = open(filename, "wb", 131072)
79 79 else:
80 80 fd, filename = tempfile.mkstemp(prefix="hg-bundle-", suffix=".hg")
81 81 fh = os.fdopen(fd, pycompat.sysstr("wb"))
82 82 cleanup = filename
83 83 for c in chunks:
84 84 fh.write(c)
85 85 cleanup = None
86 86 return filename
87 87 finally:
88 88 if fh is not None:
89 89 fh.close()
90 90 if cleanup is not None:
91 91 if filename and vfs:
92 92 vfs.unlink(cleanup)
93 93 else:
94 94 os.unlink(cleanup)
95 95
96 96 class cg1unpacker(object):
97 97 """Unpacker for cg1 changegroup streams.
98 98
99 99 A changegroup unpacker handles the framing of the revision data in
100 100 the wire format. Most consumers will want to use the apply()
101 101 method to add the changes from the changegroup to a repository.
102 102
103 103 If you're forwarding a changegroup unmodified to another consumer,
104 104 use getchunks(), which returns an iterator of changegroup
105 105 chunks. This is mostly useful for cases where you need to know the
106 106 data stream has ended by observing the end of the changegroup.
107 107
108 108 deltachunk() is useful only if you're applying delta data. Most
109 109 consumers should prefer apply() instead.
110 110
111 111 A few other public methods exist. Those are used only for
112 112 bundlerepo and some debug commands - their use is discouraged.
113 113 """
114 114 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
115 115 deltaheadersize = struct.calcsize(deltaheader)
116 116 version = '01'
117 117 _grouplistcount = 1 # One list of files after the manifests
118 118
119 119 def __init__(self, fh, alg, extras=None):
120 120 if alg is None:
121 121 alg = 'UN'
122 122 if alg not in util.compengines.supportedbundletypes:
123 123 raise error.Abort(_('unknown stream compression type: %s')
124 124 % alg)
125 125 if alg == 'BZ':
126 126 alg = '_truncatedBZ'
127 127
128 128 compengine = util.compengines.forbundletype(alg)
129 129 self._stream = compengine.decompressorreader(fh)
130 130 self._type = alg
131 131 self.extras = extras or {}
132 132 self.callback = None
133 133
134 134 # These methods (compressed, read, seek, tell) all appear to only
135 135 # be used by bundlerepo, but it's a little hard to tell.
136 136 def compressed(self):
137 137 return self._type is not None and self._type != 'UN'
138 138 def read(self, l):
139 139 return self._stream.read(l)
140 140 def seek(self, pos):
141 141 return self._stream.seek(pos)
142 142 def tell(self):
143 143 return self._stream.tell()
144 144 def close(self):
145 145 return self._stream.close()
146 146
147 147 def _chunklength(self):
148 148 d = readexactly(self._stream, 4)
149 149 l = struct.unpack(">l", d)[0]
150 150 if l <= 4:
151 151 if l:
152 152 raise error.Abort(_("invalid chunk length %d") % l)
153 153 return 0
154 154 if self.callback:
155 155 self.callback()
156 156 return l - 4
157 157
158 158 def changelogheader(self):
159 159 """v10 does not have a changelog header chunk"""
160 160 return {}
161 161
162 162 def manifestheader(self):
163 163 """v10 does not have a manifest header chunk"""
164 164 return {}
165 165
166 166 def filelogheader(self):
167 167 """return the header of the filelogs chunk, v10 only has the filename"""
168 168 l = self._chunklength()
169 169 if not l:
170 170 return {}
171 171 fname = readexactly(self._stream, l)
172 172 return {'filename': fname}
173 173
174 174 def _deltaheader(self, headertuple, prevnode):
175 175 node, p1, p2, cs = headertuple
176 176 if prevnode is None:
177 177 deltabase = p1
178 178 else:
179 179 deltabase = prevnode
180 180 flags = 0
181 181 return node, p1, p2, deltabase, cs, flags
182 182
183 183 def deltachunk(self, prevnode):
184 184 l = self._chunklength()
185 185 if not l:
186 186 return {}
187 187 headerdata = readexactly(self._stream, self.deltaheadersize)
188 188 header = struct.unpack(self.deltaheader, headerdata)
189 189 delta = readexactly(self._stream, l - self.deltaheadersize)
190 190 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
191 191 return {'node': node, 'p1': p1, 'p2': p2, 'cs': cs,
192 192 'deltabase': deltabase, 'delta': delta, 'flags': flags}
193 193
194 194 def getchunks(self):
195 195 """returns all the chunks contains in the bundle
196 196
197 197 Used when you need to forward the binary stream to a file or another
198 198 network API. To do so, it parses the changegroup data; otherwise it would
199 199 block in case of sshrepo because it doesn't know the end of the stream.
200 200 """
201 201 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
202 202 # and a list of filelogs. For changegroup 3, we expect 4 parts:
203 203 # changelog, manifestlog, a list of tree manifestlogs, and a list of
204 204 # filelogs.
205 205 #
206 206 # Changelog and manifestlog parts are terminated with empty chunks. The
207 207 # tree and file parts are a list of entry sections. Each entry section
208 208 # is a series of chunks terminating in an empty chunk. The list of these
209 209 # entry sections is terminated in yet another empty chunk, so we know
210 210 # we've reached the end of the tree/file list when we reach an empty
211 211 # chunk that was preceded by no non-empty chunks.
212 212
213 213 parts = 0
214 214 while parts < 2 + self._grouplistcount:
215 215 noentries = True
216 216 while True:
217 217 chunk = getchunk(self)
218 218 if not chunk:
219 219 # The first two empty chunks represent the end of the
220 220 # changelog and the manifestlog portions. The remaining
221 221 # empty chunks represent either A) the end of individual
222 222 # tree or file entries in the file list, or B) the end of
223 223 # the entire list. It's the end of the entire list if there
224 224 # were no entries (i.e. noentries is True).
225 225 if parts < 2:
226 226 parts += 1
227 227 elif noentries:
228 228 parts += 1
229 229 break
230 230 noentries = False
231 231 yield chunkheader(len(chunk))
232 232 pos = 0
233 233 while pos < len(chunk):
234 234 next = pos + 2**20
235 235 yield chunk[pos:next]
236 236 pos = next
237 237 yield closechunk()
238 238
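For instance, a consumer that only wants to persist an incoming changegroup rather than apply it can feed getchunks() straight into writechunks() from earlier in this file; 'repo', 'unpacker' and the target filename are placeholders in this sketch:

# 'unpacker' would be a cg1unpacker produced by getunbundler() further down.
bundlename = writechunks(repo.ui, unpacker.getchunks(), 'incoming.hg')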
239 239 def _unpackmanifests(self, repo, revmap, trp, prog, numchanges):
240 240 # We know that we'll never have more manifests than we had
241 241 # changesets.
242 242 self.callback = prog(_('manifests'), numchanges)
243 243 # no need to check for empty manifest group here:
244 244 # if the result of the merge of 1 and 2 is the same in 3 and 4,
245 245 # no new manifest will be created and the manifest group will
246 246 # be empty during the pull
247 247 self.manifestheader()
248 repo.manifestlog._revlog.addgroup(self, revmap, trp)
248 deltas = self.deltaiter(revmap)
249 repo.manifestlog._revlog.addgroup(deltas, trp)
249 250 repo.ui.progress(_('manifests'), None)
250 251 self.callback = None
251 252
252 253 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
253 254 expectedtotal=None):
254 255 """Add the changegroup returned by source.read() to this repo.
255 256 srctype is a string like 'push', 'pull', or 'unbundle'. url is
256 257 the URL of the repo where this changegroup is coming from.
257 258
258 259 Return an integer summarizing the change to this repo:
259 260 - nothing changed or no source: 0
260 261 - more heads than before: 1+added heads (2..n)
261 262 - fewer heads than before: -1-removed heads (-2..-n)
262 263 - number of heads stays the same: 1
263 264 """
264 265 repo = repo.unfiltered()
265 266 def csmap(x):
266 267 repo.ui.debug("add changeset %s\n" % short(x))
267 268 return len(cl)
268 269
269 270 def revmap(x):
270 271 return cl.rev(x)
271 272
272 273 changesets = files = revisions = 0
273 274
274 275 try:
275 276 # The transaction may already carry source information. In this
276 277 # case we use the top level data. We overwrite the argument
277 278 # because we need to use the top level value (if they exist)
278 279 # in this function.
279 280 srctype = tr.hookargs.setdefault('source', srctype)
280 281 url = tr.hookargs.setdefault('url', url)
281 282 repo.hook('prechangegroup',
282 283 throw=True, **pycompat.strkwargs(tr.hookargs))
283 284
284 285 # write changelog data to temp files so concurrent readers
285 286 # will not see an inconsistent view
286 287 cl = repo.changelog
287 288 cl.delayupdate(tr)
288 289 oldheads = set(cl.heads())
289 290
290 291 trp = weakref.proxy(tr)
291 292 # pull off the changeset group
292 293 repo.ui.status(_("adding changesets\n"))
293 294 clstart = len(cl)
294 295 class prog(object):
295 296 def __init__(self, step, total):
296 297 self._step = step
297 298 self._total = total
298 299 self._count = 1
299 300 def __call__(self):
300 301 repo.ui.progress(self._step, self._count, unit=_('chunks'),
301 302 total=self._total)
302 303 self._count += 1
303 304 self.callback = prog(_('changesets'), expectedtotal)
304 305
305 306 efiles = set()
306 307 def onchangelog(cl, node):
307 308 efiles.update(cl.readfiles(node))
308 309
309 310 self.changelogheader()
310 cgnodes = cl.addgroup(self, csmap, trp, addrevisioncb=onchangelog)
311 deltas = self.deltaiter(csmap)
312 cgnodes = cl.addgroup(deltas, trp, addrevisioncb=onchangelog)
311 313 efiles = len(efiles)
312 314
313 315 if not cgnodes:
314 316 repo.ui.develwarn('applied empty changegroup',
315 317 config='empty-changegroup')
316 318 clend = len(cl)
317 319 changesets = clend - clstart
318 320 repo.ui.progress(_('changesets'), None)
319 321 self.callback = None
320 322
321 323 # pull off the manifest group
322 324 repo.ui.status(_("adding manifests\n"))
323 325 self._unpackmanifests(repo, revmap, trp, prog, changesets)
324 326
325 327 needfiles = {}
326 328 if repo.ui.configbool('server', 'validate'):
327 329 cl = repo.changelog
328 330 ml = repo.manifestlog
329 331 # validate incoming csets have their manifests
330 332 for cset in xrange(clstart, clend):
331 333 mfnode = cl.changelogrevision(cset).manifest
332 334 mfest = ml[mfnode].readdelta()
333 335 # store file cgnodes we must see
334 336 for f, n in mfest.iteritems():
335 337 needfiles.setdefault(f, set()).add(n)
336 338
337 339 # process the files
338 340 repo.ui.status(_("adding file changes\n"))
339 341 newrevs, newfiles = _addchangegroupfiles(
340 342 repo, self, revmap, trp, efiles, needfiles)
341 343 revisions += newrevs
342 344 files += newfiles
343 345
344 346 deltaheads = 0
345 347 if oldheads:
346 348 heads = cl.heads()
347 349 deltaheads = len(heads) - len(oldheads)
348 350 for h in heads:
349 351 if h not in oldheads and repo[h].closesbranch():
350 352 deltaheads -= 1
351 353 htext = ""
352 354 if deltaheads:
353 355 htext = _(" (%+d heads)") % deltaheads
354 356
355 357 repo.ui.status(_("added %d changesets"
356 358 " with %d changes to %d files%s\n")
357 359 % (changesets, revisions, files, htext))
358 360 repo.invalidatevolatilesets()
359 361
360 362 if changesets > 0:
361 363 if 'node' not in tr.hookargs:
362 364 tr.hookargs['node'] = hex(cl.node(clstart))
363 365 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
364 366 hookargs = dict(tr.hookargs)
365 367 else:
366 368 hookargs = dict(tr.hookargs)
367 369 hookargs['node'] = hex(cl.node(clstart))
368 370 hookargs['node_last'] = hex(cl.node(clend - 1))
369 371 repo.hook('pretxnchangegroup',
370 372 throw=True, **pycompat.strkwargs(hookargs))
371 373
372 374 added = [cl.node(r) for r in xrange(clstart, clend)]
373 375 phaseall = None
374 376 if srctype in ('push', 'serve'):
375 377 # Old servers can not push the boundary themselves.
376 378 # New servers won't push the boundary if changeset already
377 379 # exists locally as secret
378 380 #
379 381 # We should not use added here but the list of all changes in
380 382 # the bundle
381 383 if repo.publishing():
382 384 targetphase = phaseall = phases.public
383 385 else:
384 386 # closer target phase computation
385 387
386 388 # Those changesets have been pushed from the
387 389 # outside, their phases are going to be pushed
388 390 # alongside. Therefore `targetphase` is
389 391 # ignored.
390 392 targetphase = phaseall = phases.draft
391 393 if added:
392 394 phases.registernew(repo, tr, targetphase, added)
393 395 if phaseall is not None:
394 396 phases.advanceboundary(repo, tr, phaseall, cgnodes)
395 397
396 398 if changesets > 0:
397 399
398 400 def runhooks():
399 401 # These hooks run when the lock releases, not when the
400 402 # transaction closes. So it's possible for the changelog
401 403 # to have changed since we last saw it.
402 404 if clstart >= len(repo):
403 405 return
404 406
405 407 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
406 408
407 409 for n in added:
408 410 args = hookargs.copy()
409 411 args['node'] = hex(n)
410 412 del args['node_last']
411 413 repo.hook("incoming", **pycompat.strkwargs(args))
412 414
413 415 newheads = [h for h in repo.heads()
414 416 if h not in oldheads]
415 417 repo.ui.log("incoming",
416 418 "%s incoming changes - new heads: %s\n",
417 419 len(added),
418 420 ', '.join([hex(c[:6]) for c in newheads]))
419 421
420 422 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
421 423 lambda tr: repo._afterlock(runhooks))
422 424 finally:
423 425 repo.ui.flush()
424 426 # never return 0 here:
425 427 if deltaheads < 0:
426 428 ret = deltaheads - 1
427 429 else:
428 430 ret = deltaheads + 1
429 431 return ret
430 432
433 def deltaiter(self, linkmapper):
434 """
435 returns an iterator of the deltas in this changegroup
436
437 Useful for passing to the underlying storage system to be stored.
438 """
439 chain = None
440 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
441 node = chunkdata['node']
442 p1 = chunkdata['p1']
443 p2 = chunkdata['p2']
444 cs = chunkdata['cs']
445 deltabase = chunkdata['deltabase']
446 delta = chunkdata['delta']
447 flags = chunkdata['flags']
448
449 link = linkmapper(cs)
450 chain = node
451
452 yield (node, p1, p2, link, deltabase, delta, flags)
453
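deltaiter() is the seam this change introduces: rather than handing the unpacker object itself to revlog.addgroup(), callers build an iterator of (node, p1, p2, linknode, deltabase, delta, flags) tuples and pass that along, so the revlog layer no longer needs to know anything about changegroup framing. A hedged sketch of the new calling convention, mirroring the apply() and _unpackmanifests() call sites above ('unpacker', 'cl', 'filelog' and 'tr' are placeholders):

# linkmapper maps the changeset node carried in each chunk to a local linkrev
deltas = unpacker.deltaiter(cl.rev)
added = filelog.addgroup(deltas, tr)  # returns the nodes that were added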
431 454 class cg2unpacker(cg1unpacker):
432 455 """Unpacker for cg2 streams.
433 456
434 457 cg2 streams add support for generaldelta, so the delta header
435 458 format is slightly different. All other features about the data
436 459 remain the same.
437 460 """
438 461 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
439 462 deltaheadersize = struct.calcsize(deltaheader)
440 463 version = '02'
441 464
442 465 def _deltaheader(self, headertuple, prevnode):
443 466 node, p1, p2, deltabase, cs = headertuple
444 467 flags = 0
445 468 return node, p1, p2, deltabase, cs, flags
446 469
447 470 class cg3unpacker(cg2unpacker):
448 471 """Unpacker for cg3 streams.
449 472
450 473 cg3 streams add support for exchanging treemanifests and revlog
451 474 flags. It adds the revlog flags to the delta header and an empty chunk
452 475 separating manifests and files.
453 476 """
454 477 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
455 478 deltaheadersize = struct.calcsize(deltaheader)
456 479 version = '03'
457 480 _grouplistcount = 2 # One list of manifests and one list of files
458 481
459 482 def _deltaheader(self, headertuple, prevnode):
460 483 node, p1, p2, deltabase, cs, flags = headertuple
461 484 return node, p1, p2, deltabase, cs, flags
462 485
463 486 def _unpackmanifests(self, repo, revmap, trp, prog, numchanges):
464 487 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog,
465 488 numchanges)
466 489 for chunkdata in iter(self.filelogheader, {}):
467 490 # If we get here, there are directory manifests in the changegroup
468 491 d = chunkdata["filename"]
469 492 repo.ui.debug("adding %s revisions\n" % d)
470 493 dirlog = repo.manifestlog._revlog.dirlog(d)
471 if not dirlog.addgroup(self, revmap, trp):
494 deltas = self.deltaiter(revmap)
495 if not dirlog.addgroup(deltas, trp):
472 496 raise error.Abort(_("received dir revlog group is empty"))
473 497
474 498 class headerlessfixup(object):
475 499 def __init__(self, fh, h):
476 500 self._h = h
477 501 self._fh = fh
478 502 def read(self, n):
479 503 if self._h:
480 504 d, self._h = self._h[:n], self._h[n:]
481 505 if len(d) < n:
482 506 d += readexactly(self._fh, n - len(d))
483 507 return d
484 508 return readexactly(self._fh, n)
485 509
486 510 class cg1packer(object):
487 511 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
488 512 version = '01'
489 513 def __init__(self, repo, bundlecaps=None):
490 514 """Given a source repo, construct a bundler.
491 515
492 516 bundlecaps is optional and can be used to specify the set of
493 517 capabilities which can be used to build the bundle. While bundlecaps is
494 518 unused in core Mercurial, extensions rely on this feature to communicate
495 519 capabilities to customize the changegroup packer.
496 520 """
497 521 # Set of capabilities we can use to build the bundle.
498 522 if bundlecaps is None:
499 523 bundlecaps = set()
500 524 self._bundlecaps = bundlecaps
501 525 # experimental config: bundle.reorder
502 526 reorder = repo.ui.config('bundle', 'reorder')
503 527 if reorder == 'auto':
504 528 reorder = None
505 529 else:
506 530 reorder = util.parsebool(reorder)
507 531 self._repo = repo
508 532 self._reorder = reorder
509 533 self._progress = repo.ui.progress
510 534 if self._repo.ui.verbose and not self._repo.ui.debugflag:
511 535 self._verbosenote = self._repo.ui.note
512 536 else:
513 537 self._verbosenote = lambda s: None
514 538
515 539 def close(self):
516 540 return closechunk()
517 541
518 542 def fileheader(self, fname):
519 543 return chunkheader(len(fname)) + fname
520 544
521 545 # Extracted both for clarity and for overriding in extensions.
522 546 def _sortgroup(self, revlog, nodelist, lookup):
523 547 """Sort nodes for change group and turn them into revnums."""
524 548 # for generaldelta revlogs, we linearize the revs; this will both be
525 549 # much quicker and generate a much smaller bundle
526 550 if (revlog._generaldelta and self._reorder is None) or self._reorder:
527 551 dag = dagutil.revlogdag(revlog)
528 552 return dag.linearize(set(revlog.rev(n) for n in nodelist))
529 553 else:
530 554 return sorted([revlog.rev(n) for n in nodelist])
531 555
532 556 def group(self, nodelist, revlog, lookup, units=None):
533 557 """Calculate a delta group, yielding a sequence of changegroup chunks
534 558 (strings).
535 559
536 560 Given a list of changeset revs, return a set of deltas and
537 561 metadata corresponding to nodes. The first delta is
538 562 first parent(nodelist[0]) -> nodelist[0], the receiver is
539 563 guaranteed to have this parent as it has all history before
540 564 these changesets. In the case firstparent is nullrev the
541 565 changegroup starts with a full revision.
542 566
543 567 If units is not None, progress detail will be generated; units specifies
544 568 the type of revlog that is touched (changelog, manifest, etc.).
545 569 """
546 570 # if we don't have any revisions touched by these changesets, bail
547 571 if len(nodelist) == 0:
548 572 yield self.close()
549 573 return
550 574
551 575 revs = self._sortgroup(revlog, nodelist, lookup)
552 576
553 577 # add the parent of the first rev
554 578 p = revlog.parentrevs(revs[0])[0]
555 579 revs.insert(0, p)
556 580
557 581 # build deltas
558 582 total = len(revs) - 1
559 583 msgbundling = _('bundling')
560 584 for r in xrange(len(revs) - 1):
561 585 if units is not None:
562 586 self._progress(msgbundling, r + 1, unit=units, total=total)
563 587 prev, curr = revs[r], revs[r + 1]
564 588 linknode = lookup(revlog.node(curr))
565 589 for c in self.revchunk(revlog, curr, prev, linknode):
566 590 yield c
567 591
568 592 if units is not None:
569 593 self._progress(msgbundling, None)
570 594 yield self.close()
571 595
572 596 # filter any nodes that claim to be part of the known set
573 597 def prune(self, revlog, missing, commonrevs):
574 598 rr, rl = revlog.rev, revlog.linkrev
575 599 return [n for n in missing if rl(rr(n)) not in commonrevs]
576 600
577 601 def _packmanifests(self, dir, mfnodes, lookuplinknode):
578 602 """Pack flat manifests into a changegroup stream."""
579 603 assert not dir
580 604 for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
581 605 lookuplinknode, units=_('manifests')):
582 606 yield chunk
583 607
584 608 def _manifestsdone(self):
585 609 return ''
586 610
587 611 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
588 612 '''yield a sequence of changegroup chunks (strings)'''
589 613 repo = self._repo
590 614 cl = repo.changelog
591 615
592 616 clrevorder = {}
593 617 mfs = {} # needed manifests
594 618 fnodes = {} # needed file nodes
595 619 changedfiles = set()
596 620
597 621 # Callback for the changelog, used to collect changed files and manifest
598 622 # nodes.
599 623 # Returns the linkrev node (identity in the changelog case).
600 624 def lookupcl(x):
601 625 c = cl.read(x)
602 626 clrevorder[x] = len(clrevorder)
603 627 n = c[0]
604 628 # record the first changeset introducing this manifest version
605 629 mfs.setdefault(n, x)
606 630 # Record a complete list of potentially-changed files in
607 631 # this manifest.
608 632 changedfiles.update(c[3])
609 633 return x
610 634
611 635 self._verbosenote(_('uncompressed size of bundle content:\n'))
612 636 size = 0
613 637 for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
614 638 size += len(chunk)
615 639 yield chunk
616 640 self._verbosenote(_('%8.i (changelog)\n') % size)
617 641
618 642 # We need to make sure that the linkrev in the changegroup refers to
619 643 # the first changeset that introduced the manifest or file revision.
620 644 # The fastpath is usually safer than the slowpath, because the filelogs
621 645 # are walked in revlog order.
622 646 #
623 647 # When taking the slowpath with reorder=None and the manifest revlog
624 648 # uses generaldelta, the manifest may be walked in the "wrong" order.
625 649 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
626 650 # cc0ff93d0c0c).
627 651 #
628 652 # When taking the fastpath, we are only vulnerable to reordering
629 653 # of the changelog itself. The changelog never uses generaldelta, so
630 654 # it is only reordered when reorder=True. To handle this case, we
631 655 # simply take the slowpath, which already has the 'clrevorder' logic.
632 656 # This was also fixed in cc0ff93d0c0c.
633 657 fastpathlinkrev = fastpathlinkrev and not self._reorder
634 658 # Treemanifests don't work correctly with fastpathlinkrev
635 659 # either, because we don't discover which directory nodes to
636 660 # send along with files. This could probably be fixed.
637 661 fastpathlinkrev = fastpathlinkrev and (
638 662 'treemanifest' not in repo.requirements)
639 663
640 664 for chunk in self.generatemanifests(commonrevs, clrevorder,
641 665 fastpathlinkrev, mfs, fnodes):
642 666 yield chunk
643 667 mfs.clear()
644 668 clrevs = set(cl.rev(x) for x in clnodes)
645 669
646 670 if not fastpathlinkrev:
647 671 def linknodes(unused, fname):
648 672 return fnodes.get(fname, {})
649 673 else:
650 674 cln = cl.node
651 675 def linknodes(filerevlog, fname):
652 676 llr = filerevlog.linkrev
653 677 fln = filerevlog.node
654 678 revs = ((r, llr(r)) for r in filerevlog)
655 679 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
656 680
657 681 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
658 682 source):
659 683 yield chunk
660 684
661 685 yield self.close()
662 686
663 687 if clnodes:
664 688 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
665 689
666 690 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
667 691 fnodes):
668 692 repo = self._repo
669 693 mfl = repo.manifestlog
670 694 dirlog = mfl._revlog.dirlog
671 695 tmfnodes = {'': mfs}
672 696
673 697 # Callback for the manifest, used to collect linkrevs for filelog
674 698 # revisions.
675 699 # Returns the linkrev node (collected in lookupcl).
676 700 def makelookupmflinknode(dir):
677 701 if fastpathlinkrev:
678 702 assert not dir
679 703 return mfs.__getitem__
680 704
681 705 def lookupmflinknode(x):
682 706 """Callback for looking up the linknode for manifests.
683 707
684 708 Returns the linkrev node for the specified manifest.
685 709
686 710 SIDE EFFECT:
687 711
688 712 1) fclnodes gets populated with the list of relevant
689 713 file nodes if we're not using fastpathlinkrev
690 714 2) When treemanifests are in use, collects treemanifest nodes
691 715 to send
692 716
693 717 Note that this means manifests must be completely sent to
694 718 the client before you can trust the list of files and
695 719 treemanifests to send.
696 720 """
697 721 clnode = tmfnodes[dir][x]
698 722 mdata = mfl.get(dir, x).readfast(shallow=True)
699 723 for p, n, fl in mdata.iterentries():
700 724 if fl == 't': # subdirectory manifest
701 725 subdir = dir + p + '/'
702 726 tmfclnodes = tmfnodes.setdefault(subdir, {})
703 727 tmfclnode = tmfclnodes.setdefault(n, clnode)
704 728 if clrevorder[clnode] < clrevorder[tmfclnode]:
705 729 tmfclnodes[n] = clnode
706 730 else:
707 731 f = dir + p
708 732 fclnodes = fnodes.setdefault(f, {})
709 733 fclnode = fclnodes.setdefault(n, clnode)
710 734 if clrevorder[clnode] < clrevorder[fclnode]:
711 735 fclnodes[n] = clnode
712 736 return clnode
713 737 return lookupmflinknode
714 738
715 739 size = 0
716 740 while tmfnodes:
717 741 dir = min(tmfnodes)
718 742 nodes = tmfnodes[dir]
719 743 prunednodes = self.prune(dirlog(dir), nodes, commonrevs)
720 744 if not dir or prunednodes:
721 745 for x in self._packmanifests(dir, prunednodes,
722 746 makelookupmflinknode(dir)):
723 747 size += len(x)
724 748 yield x
725 749 del tmfnodes[dir]
726 750 self._verbosenote(_('%8.i (manifests)\n') % size)
727 751 yield self._manifestsdone()
728 752
729 753 # The 'source' parameter is useful for extensions
730 754 def generatefiles(self, changedfiles, linknodes, commonrevs, source):
731 755 repo = self._repo
732 756 progress = self._progress
733 757 msgbundling = _('bundling')
734 758
735 759 total = len(changedfiles)
736 760 # for progress output
737 761 msgfiles = _('files')
738 762 for i, fname in enumerate(sorted(changedfiles)):
739 763 filerevlog = repo.file(fname)
740 764 if not filerevlog:
741 765 raise error.Abort(_("empty or missing revlog for %s") % fname)
742 766
743 767 linkrevnodes = linknodes(filerevlog, fname)
744 768 # Lookup for filenodes, we collected the linkrev nodes above in the
745 769 # fastpath case and with lookupmf in the slowpath case.
746 770 def lookupfilelog(x):
747 771 return linkrevnodes[x]
748 772
749 773 filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
750 774 if filenodes:
751 775 progress(msgbundling, i + 1, item=fname, unit=msgfiles,
752 776 total=total)
753 777 h = self.fileheader(fname)
754 778 size = len(h)
755 779 yield h
756 780 for chunk in self.group(filenodes, filerevlog, lookupfilelog):
757 781 size += len(chunk)
758 782 yield chunk
759 783 self._verbosenote(_('%8.i %s\n') % (size, fname))
760 784 progress(msgbundling, None)
761 785
762 786 def deltaparent(self, revlog, rev, p1, p2, prev):
763 787 return prev
764 788
765 789 def revchunk(self, revlog, rev, prev, linknode):
766 790 node = revlog.node(rev)
767 791 p1, p2 = revlog.parentrevs(rev)
768 792 base = self.deltaparent(revlog, rev, p1, p2, prev)
769 793
770 794 prefix = ''
771 795 if revlog.iscensored(base) or revlog.iscensored(rev):
772 796 try:
773 797 delta = revlog.revision(node, raw=True)
774 798 except error.CensoredNodeError as e:
775 799 delta = e.tombstone
776 800 if base == nullrev:
777 801 prefix = mdiff.trivialdiffheader(len(delta))
778 802 else:
779 803 baselen = revlog.rawsize(base)
780 804 prefix = mdiff.replacediffheader(baselen, len(delta))
781 805 elif base == nullrev:
782 806 delta = revlog.revision(node, raw=True)
783 807 prefix = mdiff.trivialdiffheader(len(delta))
784 808 else:
785 809 delta = revlog.revdiff(base, rev)
786 810 p1n, p2n = revlog.parents(node)
787 811 basenode = revlog.node(base)
788 812 flags = revlog.flags(rev)
789 813 meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags)
790 814 meta += prefix
791 815 l = len(meta) + len(delta)
792 816 yield chunkheader(l)
793 817 yield meta
794 818 yield delta
795 819 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
796 820 # do nothing with basenode, it is implicitly the previous one in HG10
797 821 # do nothing with flags, it is implicitly 0 for cg1 and cg2
798 822 return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
799 823
800 824 class cg2packer(cg1packer):
801 825 version = '02'
802 826 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
803 827
804 828 def __init__(self, repo, bundlecaps=None):
805 829 super(cg2packer, self).__init__(repo, bundlecaps)
806 830 if self._reorder is None:
807 831 # Since generaldelta is directly supported by cg2, reordering
808 832 # generally doesn't help, so we disable it by default (treating
809 833 # bundle.reorder=auto just like bundle.reorder=False).
810 834 self._reorder = False
811 835
812 836 def deltaparent(self, revlog, rev, p1, p2, prev):
813 837 dp = revlog.deltaparent(rev)
814 838 if dp == nullrev and revlog.storedeltachains:
815 839 # Avoid sending full revisions when delta parent is null. Pick prev
816 840 # in that case. It's tempting to pick p1 in this case, as p1 will
817 841 # be smaller in the common case. However, computing a delta against
818 842 # p1 may require resolving the raw text of p1, which could be
819 843 # expensive. The revlog caches should have prev cached, meaning
820 844 # less CPU for changegroup generation. There is likely room to add
821 845 # a flag and/or config option to control this behavior.
822 846 return prev
823 847 elif dp == nullrev:
824 848 # revlog is configured to use full snapshot for a reason,
825 849 # stick to full snapshot.
826 850 return nullrev
827 851 elif dp not in (p1, p2, prev):
828 852 # Pick prev when we can't be sure remote has the base revision.
829 853 return prev
830 854 else:
831 855 return dp
832 856
833 857 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
834 858 # Do nothing with flags, it is implicitly 0 in cg1 and cg2
835 859 return struct.pack(self.deltaheader, node, p1n, p2n, basenode, linknode)
836 860
837 861 class cg3packer(cg2packer):
838 862 version = '03'
839 863 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
840 864
841 865 def _packmanifests(self, dir, mfnodes, lookuplinknode):
842 866 if dir:
843 867 yield self.fileheader(dir)
844 868
845 869 dirlog = self._repo.manifestlog._revlog.dirlog(dir)
846 870 for chunk in self.group(mfnodes, dirlog, lookuplinknode,
847 871 units=_('manifests')):
848 872 yield chunk
849 873
850 874 def _manifestsdone(self):
851 875 return self.close()
852 876
853 877 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
854 878 return struct.pack(
855 879 self.deltaheader, node, p1n, p2n, basenode, linknode, flags)
856 880
857 881 _packermap = {'01': (cg1packer, cg1unpacker),
858 882 # cg2 adds support for exchanging generaldelta
859 883 '02': (cg2packer, cg2unpacker),
860 884 # cg3 adds support for exchanging revlog flags and treemanifests
861 885 '03': (cg3packer, cg3unpacker),
862 886 }
863 887
864 888 def allsupportedversions(repo):
865 889 versions = set(_packermap.keys())
866 890 if not (repo.ui.configbool('experimental', 'changegroup3') or
867 891 repo.ui.configbool('experimental', 'treemanifest') or
868 892 'treemanifest' in repo.requirements):
869 893 versions.discard('03')
870 894 return versions
871 895
872 896 # Changegroup versions that can be applied to the repo
873 897 def supportedincomingversions(repo):
874 898 return allsupportedversions(repo)
875 899
876 900 # Changegroup versions that can be created from the repo
877 901 def supportedoutgoingversions(repo):
878 902 versions = allsupportedversions(repo)
879 903 if 'treemanifest' in repo.requirements:
880 904 # Versions 01 and 02 support only flat manifests and it's just too
881 905 # expensive to convert between the flat manifest and tree manifest on
882 906 # the fly. Since tree manifests are hashed differently, all of history
883 907 # would have to be converted. Instead, we simply don't even pretend to
884 908 # support versions 01 and 02.
885 909 versions.discard('01')
886 910 versions.discard('02')
887 911 return versions
888 912
889 913 def safeversion(repo):
890 914 # Finds the smallest version that it's safe to assume clients of the repo
891 915 # will support. For example, all hg versions that support generaldelta also
892 916 # support changegroup 02.
893 917 versions = supportedoutgoingversions(repo)
894 918 if 'generaldelta' in repo.requirements:
895 919 versions.discard('01')
896 920 assert versions
897 921 return min(versions)
898 922
899 923 def getbundler(version, repo, bundlecaps=None):
900 924 assert version in supportedoutgoingversions(repo)
901 925 return _packermap[version][0](repo, bundlecaps)
902 926
903 927 def getunbundler(version, fh, alg, extras=None):
904 928 return _packermap[version][1](fh, alg, extras=extras)
905 929
906 930 def _changegroupinfo(repo, nodes, source):
907 931 if repo.ui.verbose or source == 'bundle':
908 932 repo.ui.status(_("%d changesets found\n") % len(nodes))
909 933 if repo.ui.debugflag:
910 934 repo.ui.debug("list of changesets:\n")
911 935 for node in nodes:
912 936 repo.ui.debug("%s\n" % hex(node))
913 937
914 938 def makechangegroup(repo, outgoing, version, source, fastpath=False,
915 939 bundlecaps=None):
916 940 cgstream = makestream(repo, outgoing, version, source,
917 941 fastpath=fastpath, bundlecaps=bundlecaps)
918 942 return getunbundler(version, util.chunkbuffer(cgstream), None,
919 943 {'clcount': len(outgoing.missing) })
920 944
921 945 def makestream(repo, outgoing, version, source, fastpath=False,
922 946 bundlecaps=None):
923 947 bundler = getbundler(version, repo, bundlecaps=bundlecaps)
924 948
925 949 repo = repo.unfiltered()
926 950 commonrevs = outgoing.common
927 951 csets = outgoing.missing
928 952 heads = outgoing.missingheads
929 953 # We go through the fast path if we get told to, or if all (unfiltered)
930 954 # heads have been requested (since we then know all the linkrevs will
931 955 # be pulled by the client).
932 956 heads.sort()
933 957 fastpathlinkrev = fastpath or (
934 958 repo.filtername is None and heads == sorted(repo.heads()))
935 959
936 960 repo.hook('preoutgoing', throw=True, source=source)
937 961 _changegroupinfo(repo, csets, source)
938 962 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
939 963
940 964 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
941 965 revisions = 0
942 966 files = 0
943 967 for chunkdata in iter(source.filelogheader, {}):
944 968 files += 1
945 969 f = chunkdata["filename"]
946 970 repo.ui.debug("adding %s revisions\n" % f)
947 971 repo.ui.progress(_('files'), files, unit=_('files'),
948 972 total=expectedfiles)
949 973 fl = repo.file(f)
950 974 o = len(fl)
951 975 try:
952 if not fl.addgroup(source, revmap, trp):
976 deltas = source.deltaiter(revmap)
977 if not fl.addgroup(deltas, trp):
953 978 raise error.Abort(_("received file revlog group is empty"))
954 979 except error.CensoredBaseError as e:
955 980 raise error.Abort(_("received delta base is censored: %s") % e)
956 981 revisions += len(fl) - o
957 982 if f in needfiles:
958 983 needs = needfiles[f]
959 984 for new in xrange(o, len(fl)):
960 985 n = fl.node(new)
961 986 if n in needs:
962 987 needs.remove(n)
963 988 else:
964 989 raise error.Abort(
965 990 _("received spurious file revlog entry"))
966 991 if not needs:
967 992 del needfiles[f]
968 993 repo.ui.progress(_('files'), None)
969 994
970 995 for f, needs in needfiles.iteritems():
971 996 fl = repo.file(f)
972 997 for n in needs:
973 998 try:
974 999 fl.rev(n)
975 1000 except error.LookupError:
976 1001 raise error.Abort(
977 1002 _('missing file data for %s:%s - run hg verify') %
978 1003 (f, hex(n)))
979 1004
980 1005 return revisions, files
@@ -1,2222 +1,2214 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import errno
19 19 import hashlib
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullid,
29 29 nullrev,
30 30 wdirhex,
31 31 wdirid,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from . import (
36 36 ancestor,
37 37 error,
38 38 mdiff,
39 39 policy,
40 40 pycompat,
41 41 templatefilters,
42 42 util,
43 43 )
44 44
45 45 parsers = policy.importmod(r'parsers')
46 46
47 47 # Aliased for performance.
48 48 _zlibdecompress = zlib.decompress
49 49
50 50 # revlog header flags
51 51 REVLOGV0 = 0
52 52 REVLOGV1 = 1
53 53 # Dummy value until file format is finalized.
54 54 # Reminder: change the bounds check in revlog.__init__ when this is changed.
55 55 REVLOGV2 = 0xDEAD
56 56 FLAG_INLINE_DATA = (1 << 16)
57 57 FLAG_GENERALDELTA = (1 << 17)
58 58 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
59 59 REVLOG_DEFAULT_FORMAT = REVLOGV1
60 60 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
61 61 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
62 62 REVLOGV2_FLAGS = REVLOGV1_FLAGS
63 63
64 64 # revlog index flags
65 65 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
66 66 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
67 67 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
68 68 REVIDX_DEFAULT_FLAGS = 0
69 69 # stable order in which flags need to be processed and their processors applied
70 70 REVIDX_FLAGS_ORDER = [
71 71 REVIDX_ISCENSORED,
72 72 REVIDX_ELLIPSIS,
73 73 REVIDX_EXTSTORED,
74 74 ]
75 75 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
76 76
77 77 # max size of revlog with inline data
78 78 _maxinline = 131072
79 79 _chunksize = 1048576
80 80
81 81 RevlogError = error.RevlogError
82 82 LookupError = error.LookupError
83 83 CensoredNodeError = error.CensoredNodeError
84 84 ProgrammingError = error.ProgrammingError
85 85
86 86 # Store flag processors (cf. 'addflagprocessor()' to register)
87 87 _flagprocessors = {
88 88 REVIDX_ISCENSORED: None,
89 89 }
90 90
91 91 def addflagprocessor(flag, processor):
92 92 """Register a flag processor on a revision data flag.
93 93
94 94 Invariant:
95 95 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
96 96 - Only one flag processor can be registered on a specific flag.
97 97 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
98 98 following signatures:
99 99 - (read) f(self, rawtext) -> text, bool
100 100 - (write) f(self, text) -> rawtext, bool
101 101 - (raw) f(self, rawtext) -> bool
102 102 "text" is presented to the user. "rawtext" is stored in revlog data, not
103 103 directly visible to the user.
104 104 The boolean returned by these transforms is used to determine whether
105 105 the returned text can be used for hash integrity checking. For example,
106 106 if "write" returns False, then "text" is used to generate hash. If
107 107 "write" returns True, that basically means "rawtext" returned by "write"
108 108 should be used to generate hash. Usually, "write" and "read" return
109 109 different booleans. And "raw" returns a same boolean as "write".
110 110
111 111 Note: The 'raw' transform is used for changegroup generation and in some
112 112 debug commands. In this case the transform only indicates whether the
113 113 contents can be used for hash integrity checks.
114 114 """
115 115 if not flag & REVIDX_KNOWN_FLAGS:
116 116 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
117 117 raise ProgrammingError(msg)
118 118 if flag not in REVIDX_FLAGS_ORDER:
119 119 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
120 120 raise ProgrammingError(msg)
121 121 if flag in _flagprocessors:
122 122 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
123 123 raise error.Abort(msg)
124 124 _flagprocessors[flag] = processor
125 125
126 126 def getoffset(q):
127 127 return int(q >> 16)
128 128
129 129 def gettype(q):
130 130 return int(q & 0xFFFF)
131 131
132 132 def offset_type(offset, type):
133 133 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
134 134 raise ValueError('unknown revlog index flags')
135 135 return int(int(offset) << 16 | type)
136 136
137 137 _nullhash = hashlib.sha1(nullid)
138 138
139 139 def hash(text, p1, p2):
140 140 """generate a hash from the given text and its parent hashes
141 141
142 142 This hash combines both the current file contents and its history
143 143 in a manner that makes it easy to distinguish nodes with the same
144 144 content in the revision graph.
145 145 """
146 146 # As of now, if one of the parent nodes is null, p2 is null
147 147 if p2 == nullid:
148 148 # deep copy of a hash is faster than creating one
149 149 s = _nullhash.copy()
150 150 s.update(p1)
151 151 else:
152 152 # none of the parent nodes are nullid
153 153 if p1 < p2:
154 154 a = p1
155 155 b = p2
156 156 else:
157 157 a = p2
158 158 b = p1
159 159 s = hashlib.sha1(a)
160 160 s.update(b)
161 161 s.update(text)
162 162 return s.digest()
163 163
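As a concrete check of the rule above (parents are sorted before hashing, and a null p2 reuses the precomputed sha1 of nullid), here is a small sketch; it assumes an in-repo Python environment and the text is made up:

import hashlib
from mercurial.node import nullid
from mercurial import revlog

text = b'hello\n'
# a root revision has two null parents, so its node is sha1(nullid + nullid + text)
assert revlog.hash(text, nullid, nullid) == hashlib.sha1(nullid + nullid + text).digest()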
164 164 # index v0:
165 165 # 4 bytes: offset
166 166 # 4 bytes: compressed length
167 167 # 4 bytes: base rev
168 168 # 4 bytes: link rev
169 169 # 20 bytes: parent 1 nodeid
170 170 # 20 bytes: parent 2 nodeid
171 171 # 20 bytes: nodeid
172 172 indexformatv0 = struct.Struct(">4l20s20s20s")
173 173 indexformatv0_pack = indexformatv0.pack
174 174 indexformatv0_unpack = indexformatv0.unpack
175 175
176 176 class revlogoldio(object):
177 177 def __init__(self):
178 178 self.size = indexformatv0.size
179 179
180 180 def parseindex(self, data, inline):
181 181 s = self.size
182 182 index = []
183 183 nodemap = {nullid: nullrev}
184 184 n = off = 0
185 185 l = len(data)
186 186 while off + s <= l:
187 187 cur = data[off:off + s]
188 188 off += s
189 189 e = indexformatv0_unpack(cur)
190 190 # transform to revlogv1 format
191 191 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
192 192 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
193 193 index.append(e2)
194 194 nodemap[e[6]] = n
195 195 n += 1
196 196
197 197 # add the magic null revision at -1
198 198 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
199 199
200 200 return index, nodemap, None
201 201
202 202 def packentry(self, entry, node, version, rev):
203 203 if gettype(entry[0]):
204 204 raise RevlogError(_('index entry flags need revlog version 1'))
205 205 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
206 206 node(entry[5]), node(entry[6]), entry[7])
207 207 return indexformatv0_pack(*e2)
208 208
209 209 # index ng:
210 210 # 6 bytes: offset
211 211 # 2 bytes: flags
212 212 # 4 bytes: compressed length
213 213 # 4 bytes: uncompressed length
214 214 # 4 bytes: base rev
215 215 # 4 bytes: link rev
216 216 # 4 bytes: parent 1 rev
217 217 # 4 bytes: parent 2 rev
218 218 # 32 bytes: nodeid
219 219 indexformatng = struct.Struct(">Qiiiiii20s12x")
220 220 indexformatng_pack = indexformatng.pack
221 221 versionformat = struct.Struct(">I")
222 222 versionformat_pack = versionformat.pack
223 223 versionformat_unpack = versionformat.unpack
224 224
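Each v1 ("ng") index record is thus a fixed 64 bytes, and, as packentry() below shows, the 4-byte revlog version word is overlaid on the first four bytes of revision 0's entry. A short illustrative check of those sizes:

import struct

assert struct.Struct(">Qiiiiii20s12x").size == 64  # one index record (indexformatng)
assert struct.Struct(">I").size == 4               # version word packed into rev 0's entry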
225 225 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
226 226 # signed integer)
227 227 _maxentrysize = 0x7fffffff
228 228
229 229 class revlogio(object):
230 230 def __init__(self):
231 231 self.size = indexformatng.size
232 232
233 233 def parseindex(self, data, inline):
234 234 # call the C implementation to parse the index data
235 235 index, cache = parsers.parse_index2(data, inline)
236 236 return index, getattr(index, 'nodemap', None), cache
237 237
238 238 def packentry(self, entry, node, version, rev):
239 239 p = indexformatng_pack(*entry)
240 240 if rev == 0:
241 241 p = versionformat_pack(version) + p[4:]
242 242 return p
243 243
244 244 class revlog(object):
245 245 """
246 246 the underlying revision storage object
247 247
248 248 A revlog consists of two parts, an index and the revision data.
249 249
250 250 The index is a file with a fixed record size containing
251 251 information on each revision, including its nodeid (hash), the
252 252 nodeids of its parents, the position and offset of its data within
253 253 the data file, and the revision it's based on. Finally, each entry
254 254 contains a linkrev entry that can serve as a pointer to external
255 255 data.
256 256
257 257 The revision data itself is a linear collection of data chunks.
258 258 Each chunk represents a revision and is usually represented as a
259 259 delta against the previous chunk. To bound lookup time, runs of
260 260 deltas are limited to about 2 times the length of the original
261 261 version data. This makes retrieval of a version proportional to
262 262 its size, or O(1) relative to the number of revisions.
263 263
264 264 Both pieces of the revlog are written to in an append-only
265 265 fashion, which means we never need to rewrite a file to insert or
266 266 remove data, and can use some simple techniques to avoid the need
267 267 for locking while reading.
268 268
269 269 If checkambig, indexfile is opened with checkambig=True at
270 270 writing, to avoid file stat ambiguity.
271 271 """
272 272 def __init__(self, opener, indexfile, datafile=None, checkambig=False):
273 273 """
274 274 create a revlog object
275 275
276 276 opener is a function that abstracts the file opening operation
277 277 and can be used to implement COW semantics or the like.
278 278 """
279 279 self.indexfile = indexfile
280 280 self.datafile = datafile or (indexfile[:-2] + ".d")
281 281 self.opener = opener
282 282 # When True, indexfile is opened with checkambig=True at writing, to
283 283 # avoid file stat ambiguity.
284 284 self._checkambig = checkambig
285 285 # 3-tuple of (node, rev, text) for a raw revision.
286 286 self._cache = None
287 287 # Maps rev to chain base rev.
288 288 self._chainbasecache = util.lrucachedict(100)
289 289 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
290 290 self._chunkcache = (0, '')
291 291 # How much data to read and cache into the raw revlog data cache.
292 292 self._chunkcachesize = 65536
293 293 self._maxchainlen = None
294 294 self._aggressivemergedeltas = False
295 295 self.index = []
296 296 # Mapping of partial identifiers to full nodes.
297 297 self._pcache = {}
298 298 # Mapping of revision integer to full node.
299 299 self._nodecache = {nullid: nullrev}
300 300 self._nodepos = None
301 301 self._compengine = 'zlib'
302 302 self._maxdeltachainspan = -1
303 303
304 304 v = REVLOG_DEFAULT_VERSION
305 305 opts = getattr(opener, 'options', None)
306 306 if opts is not None:
307 307 if 'revlogv2' in opts:
308 308 # version 2 revlogs always use generaldelta.
309 309 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
310 310 elif 'revlogv1' in opts:
311 311 if 'generaldelta' in opts:
312 312 v |= FLAG_GENERALDELTA
313 313 else:
314 314 v = 0
315 315 if 'chunkcachesize' in opts:
316 316 self._chunkcachesize = opts['chunkcachesize']
317 317 if 'maxchainlen' in opts:
318 318 self._maxchainlen = opts['maxchainlen']
319 319 if 'aggressivemergedeltas' in opts:
320 320 self._aggressivemergedeltas = opts['aggressivemergedeltas']
321 321 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
322 322 if 'compengine' in opts:
323 323 self._compengine = opts['compengine']
324 324 if 'maxdeltachainspan' in opts:
325 325 self._maxdeltachainspan = opts['maxdeltachainspan']
326 326
327 327 if self._chunkcachesize <= 0:
328 328 raise RevlogError(_('revlog chunk cache size %r is not greater '
329 329 'than 0') % self._chunkcachesize)
330 330 elif self._chunkcachesize & (self._chunkcachesize - 1):
331 331 raise RevlogError(_('revlog chunk cache size %r is not a power '
332 332 'of 2') % self._chunkcachesize)
333 333
334 334 indexdata = ''
335 335 self._initempty = True
336 336 try:
337 337 f = self.opener(self.indexfile)
338 338 indexdata = f.read()
339 339 f.close()
340 340 if len(indexdata) > 0:
341 341 v = versionformat_unpack(indexdata[:4])[0]
342 342 self._initempty = False
343 343 except IOError as inst:
344 344 if inst.errno != errno.ENOENT:
345 345 raise
346 346
347 347 self.version = v
348 348 self._inline = v & FLAG_INLINE_DATA
349 349 self._generaldelta = v & FLAG_GENERALDELTA
350 350 flags = v & ~0xFFFF
351 351 fmt = v & 0xFFFF
352 352 if fmt == REVLOGV0:
353 353 if flags:
354 354 raise RevlogError(_('unknown flags (%#04x) in version %d '
355 355 'revlog %s') %
356 356 (flags >> 16, fmt, self.indexfile))
357 357 elif fmt == REVLOGV1:
358 358 if flags & ~REVLOGV1_FLAGS:
359 359 raise RevlogError(_('unknown flags (%#04x) in version %d '
360 360 'revlog %s') %
361 361 (flags >> 16, fmt, self.indexfile))
362 362 elif fmt == REVLOGV2:
363 363 if flags & ~REVLOGV2_FLAGS:
364 364 raise RevlogError(_('unknown flags (%#04x) in version %d '
365 365 'revlog %s') %
366 366 (flags >> 16, fmt, self.indexfile))
367 367 else:
368 368 raise RevlogError(_('unknown version (%d) in revlog %s') %
369 369 (fmt, self.indexfile))
370 370
371 371 self.storedeltachains = True
372 372
373 373 self._io = revlogio()
374 374 if self.version == REVLOGV0:
375 375 self._io = revlogoldio()
376 376 try:
377 377 d = self._io.parseindex(indexdata, self._inline)
378 378 except (ValueError, IndexError):
379 379 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
380 380 self.index, nodemap, self._chunkcache = d
381 381 if nodemap is not None:
382 382 self.nodemap = self._nodecache = nodemap
383 383 if not self._chunkcache:
384 384 self._chunkclear()
385 385 # revnum -> (chain-length, sum-delta-length)
386 386 self._chaininfocache = {}
387 387 # revlog header -> revlog compressor
388 388 self._decompressors = {}
389 389
390 390 @util.propertycache
391 391 def _compressor(self):
392 392 return util.compengines[self._compengine].revlogcompressor()
393 393
394 394 def tip(self):
395 395 return self.node(len(self.index) - 2)
396 396 def __contains__(self, rev):
397 397 return 0 <= rev < len(self)
398 398 def __len__(self):
399 399 return len(self.index) - 1
400 400 def __iter__(self):
401 401 return iter(xrange(len(self)))
402 402 def revs(self, start=0, stop=None):
403 403 """iterate over all rev in this revlog (from start to stop)"""
404 404 step = 1
405 405 if stop is not None:
406 406 if start > stop:
407 407 step = -1
408 408 stop += step
409 409 else:
410 410 stop = len(self)
411 411 return xrange(start, stop, step)
412 412
413 413 @util.propertycache
414 414 def nodemap(self):
415 415 self.rev(self.node(0))
416 416 return self._nodecache
417 417
418 418 def hasnode(self, node):
419 419 try:
420 420 self.rev(node)
421 421 return True
422 422 except KeyError:
423 423 return False
424 424
425 425 def clearcaches(self):
426 426 self._cache = None
427 427 self._chainbasecache.clear()
428 428 self._chunkcache = (0, '')
429 429 self._pcache = {}
430 430
431 431 try:
432 432 self._nodecache.clearcaches()
433 433 except AttributeError:
434 434 self._nodecache = {nullid: nullrev}
435 435 self._nodepos = None
436 436
437 437 def rev(self, node):
438 438 try:
439 439 return self._nodecache[node]
440 440 except TypeError:
441 441 raise
442 442 except RevlogError:
443 443 # parsers.c radix tree lookup failed
444 444 if node == wdirid:
445 445 raise error.WdirUnsupported
446 446 raise LookupError(node, self.indexfile, _('no node'))
447 447 except KeyError:
448 448 # pure python cache lookup failed
449 449 n = self._nodecache
450 450 i = self.index
451 451 p = self._nodepos
452 452 if p is None:
453 453 p = len(i) - 2
454 454 for r in xrange(p, -1, -1):
455 455 v = i[r][7]
456 456 n[v] = r
457 457 if v == node:
458 458 self._nodepos = r - 1
459 459 return r
460 460 if node == wdirid:
461 461 raise error.WdirUnsupported
462 462 raise LookupError(node, self.indexfile, _('no node'))
463 463
464 464 # Accessors for index entries.
465 465
466 466 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
467 467 # are flags.
468 468 def start(self, rev):
469 469 return int(self.index[rev][0] >> 16)
470 470
471 471 def flags(self, rev):
472 472 return self.index[rev][0] & 0xFFFF
473 473
474 474 def length(self, rev):
475 475 return self.index[rev][1]
476 476
477 477 def rawsize(self, rev):
478 478 """return the length of the uncompressed text for a given revision"""
479 479 l = self.index[rev][2]
480 480 if l >= 0:
481 481 return l
482 482
483 483 t = self.revision(rev, raw=True)
484 484 return len(t)
485 485
486 486 def size(self, rev):
487 487 """length of non-raw text (processed by a "read" flag processor)"""
488 488 # fast path: if no "read" flag processor could change the content,
489 489 # size is rawsize. note: ELLIPSIS is known to not change the content.
490 490 flags = self.flags(rev)
491 491 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
492 492 return self.rawsize(rev)
493 493
494 494 return len(self.revision(rev, raw=False))
495 495
496 496 def chainbase(self, rev):
497 497 base = self._chainbasecache.get(rev)
498 498 if base is not None:
499 499 return base
500 500
501 501 index = self.index
502 502 base = index[rev][3]
503 503 while base != rev:
504 504 rev = base
505 505 base = index[rev][3]
506 506
507 507 self._chainbasecache[rev] = base
508 508 return base
509 509
510 510 def linkrev(self, rev):
511 511 return self.index[rev][4]
512 512
513 513 def parentrevs(self, rev):
514 514 try:
515 515 return self.index[rev][5:7]
516 516 except IndexError:
517 517 if rev == wdirrev:
518 518 raise error.WdirUnsupported
519 519 raise
520 520
521 521 def node(self, rev):
522 522 try:
523 523 return self.index[rev][7]
524 524 except IndexError:
525 525 if rev == wdirrev:
526 526 raise error.WdirUnsupported
527 527 raise
528 528
529 529 # Derived from index values.
530 530
531 531 def end(self, rev):
532 532 return self.start(rev) + self.length(rev)
533 533
534 534 def parents(self, node):
535 535 i = self.index
536 536 d = i[self.rev(node)]
537 537 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
538 538
539 539 def chainlen(self, rev):
540 540 return self._chaininfo(rev)[0]
541 541
542 542 def _chaininfo(self, rev):
543 543 chaininfocache = self._chaininfocache
544 544 if rev in chaininfocache:
545 545 return chaininfocache[rev]
546 546 index = self.index
547 547 generaldelta = self._generaldelta
548 548 iterrev = rev
549 549 e = index[iterrev]
550 550 clen = 0
551 551 compresseddeltalen = 0
552 552 while iterrev != e[3]:
553 553 clen += 1
554 554 compresseddeltalen += e[1]
555 555 if generaldelta:
556 556 iterrev = e[3]
557 557 else:
558 558 iterrev -= 1
559 559 if iterrev in chaininfocache:
560 560 t = chaininfocache[iterrev]
561 561 clen += t[0]
562 562 compresseddeltalen += t[1]
563 563 break
564 564 e = index[iterrev]
565 565 else:
566 566 # Add text length of base since decompressing that also takes
567 567 # work. For cache hits the length is already included.
568 568 compresseddeltalen += e[1]
569 569 r = (clen, compresseddeltalen)
570 570 chaininfocache[rev] = r
571 571 return r
572 572
573 573 def _deltachain(self, rev, stoprev=None):
574 574 """Obtain the delta chain for a revision.
575 575
576 576 ``stoprev`` specifies a revision to stop at. If not specified, we
577 577 stop at the base of the chain.
578 578
579 579 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
580 580 revs in ascending order and ``stopped`` is a bool indicating whether
581 581 ``stoprev`` was hit.
582 582 """
583 583 # Try C implementation.
584 584 try:
585 585 return self.index.deltachain(rev, stoprev, self._generaldelta)
586 586 except AttributeError:
587 587 pass
588 588
589 589 chain = []
590 590
591 591 # Alias to prevent attribute lookup in tight loop.
592 592 index = self.index
593 593 generaldelta = self._generaldelta
594 594
595 595 iterrev = rev
596 596 e = index[iterrev]
597 597 while iterrev != e[3] and iterrev != stoprev:
598 598 chain.append(iterrev)
599 599 if generaldelta:
600 600 iterrev = e[3]
601 601 else:
602 602 iterrev -= 1
603 603 e = index[iterrev]
604 604
605 605 if iterrev == stoprev:
606 606 stopped = True
607 607 else:
608 608 chain.append(iterrev)
609 609 stopped = False
610 610
611 611 chain.reverse()
612 612 return chain, stopped
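
    # Illustrative sketch (not part of the original source): when ``stopped``
    # is False, the first rev in ``chain`` holds a full text and the rest are
    # deltas, so a revision can be rebuilt roughly like this (``rl`` standing
    # in for a revlog instance):
    #
    #   chain, stopped = rl._deltachain(rev)
    #   text = rl._chunk(chain[0])
    #   for r in chain[1:]:
    #       text = mdiff.patch(text, rl._chunk(r))
    #
    # revision() below does the same work, but fetches all chunks in one
    # batched read via _chunks().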
613 613
614 614 def ancestors(self, revs, stoprev=0, inclusive=False):
615 615 """Generate the ancestors of 'revs' in reverse topological order.
616 616 Does not generate revs lower than stoprev.
617 617
618 618 See the documentation for ancestor.lazyancestors for more details."""
619 619
620 620 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
621 621 inclusive=inclusive)
622 622
623 623 def descendants(self, revs):
624 624 """Generate the descendants of 'revs' in revision order.
625 625
626 626 Yield a sequence of revision numbers starting with a child of
627 627 some rev in revs, i.e., each revision is *not* considered a
628 628 descendant of itself. Results are ordered by revision number (a
629 629 topological sort)."""
630 630 first = min(revs)
631 631 if first == nullrev:
632 632 for i in self:
633 633 yield i
634 634 return
635 635
636 636 seen = set(revs)
637 637 for i in self.revs(start=first + 1):
638 638 for x in self.parentrevs(i):
639 639 if x != nullrev and x in seen:
640 640 seen.add(i)
641 641 yield i
642 642 break
643 643
644 644 def findcommonmissing(self, common=None, heads=None):
645 645 """Return a tuple of the ancestors of common and the ancestors of heads
646 646 that are not ancestors of common. In revset terminology, we return the
647 647 tuple:
648 648
649 649 ::common, (::heads) - (::common)
650 650
651 651 The list is sorted by revision number, meaning it is
652 652 topologically sorted.
653 653
654 654 'heads' and 'common' are both lists of node IDs. If heads is
655 655 not supplied, uses all of the revlog's heads. If common is not
656 656 supplied, uses nullid."""
657 657 if common is None:
658 658 common = [nullid]
659 659 if heads is None:
660 660 heads = self.heads()
661 661
662 662 common = [self.rev(n) for n in common]
663 663 heads = [self.rev(n) for n in heads]
664 664
665 665 # we want the ancestors, but inclusive
666 666 class lazyset(object):
667 667 def __init__(self, lazyvalues):
668 668 self.addedvalues = set()
669 669 self.lazyvalues = lazyvalues
670 670
671 671 def __contains__(self, value):
672 672 return value in self.addedvalues or value in self.lazyvalues
673 673
674 674 def __iter__(self):
675 675 added = self.addedvalues
676 676 for r in added:
677 677 yield r
678 678 for r in self.lazyvalues:
679 679 if not r in added:
680 680 yield r
681 681
682 682 def add(self, value):
683 683 self.addedvalues.add(value)
684 684
685 685 def update(self, values):
686 686 self.addedvalues.update(values)
687 687
688 688 has = lazyset(self.ancestors(common))
689 689 has.add(nullrev)
690 690 has.update(common)
691 691
692 692 # take all ancestors from heads that aren't in has
693 693 missing = set()
694 694 visit = collections.deque(r for r in heads if r not in has)
695 695 while visit:
696 696 r = visit.popleft()
697 697 if r in missing:
698 698 continue
699 699 else:
700 700 missing.add(r)
701 701 for p in self.parentrevs(r):
702 702 if p not in has:
703 703 visit.append(p)
704 704 missing = list(missing)
705 705 missing.sort()
706 706 return has, [self.node(miss) for miss in missing]
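
    # Illustrative sketch (not part of the original source): a typical call to
    # findcommonmissing() during discovery, with hypothetical node lists
    # ``commonnodes`` and ``headnodes``:
    #
    #   has, missing = rl.findcommonmissing(commonnodes, headnodes)
    #   # ``missing`` is (::heads) - (::common) as nodes, oldest first, and
    #   # ``has`` answers "is rev an ancestor of common?" via ``rev in has``.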
707 707
708 708 def incrementalmissingrevs(self, common=None):
709 709 """Return an object that can be used to incrementally compute the
710 710 revision numbers of the ancestors of arbitrary sets that are not
711 711 ancestors of common. This is an ancestor.incrementalmissingancestors
712 712 object.
713 713
714 714 'common' is a list of revision numbers. If common is not supplied, uses
715 715 nullrev.
716 716 """
717 717 if common is None:
718 718 common = [nullrev]
719 719
720 720 return ancestor.incrementalmissingancestors(self.parentrevs, common)
721 721
722 722 def findmissingrevs(self, common=None, heads=None):
723 723 """Return the revision numbers of the ancestors of heads that
724 724 are not ancestors of common.
725 725
726 726 More specifically, return a list of revision numbers corresponding to
727 727 nodes N such that every N satisfies the following constraints:
728 728
729 729 1. N is an ancestor of some node in 'heads'
730 730 2. N is not an ancestor of any node in 'common'
731 731
732 732 The list is sorted by revision number, meaning it is
733 733 topologically sorted.
734 734
735 735 'heads' and 'common' are both lists of revision numbers. If heads is
736 736 not supplied, uses all of the revlog's heads. If common is not
737 737 supplied, uses nullid."""
738 738 if common is None:
739 739 common = [nullrev]
740 740 if heads is None:
741 741 heads = self.headrevs()
742 742
743 743 inc = self.incrementalmissingrevs(common=common)
744 744 return inc.missingancestors(heads)
745 745
746 746 def findmissing(self, common=None, heads=None):
747 747 """Return the ancestors of heads that are not ancestors of common.
748 748
749 749 More specifically, return a list of nodes N such that every N
750 750 satisfies the following constraints:
751 751
752 752 1. N is an ancestor of some node in 'heads'
753 753 2. N is not an ancestor of any node in 'common'
754 754
755 755 The list is sorted by revision number, meaning it is
756 756 topologically sorted.
757 757
758 758 'heads' and 'common' are both lists of node IDs. If heads is
759 759 not supplied, uses all of the revlog's heads. If common is not
760 760 supplied, uses nullid."""
761 761 if common is None:
762 762 common = [nullid]
763 763 if heads is None:
764 764 heads = self.heads()
765 765
766 766 common = [self.rev(n) for n in common]
767 767 heads = [self.rev(n) for n in heads]
768 768
769 769 inc = self.incrementalmissingrevs(common=common)
770 770 return [self.node(r) for r in inc.missingancestors(heads)]
771 771
772 772 def nodesbetween(self, roots=None, heads=None):
773 773 """Return a topological path from 'roots' to 'heads'.
774 774
775 775 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
776 776 topologically sorted list of all nodes N that satisfy both of
777 777 these constraints:
778 778
779 779 1. N is a descendant of some node in 'roots'
780 780 2. N is an ancestor of some node in 'heads'
781 781
782 782 Every node is considered to be both a descendant and an ancestor
783 783 of itself, so every reachable node in 'roots' and 'heads' will be
784 784 included in 'nodes'.
785 785
786 786 'outroots' is the list of reachable nodes in 'roots', i.e., the
787 787 subset of 'roots' that is returned in 'nodes'. Likewise,
788 788 'outheads' is the subset of 'heads' that is also in 'nodes'.
789 789
790 790 'roots' and 'heads' are both lists of node IDs. If 'roots' is
791 791 unspecified, uses nullid as the only root. If 'heads' is
792 792 unspecified, uses list of all of the revlog's heads."""
793 793 nonodes = ([], [], [])
794 794 if roots is not None:
795 795 roots = list(roots)
796 796 if not roots:
797 797 return nonodes
798 798 lowestrev = min([self.rev(n) for n in roots])
799 799 else:
800 800 roots = [nullid] # Everybody's a descendant of nullid
801 801 lowestrev = nullrev
802 802 if (lowestrev == nullrev) and (heads is None):
803 803 # We want _all_ the nodes!
804 804 return ([self.node(r) for r in self], [nullid], list(self.heads()))
805 805 if heads is None:
806 806 # All nodes are ancestors, so the latest ancestor is the last
807 807 # node.
808 808 highestrev = len(self) - 1
809 809 # Set ancestors to None to signal that every node is an ancestor.
810 810 ancestors = None
811 811 # Set heads to an empty dictionary for later discovery of heads
812 812 heads = {}
813 813 else:
814 814 heads = list(heads)
815 815 if not heads:
816 816 return nonodes
817 817 ancestors = set()
818 818 # Turn heads into a dictionary so we can remove 'fake' heads.
819 819 # Also, later we will be using it to filter out the heads we can't
820 820 # find from roots.
821 821 heads = dict.fromkeys(heads, False)
822 822 # Start at the top and keep marking parents until we're done.
823 823 nodestotag = set(heads)
824 824 # Remember where the top was so we can use it as a limit later.
825 825 highestrev = max([self.rev(n) for n in nodestotag])
826 826 while nodestotag:
827 827 # grab a node to tag
828 828 n = nodestotag.pop()
829 829 # Never tag nullid
830 830 if n == nullid:
831 831 continue
832 832 # A node's revision number represents its place in a
833 833 # topologically sorted list of nodes.
834 834 r = self.rev(n)
835 835 if r >= lowestrev:
836 836 if n not in ancestors:
837 837 # If we are possibly a descendant of one of the roots
838 838 # and we haven't already been marked as an ancestor
839 839 ancestors.add(n) # Mark as ancestor
840 840 # Add non-nullid parents to list of nodes to tag.
841 841 nodestotag.update([p for p in self.parents(n) if
842 842 p != nullid])
843 843 elif n in heads: # We've seen it before, is it a fake head?
844 844 # So it is, real heads should not be the ancestors of
845 845 # any other heads.
846 846 heads.pop(n)
847 847 if not ancestors:
848 848 return nonodes
849 849 # Now that we have our set of ancestors, we want to remove any
850 850 # roots that are not ancestors.
851 851
852 852 # If one of the roots was nullid, everything is included anyway.
853 853 if lowestrev > nullrev:
854 854 # But, since we weren't, let's recompute the lowest rev to not
855 855 # include roots that aren't ancestors.
856 856
857 857 # Filter out roots that aren't ancestors of heads
858 858 roots = [root for root in roots if root in ancestors]
859 859 # Recompute the lowest revision
860 860 if roots:
861 861 lowestrev = min([self.rev(root) for root in roots])
862 862 else:
863 863 # No more roots? Return empty list
864 864 return nonodes
865 865 else:
866 866 # We are descending from nullid, and don't need to care about
867 867 # any other roots.
868 868 lowestrev = nullrev
869 869 roots = [nullid]
870 870 # Transform our roots list into a set.
871 871 descendants = set(roots)
872 872 # Also, keep the original roots so we can filter out roots that aren't
873 873 # 'real' roots (i.e. are descended from other roots).
874 874 roots = descendants.copy()
875 875 # Our topologically sorted list of output nodes.
876 876 orderedout = []
877 877 # Don't start at nullid since we don't want nullid in our output list,
878 878 # and if nullid shows up in descendants, empty parents will look like
879 879 # they're descendants.
880 880 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
881 881 n = self.node(r)
882 882 isdescendant = False
883 883 if lowestrev == nullrev: # Everybody is a descendant of nullid
884 884 isdescendant = True
885 885 elif n in descendants:
886 886 # n is already a descendant
887 887 isdescendant = True
888 888 # This check only needs to be done here because all the roots
889 889                 # will start being marked as descendants before the loop.
890 890 if n in roots:
891 891 # If n was a root, check if it's a 'real' root.
892 892 p = tuple(self.parents(n))
893 893 # If any of its parents are descendants, it's not a root.
894 894 if (p[0] in descendants) or (p[1] in descendants):
895 895 roots.remove(n)
896 896 else:
897 897 p = tuple(self.parents(n))
898 898 # A node is a descendant if either of its parents are
899 899                 # descendants. (We seeded the descendants list with the roots
900 900 # up there, remember?)
901 901 if (p[0] in descendants) or (p[1] in descendants):
902 902 descendants.add(n)
903 903 isdescendant = True
904 904 if isdescendant and ((ancestors is None) or (n in ancestors)):
905 905 # Only include nodes that are both descendants and ancestors.
906 906 orderedout.append(n)
907 907 if (ancestors is not None) and (n in heads):
908 908 # We're trying to figure out which heads are reachable
909 909 # from roots.
910 910 # Mark this head as having been reached
911 911 heads[n] = True
912 912 elif ancestors is None:
913 913 # Otherwise, we're trying to discover the heads.
914 914 # Assume this is a head because if it isn't, the next step
915 915 # will eventually remove it.
916 916 heads[n] = True
917 917 # But, obviously its parents aren't.
918 918 for p in self.parents(n):
919 919 heads.pop(p, None)
920 920 heads = [head for head, flag in heads.iteritems() if flag]
921 921 roots = list(roots)
922 922 assert orderedout
923 923 assert roots
924 924 assert heads
925 925 return (orderedout, roots, heads)
926 926
927 927 def headrevs(self):
928 928 try:
929 929 return self.index.headrevs()
930 930 except AttributeError:
931 931 return self._headrevs()
932 932
933 933 def computephases(self, roots):
934 934 return self.index.computephasesmapsets(roots)
935 935
936 936 def _headrevs(self):
937 937 count = len(self)
938 938 if not count:
939 939 return [nullrev]
940 940 # we won't iter over filtered rev so nobody is a head at start
941 941 ishead = [0] * (count + 1)
942 942 index = self.index
943 943 for r in self:
944 944             ishead[r] = 1  # I may be a head
945 945 e = index[r]
946 946             ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
947 947 return [r for r, val in enumerate(ishead) if val]
948 948
949 949 def heads(self, start=None, stop=None):
950 950 """return the list of all nodes that have no children
951 951
952 952 if start is specified, only heads that are descendants of
953 953 start will be returned
954 954 if stop is specified, it will consider all the revs from stop
955 955 as if they had no children
956 956 """
957 957 if start is None and stop is None:
958 958 if not len(self):
959 959 return [nullid]
960 960 return [self.node(r) for r in self.headrevs()]
961 961
962 962 if start is None:
963 963 start = nullid
964 964 if stop is None:
965 965 stop = []
966 966 stoprevs = set([self.rev(n) for n in stop])
967 967 startrev = self.rev(start)
968 968 reachable = {startrev}
969 969 heads = {startrev}
970 970
971 971 parentrevs = self.parentrevs
972 972 for r in self.revs(start=startrev + 1):
973 973 for p in parentrevs(r):
974 974 if p in reachable:
975 975 if r not in stoprevs:
976 976 reachable.add(r)
977 977 heads.add(r)
978 978 if p in heads and p not in stoprevs:
979 979 heads.remove(p)
980 980
981 981 return [self.node(r) for r in heads]
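
    # Illustrative sketch (not part of the original source): the two common
    # ways of calling heads(), ``rl`` and ``somenode`` being hypothetical:
    #
    #   rl.heads()                    # all nodes without children
    #   rl.heads(start=somenode)      # heads that descend from ``somenode``
    #
    # With no arguments the fast headrevs() path above is used.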
982 982
983 983 def children(self, node):
984 984 """find the children of a given node"""
985 985 c = []
986 986 p = self.rev(node)
987 987 for r in self.revs(start=p + 1):
988 988 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
989 989 if prevs:
990 990 for pr in prevs:
991 991 if pr == p:
992 992 c.append(self.node(r))
993 993 elif p == nullrev:
994 994 c.append(self.node(r))
995 995 return c
996 996
997 997 def descendant(self, start, end):
998 998 if start == nullrev:
999 999 return True
1000 1000 for i in self.descendants([start]):
1001 1001 if i == end:
1002 1002 return True
1003 1003 elif i > end:
1004 1004 break
1005 1005 return False
1006 1006
1007 1007 def commonancestorsheads(self, a, b):
1008 1008 """calculate all the heads of the common ancestors of nodes a and b"""
1009 1009 a, b = self.rev(a), self.rev(b)
1010 1010 try:
1011 1011 ancs = self.index.commonancestorsheads(a, b)
1012 1012 except (AttributeError, OverflowError): # C implementation failed
1013 1013 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1014 1014 return pycompat.maplist(self.node, ancs)
1015 1015
1016 1016 def isancestor(self, a, b):
1017 1017 """return True if node a is an ancestor of node b
1018 1018
1019 1019 The implementation of this is trivial but the use of
1020 1020 commonancestorsheads is not."""
1021 1021 return a in self.commonancestorsheads(a, b)
1022 1022
1023 1023 def ancestor(self, a, b):
1024 1024 """calculate the "best" common ancestor of nodes a and b"""
1025 1025
1026 1026 a, b = self.rev(a), self.rev(b)
1027 1027 try:
1028 1028 ancs = self.index.ancestors(a, b)
1029 1029 except (AttributeError, OverflowError):
1030 1030 ancs = ancestor.ancestors(self.parentrevs, a, b)
1031 1031 if ancs:
1032 1032 # choose a consistent winner when there's a tie
1033 1033 return min(map(self.node, ancs))
1034 1034 return nullid
1035 1035
1036 1036 def _match(self, id):
1037 1037 if isinstance(id, int):
1038 1038 # rev
1039 1039 return self.node(id)
1040 1040 if len(id) == 20:
1041 1041 # possibly a binary node
1042 1042 # odds of a binary node being all hex in ASCII are 1 in 10**25
1043 1043 try:
1044 1044 node = id
1045 1045 self.rev(node) # quick search the index
1046 1046 return node
1047 1047 except LookupError:
1048 1048 pass # may be partial hex id
1049 1049 try:
1050 1050 # str(rev)
1051 1051 rev = int(id)
1052 1052 if str(rev) != id:
1053 1053 raise ValueError
1054 1054 if rev < 0:
1055 1055 rev = len(self) + rev
1056 1056 if rev < 0 or rev >= len(self):
1057 1057 raise ValueError
1058 1058 return self.node(rev)
1059 1059 except (ValueError, OverflowError):
1060 1060 pass
1061 1061 if len(id) == 40:
1062 1062 try:
1063 1063 # a full hex nodeid?
1064 1064 node = bin(id)
1065 1065 self.rev(node)
1066 1066 return node
1067 1067 except (TypeError, LookupError):
1068 1068 pass
1069 1069
1070 1070 def _partialmatch(self, id):
1071 1071 maybewdir = wdirhex.startswith(id)
1072 1072 try:
1073 1073 partial = self.index.partialmatch(id)
1074 1074 if partial and self.hasnode(partial):
1075 1075 if maybewdir:
1076 1076 # single 'ff...' match in radix tree, ambiguous with wdir
1077 1077 raise RevlogError
1078 1078 return partial
1079 1079 if maybewdir:
1080 1080 # no 'ff...' match in radix tree, wdir identified
1081 1081 raise error.WdirUnsupported
1082 1082 return None
1083 1083 except RevlogError:
1084 1084 # parsers.c radix tree lookup gave multiple matches
1085 1085 # fast path: for unfiltered changelog, radix tree is accurate
1086 1086 if not getattr(self, 'filteredrevs', None):
1087 1087 raise LookupError(id, self.indexfile,
1088 1088 _('ambiguous identifier'))
1089 1089 # fall through to slow path that filters hidden revisions
1090 1090 except (AttributeError, ValueError):
1091 1091 # we are pure python, or key was too short to search radix tree
1092 1092 pass
1093 1093
1094 1094 if id in self._pcache:
1095 1095 return self._pcache[id]
1096 1096
1097 1097 if len(id) < 40:
1098 1098 try:
1099 1099 # hex(node)[:...]
1100 1100 l = len(id) // 2 # grab an even number of digits
1101 1101 prefix = bin(id[:l * 2])
1102 1102 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1103 1103 nl = [n for n in nl if hex(n).startswith(id) and
1104 1104 self.hasnode(n)]
1105 1105 if len(nl) > 0:
1106 1106 if len(nl) == 1 and not maybewdir:
1107 1107 self._pcache[id] = nl[0]
1108 1108 return nl[0]
1109 1109 raise LookupError(id, self.indexfile,
1110 1110 _('ambiguous identifier'))
1111 1111 if maybewdir:
1112 1112 raise error.WdirUnsupported
1113 1113 return None
1114 1114 except (TypeError, binascii.Error):
1115 1115 pass
1116 1116
1117 1117 def lookup(self, id):
1118 1118 """locate a node based on:
1119 1119 - revision number or str(revision number)
1120 1120 - nodeid or subset of hex nodeid
1121 1121 """
1122 1122 n = self._match(id)
1123 1123 if n is not None:
1124 1124 return n
1125 1125 n = self._partialmatch(id)
1126 1126 if n:
1127 1127 return n
1128 1128
1129 1129 raise LookupError(id, self.indexfile, _('no match found'))
1130 1130
1131 1131 def cmp(self, node, text):
1132 1132 """compare text with a given file revision
1133 1133
1134 1134         returns True if text is different from what is stored.
1135 1135 """
1136 1136 p1, p2 = self.parents(node)
1137 1137 return hash(text, p1, p2) != node
1138 1138
1139 1139 def _cachesegment(self, offset, data):
1140 1140 """Add a segment to the revlog cache.
1141 1141
1142 1142 Accepts an absolute offset and the data that is at that location.
1143 1143 """
1144 1144 o, d = self._chunkcache
1145 1145 # try to add to existing cache
1146 1146 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1147 1147 self._chunkcache = o, d + data
1148 1148 else:
1149 1149 self._chunkcache = offset, data
1150 1150
1151 1151 def _readsegment(self, offset, length, df=None):
1152 1152 """Load a segment of raw data from the revlog.
1153 1153
1154 1154 Accepts an absolute offset, length to read, and an optional existing
1155 1155 file handle to read from.
1156 1156
1157 1157 If an existing file handle is passed, it will be seeked and the
1158 1158 original seek position will NOT be restored.
1159 1159
1160 1160 Returns a str or buffer of raw byte data.
1161 1161 """
1162 1162 if df is not None:
1163 1163 closehandle = False
1164 1164 else:
1165 1165 if self._inline:
1166 1166 df = self.opener(self.indexfile)
1167 1167 else:
1168 1168 df = self.opener(self.datafile)
1169 1169 closehandle = True
1170 1170
1171 1171 # Cache data both forward and backward around the requested
1172 1172 # data, in a fixed size window. This helps speed up operations
1173 1173 # involving reading the revlog backwards.
1174 1174 cachesize = self._chunkcachesize
1175 1175 realoffset = offset & ~(cachesize - 1)
1176 1176 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1177 1177 - realoffset)
1178 1178 df.seek(realoffset)
1179 1179 d = df.read(reallength)
1180 1180 if closehandle:
1181 1181 df.close()
1182 1182 self._cachesegment(realoffset, d)
1183 1183 if offset != realoffset or reallength != length:
1184 1184 return util.buffer(d, offset - realoffset, length)
1185 1185 return d
1186 1186
1187 1187 def _getsegment(self, offset, length, df=None):
1188 1188 """Obtain a segment of raw data from the revlog.
1189 1189
1190 1190 Accepts an absolute offset, length of bytes to obtain, and an
1191 1191 optional file handle to the already-opened revlog. If the file
1192 1192         handle is used, its original seek position will not be preserved.
1193 1193
1194 1194 Requests for data may be returned from a cache.
1195 1195
1196 1196 Returns a str or a buffer instance of raw byte data.
1197 1197 """
1198 1198 o, d = self._chunkcache
1199 1199 l = len(d)
1200 1200
1201 1201 # is it in the cache?
1202 1202 cachestart = offset - o
1203 1203 cacheend = cachestart + length
1204 1204 if cachestart >= 0 and cacheend <= l:
1205 1205 if cachestart == 0 and cacheend == l:
1206 1206 return d # avoid a copy
1207 1207 return util.buffer(d, cachestart, cacheend - cachestart)
1208 1208
1209 1209 return self._readsegment(offset, length, df=df)
1210 1210
1211 1211 def _getsegmentforrevs(self, startrev, endrev, df=None):
1212 1212 """Obtain a segment of raw data corresponding to a range of revisions.
1213 1213
1214 1214 Accepts the start and end revisions and an optional already-open
1215 1215 file handle to be used for reading. If the file handle is read, its
1216 1216 seek position will not be preserved.
1217 1217
1218 1218 Requests for data may be satisfied by a cache.
1219 1219
1220 1220 Returns a 2-tuple of (offset, data) for the requested range of
1221 1221 revisions. Offset is the integer offset from the beginning of the
1222 1222 revlog and data is a str or buffer of the raw byte data.
1223 1223
1224 1224 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1225 1225 to determine where each revision's data begins and ends.
1226 1226 """
1227 1227 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1228 1228 # (functions are expensive).
1229 1229 index = self.index
1230 1230 istart = index[startrev]
1231 1231 start = int(istart[0] >> 16)
1232 1232 if startrev == endrev:
1233 1233 end = start + istart[1]
1234 1234 else:
1235 1235 iend = index[endrev]
1236 1236 end = int(iend[0] >> 16) + iend[1]
1237 1237
1238 1238 if self._inline:
1239 1239 start += (startrev + 1) * self._io.size
1240 1240 end += (endrev + 1) * self._io.size
1241 1241 length = end - start
1242 1242
1243 1243 return start, self._getsegment(start, length, df=df)
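
    # Illustrative sketch (not part of the original source): carving one
    # revision's raw chunk out of a multi-revision segment, as _chunks()
    # below does for a non-inline revlog:
    #
    #   offset, data = rl._getsegmentforrevs(startrev, endrev)
    #   raw = util.buffer(data, rl.start(rev) - offset, rl.length(rev))
    #   chunk = rl.decompress(raw)
    #
    # For inline revlogs the index entries interleave with the data, so
    # _chunks() additionally shifts the chunk start by (rev + 1) * iosize.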
1244 1244
1245 1245 def _chunk(self, rev, df=None):
1246 1246 """Obtain a single decompressed chunk for a revision.
1247 1247
1248 1248 Accepts an integer revision and an optional already-open file handle
1249 1249 to be used for reading. If used, the seek position of the file will not
1250 1250 be preserved.
1251 1251
1252 1252 Returns a str holding uncompressed data for the requested revision.
1253 1253 """
1254 1254 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1255 1255
1256 1256 def _chunks(self, revs, df=None):
1257 1257 """Obtain decompressed chunks for the specified revisions.
1258 1258
1259 1259 Accepts an iterable of numeric revisions that are assumed to be in
1260 1260 ascending order. Also accepts an optional already-open file handle
1261 1261 to be used for reading. If used, the seek position of the file will
1262 1262 not be preserved.
1263 1263
1264 1264 This function is similar to calling ``self._chunk()`` multiple times,
1265 1265 but is faster.
1266 1266
1267 1267 Returns a list with decompressed data for each requested revision.
1268 1268 """
1269 1269 if not revs:
1270 1270 return []
1271 1271 start = self.start
1272 1272 length = self.length
1273 1273 inline = self._inline
1274 1274 iosize = self._io.size
1275 1275 buffer = util.buffer
1276 1276
1277 1277 l = []
1278 1278 ladd = l.append
1279 1279
1280 1280 try:
1281 1281 offset, data = self._getsegmentforrevs(revs[0], revs[-1], df=df)
1282 1282 except OverflowError:
1283 1283 # issue4215 - we can't cache a run of chunks greater than
1284 1284 # 2G on Windows
1285 1285 return [self._chunk(rev, df=df) for rev in revs]
1286 1286
1287 1287 decomp = self.decompress
1288 1288 for rev in revs:
1289 1289 chunkstart = start(rev)
1290 1290 if inline:
1291 1291 chunkstart += (rev + 1) * iosize
1292 1292 chunklength = length(rev)
1293 1293 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1294 1294
1295 1295 return l
1296 1296
1297 1297 def _chunkclear(self):
1298 1298 """Clear the raw chunk cache."""
1299 1299 self._chunkcache = (0, '')
1300 1300
1301 1301 def deltaparent(self, rev):
1302 1302 """return deltaparent of the given revision"""
1303 1303 base = self.index[rev][3]
1304 1304 if base == rev:
1305 1305 return nullrev
1306 1306 elif self._generaldelta:
1307 1307 return base
1308 1308 else:
1309 1309 return rev - 1
1310 1310
1311 1311 def revdiff(self, rev1, rev2):
1312 1312 """return or calculate a delta between two revisions
1313 1313
1314 1314 The delta calculated is in binary form and is intended to be written to
1315 1315 revlog data directly. So this function needs raw revision data.
1316 1316 """
1317 1317 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1318 1318 return bytes(self._chunk(rev2))
1319 1319
1320 1320 return mdiff.textdiff(self.revision(rev1, raw=True),
1321 1321 self.revision(rev2, raw=True))
1322 1322
1323 1323 def revision(self, nodeorrev, _df=None, raw=False):
1324 1324 """return an uncompressed revision of a given node or revision
1325 1325 number.
1326 1326
1327 1327 _df - an existing file handle to read from. (internal-only)
1328 1328 raw - an optional argument specifying if the revision data is to be
1329 1329 treated as raw data when applying flag transforms. 'raw' should be set
1330 1330 to True when generating changegroups or in debug commands.
1331 1331 """
1332 1332 if isinstance(nodeorrev, int):
1333 1333 rev = nodeorrev
1334 1334 node = self.node(rev)
1335 1335 else:
1336 1336 node = nodeorrev
1337 1337 rev = None
1338 1338
1339 1339 cachedrev = None
1340 1340 flags = None
1341 1341 rawtext = None
1342 1342 if node == nullid:
1343 1343 return ""
1344 1344 if self._cache:
1345 1345 if self._cache[0] == node:
1346 1346 # _cache only stores rawtext
1347 1347 if raw:
1348 1348 return self._cache[2]
1349 1349 # duplicated, but good for perf
1350 1350 if rev is None:
1351 1351 rev = self.rev(node)
1352 1352 if flags is None:
1353 1353 flags = self.flags(rev)
1354 1354 # no extra flags set, no flag processor runs, text = rawtext
1355 1355 if flags == REVIDX_DEFAULT_FLAGS:
1356 1356 return self._cache[2]
1357 1357 # rawtext is reusable. need to run flag processor
1358 1358 rawtext = self._cache[2]
1359 1359
1360 1360 cachedrev = self._cache[1]
1361 1361
1362 1362 # look up what we need to read
1363 1363 if rawtext is None:
1364 1364 if rev is None:
1365 1365 rev = self.rev(node)
1366 1366
1367 1367 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1368 1368 if stopped:
1369 1369 rawtext = self._cache[2]
1370 1370
1371 1371 # drop cache to save memory
1372 1372 self._cache = None
1373 1373
1374 1374 bins = self._chunks(chain, df=_df)
1375 1375 if rawtext is None:
1376 1376 rawtext = bytes(bins[0])
1377 1377 bins = bins[1:]
1378 1378
1379 1379 rawtext = mdiff.patches(rawtext, bins)
1380 1380 self._cache = (node, rev, rawtext)
1381 1381
1382 1382 if flags is None:
1383 1383 if rev is None:
1384 1384 rev = self.rev(node)
1385 1385 flags = self.flags(rev)
1386 1386
1387 1387 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1388 1388 if validatehash:
1389 1389 self.checkhash(text, node, rev=rev)
1390 1390
1391 1391 return text
1392 1392
1393 1393 def hash(self, text, p1, p2):
1394 1394 """Compute a node hash.
1395 1395
1396 1396 Available as a function so that subclasses can replace the hash
1397 1397 as needed.
1398 1398 """
1399 1399 return hash(text, p1, p2)
1400 1400
1401 1401 def _processflags(self, text, flags, operation, raw=False):
1402 1402         """Inspect revision data flags and apply transforms defined by
1403 1403 registered flag processors.
1404 1404
1405 1405 ``text`` - the revision data to process
1406 1406 ``flags`` - the revision flags
1407 1407 ``operation`` - the operation being performed (read or write)
1408 1408 ``raw`` - an optional argument describing if the raw transform should be
1409 1409 applied.
1410 1410
1411 1411 This method processes the flags in the order (or reverse order if
1412 1412 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1413 1413 flag processors registered for present flags. The order of flags defined
1414 1414 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1415 1415
1416 1416 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1417 1417 processed text and ``validatehash`` is a bool indicating whether the
1418 1418 returned text should be checked for hash integrity.
1419 1419
1420 1420 Note: If the ``raw`` argument is set, it has precedence over the
1421 1421 operation and will only update the value of ``validatehash``.
1422 1422 """
1423 1423 # fast path: no flag processors will run
1424 1424 if flags == 0:
1425 1425 return text, True
1426 1426 if not operation in ('read', 'write'):
1427 1427 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1428 1428 # Check all flags are known.
1429 1429 if flags & ~REVIDX_KNOWN_FLAGS:
1430 1430 raise RevlogError(_("incompatible revision flag '%#x'") %
1431 1431 (flags & ~REVIDX_KNOWN_FLAGS))
1432 1432 validatehash = True
1433 1433 # Depending on the operation (read or write), the order might be
1434 1434 # reversed due to non-commutative transforms.
1435 1435 orderedflags = REVIDX_FLAGS_ORDER
1436 1436 if operation == 'write':
1437 1437 orderedflags = reversed(orderedflags)
1438 1438
1439 1439 for flag in orderedflags:
1440 1440 # If a flagprocessor has been registered for a known flag, apply the
1441 1441 # related operation transform and update result tuple.
1442 1442 if flag & flags:
1443 1443 vhash = True
1444 1444
1445 1445 if flag not in _flagprocessors:
1446 1446 message = _("missing processor for flag '%#x'") % (flag)
1447 1447 raise RevlogError(message)
1448 1448
1449 1449 processor = _flagprocessors[flag]
1450 1450 if processor is not None:
1451 1451 readtransform, writetransform, rawtransform = processor
1452 1452
1453 1453 if raw:
1454 1454 vhash = rawtransform(self, text)
1455 1455 elif operation == 'read':
1456 1456 text, vhash = readtransform(self, text)
1457 1457 else: # write operation
1458 1458 text, vhash = writetransform(self, text)
1459 1459 validatehash = validatehash and vhash
1460 1460
1461 1461 return text, validatehash
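
    # Illustrative sketch (not part of the original source): the shape of a
    # flag processor entry as consumed above. The flag name and the
    # decoded()/encoded() helpers are hypothetical stand-ins:
    #
    #   def _myread(rl, text):
    #       return decoded(text), True      # (new text, validatehash)
    #   def _mywrite(rl, text):
    #       return encoded(text), True
    #   def _myraw(rl, rawtext):
    #       return True                     # raw data already hashes correctly
    #
    #   _flagprocessors[REVIDX_SOMEFLAG] = (_myread, _mywrite, _myraw)
    #
    # Extensions would normally register through a helper such as
    # addflagprocessor() rather than writing to the dict directly.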
1462 1462
1463 1463 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1464 1464 """Check node hash integrity.
1465 1465
1466 1466 Available as a function so that subclasses can extend hash mismatch
1467 1467 behaviors as needed.
1468 1468 """
1469 1469 if p1 is None and p2 is None:
1470 1470 p1, p2 = self.parents(node)
1471 1471 if node != self.hash(text, p1, p2):
1472 1472 revornode = rev
1473 1473 if revornode is None:
1474 1474 revornode = templatefilters.short(hex(node))
1475 1475 raise RevlogError(_("integrity check failed on %s:%s")
1476 1476 % (self.indexfile, pycompat.bytestr(revornode)))
1477 1477
1478 1478 def checkinlinesize(self, tr, fp=None):
1479 1479 """Check if the revlog is too big for inline and convert if so.
1480 1480
1481 1481 This should be called after revisions are added to the revlog. If the
1482 1482 revlog has grown too large to be an inline revlog, it will convert it
1483 1483 to use multiple index and data files.
1484 1484 """
1485 1485 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1486 1486 return
1487 1487
1488 1488 trinfo = tr.find(self.indexfile)
1489 1489 if trinfo is None:
1490 1490 raise RevlogError(_("%s not found in the transaction")
1491 1491 % self.indexfile)
1492 1492
1493 1493 trindex = trinfo[2]
1494 1494 if trindex is not None:
1495 1495 dataoff = self.start(trindex)
1496 1496 else:
1497 1497 # revlog was stripped at start of transaction, use all leftover data
1498 1498 trindex = len(self) - 1
1499 1499 dataoff = self.end(-2)
1500 1500
1501 1501 tr.add(self.datafile, dataoff)
1502 1502
1503 1503 if fp:
1504 1504 fp.flush()
1505 1505 fp.close()
1506 1506
1507 1507 df = self.opener(self.datafile, 'w')
1508 1508 try:
1509 1509 for r in self:
1510 1510 df.write(self._getsegmentforrevs(r, r)[1])
1511 1511 finally:
1512 1512 df.close()
1513 1513
1514 1514 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1515 1515 checkambig=self._checkambig)
1516 1516 self.version &= ~FLAG_INLINE_DATA
1517 1517 self._inline = False
1518 1518 for i in self:
1519 1519 e = self._io.packentry(self.index[i], self.node, self.version, i)
1520 1520 fp.write(e)
1521 1521
1522 1522 # if we don't call close, the temp file will never replace the
1523 1523 # real index
1524 1524 fp.close()
1525 1525
1526 1526 tr.replace(self.indexfile, trindex * self._io.size)
1527 1527 self._chunkclear()
1528 1528
1529 1529 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1530 1530 node=None, flags=REVIDX_DEFAULT_FLAGS):
1531 1531 """add a revision to the log
1532 1532
1533 1533 text - the revision data to add
1534 1534 transaction - the transaction object used for rollback
1535 1535 link - the linkrev data to add
1536 1536 p1, p2 - the parent nodeids of the revision
1537 1537 cachedelta - an optional precomputed delta
1538 1538 node - nodeid of revision; typically node is not specified, and it is
1539 1539 computed by default as hash(text, p1, p2), however subclasses might
1540 1540         use a different hashing method (and override checkhash() in such a case)
1541 1541 flags - the known flags to set on the revision
1542 1542 """
1543 1543 if link == nullrev:
1544 1544 raise RevlogError(_("attempted to add linkrev -1 to %s")
1545 1545 % self.indexfile)
1546 1546
1547 1547 if flags:
1548 1548 node = node or self.hash(text, p1, p2)
1549 1549
1550 1550 rawtext, validatehash = self._processflags(text, flags, 'write')
1551 1551
1552 1552 # If the flag processor modifies the revision data, ignore any provided
1553 1553 # cachedelta.
1554 1554 if rawtext != text:
1555 1555 cachedelta = None
1556 1556
1557 1557 if len(rawtext) > _maxentrysize:
1558 1558 raise RevlogError(
1559 1559 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1560 1560 % (self.indexfile, len(rawtext)))
1561 1561
1562 1562 node = node or self.hash(rawtext, p1, p2)
1563 1563 if node in self.nodemap:
1564 1564 return node
1565 1565
1566 1566 if validatehash:
1567 1567 self.checkhash(rawtext, node, p1=p1, p2=p2)
1568 1568
1569 1569 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1570 1570 flags, cachedelta=cachedelta)
1571 1571
1572 1572 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1573 1573 cachedelta=None):
1574 1574 """add a raw revision with known flags, node and parents
1575 1575 useful when reusing a revision not stored in this revlog (ex: received
1576 1576 over wire, or read from an external bundle).
1577 1577 """
1578 1578 dfh = None
1579 1579 if not self._inline:
1580 1580 dfh = self.opener(self.datafile, "a+")
1581 1581 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1582 1582 try:
1583 1583 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1584 1584 flags, cachedelta, ifh, dfh)
1585 1585 finally:
1586 1586 if dfh:
1587 1587 dfh.close()
1588 1588 ifh.close()
1589 1589
1590 1590 def compress(self, data):
1591 1591 """Generate a possibly-compressed representation of data."""
1592 1592 if not data:
1593 1593 return '', data
1594 1594
1595 1595 compressed = self._compressor.compress(data)
1596 1596
1597 1597 if compressed:
1598 1598 # The revlog compressor added the header in the returned data.
1599 1599 return '', compressed
1600 1600
1601 1601 if data[0:1] == '\0':
1602 1602 return '', data
1603 1603 return 'u', data
1604 1604
1605 1605 def decompress(self, data):
1606 1606 """Decompress a revlog chunk.
1607 1607
1608 1608 The chunk is expected to begin with a header identifying the
1609 1609 format type so it can be routed to an appropriate decompressor.
1610 1610 """
1611 1611 if not data:
1612 1612 return data
1613 1613
1614 1614 # Revlogs are read much more frequently than they are written and many
1615 1615 # chunks only take microseconds to decompress, so performance is
1616 1616 # important here.
1617 1617 #
1618 1618 # We can make a few assumptions about revlogs:
1619 1619 #
1620 1620 # 1) the majority of chunks will be compressed (as opposed to inline
1621 1621 # raw data).
1622 1622         # 2) decompressing *any* data will likely be at least 10x slower than
1623 1623 # returning raw inline data.
1624 1624 # 3) we want to prioritize common and officially supported compression
1625 1625 # engines
1626 1626 #
1627 1627 # It follows that we want to optimize for "decompress compressed data
1628 1628 # when encoded with common and officially supported compression engines"
1629 1629 # case over "raw data" and "data encoded by less common or non-official
1630 1630 # compression engines." That is why we have the inline lookup first
1631 1631 # followed by the compengines lookup.
1632 1632 #
1633 1633 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1634 1634 # compressed chunks. And this matters for changelog and manifest reads.
1635 1635 t = data[0:1]
1636 1636
1637 1637 if t == 'x':
1638 1638 try:
1639 1639 return _zlibdecompress(data)
1640 1640 except zlib.error as e:
1641 1641 raise RevlogError(_('revlog decompress error: %s') % str(e))
1642 1642 # '\0' is more common than 'u' so it goes first.
1643 1643 elif t == '\0':
1644 1644 return data
1645 1645 elif t == 'u':
1646 1646 return util.buffer(data, 1)
1647 1647
1648 1648 try:
1649 1649 compressor = self._decompressors[t]
1650 1650 except KeyError:
1651 1651 try:
1652 1652 engine = util.compengines.forrevlogheader(t)
1653 1653 compressor = engine.revlogcompressor()
1654 1654 self._decompressors[t] = compressor
1655 1655 except KeyError:
1656 1656 raise RevlogError(_('unknown compression type %r') % t)
1657 1657
1658 1658 return compressor.decompress(data)
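
    # Illustrative sketch (not part of the original source): how the leading
    # header byte routes a chunk, matching the branches above:
    #
    #   decompress('')          -> ''            (empty stays empty)
    #   decompress('\0<data>')  -> '\0<data>'    (stored uncompressed)
    #   decompress('u<data>')   -> '<data>'      (literal, 'u' stripped)
    #   decompress('x...')      -> zlib-inflated payload
    #
    # Any other leading byte is looked up as a compression engine header
    # via the _decompressors fallback.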
1659 1659
1660 1660 def _isgooddelta(self, d, textlen):
1661 1661 """Returns True if the given delta is good. Good means that it is within
1662 1662 the disk span, disk size, and chain length bounds that we know to be
1663 1663 performant."""
1664 1664 if d is None:
1665 1665 return False
1666 1666
1667 1667 # - 'dist' is the distance from the base revision -- bounding it limits
1668 1668 # the amount of I/O we need to do.
1669 1669 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1670 1670 # to apply -- bounding it limits the amount of CPU we consume.
1671 1671 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1672 1672
1673 1673 defaultmax = textlen * 4
1674 1674 maxdist = self._maxdeltachainspan
1675 1675 if not maxdist:
1676 1676             maxdist = dist # ensure the conditional passes
1677 1677 maxdist = max(maxdist, defaultmax)
1678 1678 if (dist > maxdist or l > textlen or
1679 1679 compresseddeltalen > textlen * 2 or
1680 1680 (self._maxchainlen and chainlen > self._maxchainlen)):
1681 1681 return False
1682 1682
1683 1683 return True
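
    # Illustrative worked example (not part of the original source): with
    # textlen = 1000 and _maxdeltachainspan = 8192, a candidate delta is
    # rejected when dist > max(8192, 4 * 1000) = 8192, when the delta itself
    # is longer than 1000 bytes, when the chain's summed compressed deltas
    # exceed 2000 bytes, or when _maxchainlen is set and the chain would grow
    # past it; otherwise the delta is considered good.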
1684 1684
1685 1685 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1686 1686 cachedelta, ifh, dfh, alwayscache=False):
1687 1687 """internal function to add revisions to the log
1688 1688
1689 1689 see addrevision for argument descriptions.
1690 1690
1691 1691 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1692 1692
1693 1693 invariants:
1694 1694 - rawtext is optional (can be None); if not set, cachedelta must be set.
1695 1695 if both are set, they must correspond to each other.
1696 1696 """
1697 1697 if node == nullid:
1698 1698 raise RevlogError(_("%s: attempt to add null revision") %
1699 1699 (self.indexfile))
1700 1700 if node == wdirid:
1701 1701 raise RevlogError(_("%s: attempt to add wdir revision") %
1702 1702 (self.indexfile))
1703 1703
1704 1704 btext = [rawtext]
1705 1705 def buildtext():
1706 1706 if btext[0] is not None:
1707 1707 return btext[0]
1708 1708 baserev = cachedelta[0]
1709 1709 delta = cachedelta[1]
1710 1710 # special case deltas which replace entire base; no need to decode
1711 1711 # base revision. this neatly avoids censored bases, which throw when
1712 1712 # they're decoded.
1713 1713 hlen = struct.calcsize(">lll")
1714 1714 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1715 1715 len(delta) - hlen):
1716 1716 btext[0] = delta[hlen:]
1717 1717 else:
1718 1718 if self._inline:
1719 1719 fh = ifh
1720 1720 else:
1721 1721 fh = dfh
1722 1722 basetext = self.revision(baserev, _df=fh, raw=True)
1723 1723 btext[0] = mdiff.patch(basetext, delta)
1724 1724
1725 1725 try:
1726 1726 res = self._processflags(btext[0], flags, 'read', raw=True)
1727 1727 btext[0], validatehash = res
1728 1728 if validatehash:
1729 1729 self.checkhash(btext[0], node, p1=p1, p2=p2)
1730 1730 if flags & REVIDX_ISCENSORED:
1731 1731 raise RevlogError(_('node %s is not censored') % node)
1732 1732 except CensoredNodeError:
1733 1733 # must pass the censored index flag to add censored revisions
1734 1734 if not flags & REVIDX_ISCENSORED:
1735 1735 raise
1736 1736 return btext[0]
1737 1737
1738 1738 def builddelta(rev):
1739 1739 # can we use the cached delta?
1740 1740 if cachedelta and cachedelta[0] == rev:
1741 1741 delta = cachedelta[1]
1742 1742 else:
1743 1743 t = buildtext()
1744 1744 if self.iscensored(rev):
1745 1745 # deltas based on a censored revision must replace the
1746 1746 # full content in one patch, so delta works everywhere
1747 1747 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1748 1748 delta = header + t
1749 1749 else:
1750 1750 if self._inline:
1751 1751 fh = ifh
1752 1752 else:
1753 1753 fh = dfh
1754 1754 ptext = self.revision(rev, _df=fh, raw=True)
1755 1755 delta = mdiff.textdiff(ptext, t)
1756 1756 header, data = self.compress(delta)
1757 1757 deltalen = len(header) + len(data)
1758 1758 chainbase = self.chainbase(rev)
1759 1759 dist = deltalen + offset - self.start(chainbase)
1760 1760 if self._generaldelta:
1761 1761 base = rev
1762 1762 else:
1763 1763 base = chainbase
1764 1764 chainlen, compresseddeltalen = self._chaininfo(rev)
1765 1765 chainlen += 1
1766 1766 compresseddeltalen += deltalen
1767 1767 return (dist, deltalen, (header, data), base,
1768 1768 chainbase, chainlen, compresseddeltalen)
1769 1769
1770 1770 curr = len(self)
1771 1771 prev = curr - 1
1772 1772 offset = self.end(prev)
1773 1773 delta = None
1774 1774 p1r, p2r = self.rev(p1), self.rev(p2)
1775 1775
1776 1776 # full versions are inserted when the needed deltas
1777 1777 # become comparable to the uncompressed text
1778 1778 if rawtext is None:
1779 1779 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1780 1780 cachedelta[1])
1781 1781 else:
1782 1782 textlen = len(rawtext)
1783 1783
1784 1784 # should we try to build a delta?
1785 1785 if prev != nullrev and self.storedeltachains:
1786 1786 tested = set()
1787 1787 # This condition is true most of the time when processing
1788 1788 # changegroup data into a generaldelta repo. The only time it
1789 1789 # isn't true is if this is the first revision in a delta chain
1790 1790 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1791 1791 if cachedelta and self._generaldelta and self._lazydeltabase:
1792 1792 # Assume what we received from the server is a good choice
1793 1793 # build delta will reuse the cache
1794 1794 candidatedelta = builddelta(cachedelta[0])
1795 1795 tested.add(cachedelta[0])
1796 1796 if self._isgooddelta(candidatedelta, textlen):
1797 1797 delta = candidatedelta
1798 1798 if delta is None and self._generaldelta:
1799 1799 # exclude already lazy tested base if any
1800 1800 parents = [p for p in (p1r, p2r)
1801 1801 if p != nullrev and p not in tested]
1802 1802 if parents and not self._aggressivemergedeltas:
1803 1803 # Pick whichever parent is closer to us (to minimize the
1804 1804 # chance of having to build a fulltext).
1805 1805 parents = [max(parents)]
1806 1806 tested.update(parents)
1807 1807 pdeltas = []
1808 1808 for p in parents:
1809 1809 pd = builddelta(p)
1810 1810 if self._isgooddelta(pd, textlen):
1811 1811 pdeltas.append(pd)
1812 1812 if pdeltas:
1813 1813 delta = min(pdeltas, key=lambda x: x[1])
1814 1814 if delta is None and prev not in tested:
1815 1815                 # other approaches failed, try against prev to hopefully save us a
1816 1816 # fulltext.
1817 1817 candidatedelta = builddelta(prev)
1818 1818 if self._isgooddelta(candidatedelta, textlen):
1819 1819 delta = candidatedelta
1820 1820 if delta is not None:
1821 1821 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1822 1822 else:
1823 1823 rawtext = buildtext()
1824 1824 data = self.compress(rawtext)
1825 1825 l = len(data[1]) + len(data[0])
1826 1826 base = chainbase = curr
1827 1827
1828 1828 e = (offset_type(offset, flags), l, textlen,
1829 1829 base, link, p1r, p2r, node)
1830 1830 self.index.insert(-1, e)
1831 1831 self.nodemap[node] = curr
1832 1832
1833 1833 entry = self._io.packentry(e, self.node, self.version, curr)
1834 1834 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1835 1835
1836 1836 if alwayscache and rawtext is None:
1837 1837 rawtext = buildtext()
1838 1838
1839 1839 if type(rawtext) == str: # only accept immutable objects
1840 1840 self._cache = (node, curr, rawtext)
1841 1841 self._chainbasecache[curr] = chainbase
1842 1842 return node
1843 1843
1844 1844 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1845 1845 # Files opened in a+ mode have inconsistent behavior on various
1846 1846 # platforms. Windows requires that a file positioning call be made
1847 1847 # when the file handle transitions between reads and writes. See
1848 1848 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1849 1849 # platforms, Python or the platform itself can be buggy. Some versions
1850 1850 # of Solaris have been observed to not append at the end of the file
1851 1851 # if the file was seeked to before the end. See issue4943 for more.
1852 1852 #
1853 1853 # We work around this issue by inserting a seek() before writing.
1854 1854 # Note: This is likely not necessary on Python 3.
1855 1855 ifh.seek(0, os.SEEK_END)
1856 1856 if dfh:
1857 1857 dfh.seek(0, os.SEEK_END)
1858 1858
1859 1859 curr = len(self) - 1
1860 1860 if not self._inline:
1861 1861 transaction.add(self.datafile, offset)
1862 1862 transaction.add(self.indexfile, curr * len(entry))
1863 1863 if data[0]:
1864 1864 dfh.write(data[0])
1865 1865 dfh.write(data[1])
1866 1866 ifh.write(entry)
1867 1867 else:
1868 1868 offset += curr * self._io.size
1869 1869 transaction.add(self.indexfile, offset, curr)
1870 1870 ifh.write(entry)
1871 1871 ifh.write(data[0])
1872 1872 ifh.write(data[1])
1873 1873 self.checkinlinesize(transaction, ifh)
1874 1874
1875 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1875 def addgroup(self, deltas, transaction, addrevisioncb=None):
1876 1876 """
1877 1877 add a delta group
1878 1878
1879 1879 given a set of deltas, add them to the revision log. the
1880 1880 first delta is against its parent, which should be in our
1881 1881 log, the rest are against the previous delta.
1882 1882
1883 1883 If ``addrevisioncb`` is defined, it will be called with arguments of
1884 1884 this revlog and the node that was added.
1885 1885 """
1886 1886
1887 1887 nodes = []
1888 1888
1889 1889 r = len(self)
1890 1890 end = 0
1891 1891 if r:
1892 1892 end = self.end(r - 1)
1893 1893 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1894 1894 isize = r * self._io.size
1895 1895 if self._inline:
1896 1896 transaction.add(self.indexfile, end + isize, r)
1897 1897 dfh = None
1898 1898 else:
1899 1899 transaction.add(self.indexfile, isize, r)
1900 1900 transaction.add(self.datafile, end)
1901 1901 dfh = self.opener(self.datafile, "a+")
1902 1902 def flush():
1903 1903 if dfh:
1904 1904 dfh.flush()
1905 1905 ifh.flush()
1906 1906 try:
1907 1907 # loop through our set of deltas
1908 chain = None
1909 for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
1910 node = chunkdata['node']
1911 p1 = chunkdata['p1']
1912 p2 = chunkdata['p2']
1913 cs = chunkdata['cs']
1914 deltabase = chunkdata['deltabase']
1915 delta = chunkdata['delta']
1916 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1908 for data in deltas:
1909 node, p1, p2, link, deltabase, delta, flags = data
1910 flags = flags or REVIDX_DEFAULT_FLAGS
1917 1911
1918 1912 nodes.append(node)
1919 chain = node
1920 1913
1921 link = linkmapper(cs)
1922 1914 if node in self.nodemap:
1923 1915 # this can happen if two branches make the same change
1924 1916 continue
1925 1917
1926 1918 for p in (p1, p2):
1927 1919 if p not in self.nodemap:
1928 1920 raise LookupError(p, self.indexfile,
1929 1921 _('unknown parent'))
1930 1922
1931 1923 if deltabase not in self.nodemap:
1932 1924 raise LookupError(deltabase, self.indexfile,
1933 1925 _('unknown delta base'))
1934 1926
1935 1927 baserev = self.rev(deltabase)
1936 1928
1937 1929 if baserev != nullrev and self.iscensored(baserev):
1938 1930 # if base is censored, delta must be full replacement in a
1939 1931 # single patch operation
1940 1932 hlen = struct.calcsize(">lll")
1941 1933 oldlen = self.rawsize(baserev)
1942 1934 newlen = len(delta) - hlen
1943 1935 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1944 1936 raise error.CensoredBaseError(self.indexfile,
1945 1937 self.node(baserev))
1946 1938
1947 1939 if not flags and self._peek_iscensored(baserev, delta, flush):
1948 1940 flags |= REVIDX_ISCENSORED
1949 1941
1950 1942 # We assume consumers of addrevisioncb will want to retrieve
1951 1943 # the added revision, which will require a call to
1952 1944 # revision(). revision() will fast path if there is a cache
1953 1945 # hit. So, we tell _addrevision() to always cache in this case.
1954 1946 # We're only using addgroup() in the context of changegroup
1955 1947 # generation so the revision data can always be handled as raw
1956 1948 # by the flagprocessor.
1957 1949 self._addrevision(node, None, transaction, link,
1958 1950 p1, p2, flags, (baserev, delta),
1959 1951 ifh, dfh,
1960 1952 alwayscache=bool(addrevisioncb))
1961 1953
1962 1954 if addrevisioncb:
1963 1955 addrevisioncb(self, node)
1964 1956
1965 1957 if not dfh and not self._inline:
1966 1958 # addrevision switched from inline to conventional
1967 1959 # reopen the index
1968 1960 ifh.close()
1969 1961 dfh = self.opener(self.datafile, "a+")
1970 1962 ifh = self.opener(self.indexfile, "a+",
1971 1963 checkambig=self._checkambig)
1972 1964 finally:
1973 1965 if dfh:
1974 1966 dfh.close()
1975 1967 ifh.close()
1976 1968
1977 1969 return nodes
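
    # Illustrative sketch (not part of the original source): the shape of the
    # ``deltas`` iterable consumed by addgroup() after this change. The tuple
    # field names come from the code above; ``rl``, ``tr`` and ``callback``
    # are hypothetical:
    #
    #   deltas = [
    #       (node, p1, p2, link, deltabase, delta, flags),
    #       ...
    #   ]
    #   added = rl.addgroup(deltas, tr, addrevisioncb=callback)
    #
    # The caller (the changegroup code) now resolves ``link`` itself, where
    # previously addgroup() called linkmapper(cs); a falsy ``flags`` value
    # falls back to REVIDX_DEFAULT_FLAGS.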
1978 1970
1979 1971 def iscensored(self, rev):
1980 1972 """Check if a file revision is censored."""
1981 1973 return False
1982 1974
1983 1975 def _peek_iscensored(self, baserev, delta, flush):
1984 1976 """Quickly check if a delta produces a censored revision."""
1985 1977 return False
1986 1978
1987 1979 def getstrippoint(self, minlink):
1988 1980 """find the minimum rev that must be stripped to strip the linkrev
1989 1981
1990 1982 Returns a tuple containing the minimum rev and a set of all revs that
1991 1983 have linkrevs that will be broken by this strip.
1992 1984 """
1993 1985 brokenrevs = set()
1994 1986 strippoint = len(self)
1995 1987
1996 1988 heads = {}
1997 1989 futurelargelinkrevs = set()
1998 1990 for head in self.headrevs():
1999 1991 headlinkrev = self.linkrev(head)
2000 1992 heads[head] = headlinkrev
2001 1993 if headlinkrev >= minlink:
2002 1994 futurelargelinkrevs.add(headlinkrev)
2003 1995
2004 1996 # This algorithm involves walking down the rev graph, starting at the
2005 1997 # heads. Since the revs are topologically sorted according to linkrev,
2006 1998 # once all head linkrevs are below the minlink, we know there are
2007 1999 # no more revs that could have a linkrev greater than minlink.
2008 2000 # So we can stop walking.
2009 2001 while futurelargelinkrevs:
2010 2002 strippoint -= 1
2011 2003 linkrev = heads.pop(strippoint)
2012 2004
2013 2005 if linkrev < minlink:
2014 2006 brokenrevs.add(strippoint)
2015 2007 else:
2016 2008 futurelargelinkrevs.remove(linkrev)
2017 2009
2018 2010 for p in self.parentrevs(strippoint):
2019 2011 if p != nullrev:
2020 2012 plinkrev = self.linkrev(p)
2021 2013 heads[p] = plinkrev
2022 2014 if plinkrev >= minlink:
2023 2015 futurelargelinkrevs.add(plinkrev)
2024 2016
2025 2017 return strippoint, brokenrevs
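
    # Illustrative sketch (not part of the original source): how strip() below
    # consumes getstrippoint(), assuming ``minlink`` is the first changelog
    # rev being stripped:
    #
    #   rev, brokenrevs = rl.getstrippoint(minlink)
    #   # everything from ``rev`` on is truncated; ``brokenrevs`` holds the
    #   # revs whose linkrevs are broken by that truncation (see docstring).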
2026 2018
2027 2019 def strip(self, minlink, transaction):
2028 2020 """truncate the revlog on the first revision with a linkrev >= minlink
2029 2021
2030 2022 This function is called when we're stripping revision minlink and
2031 2023 its descendants from the repository.
2032 2024
2033 2025 We have to remove all revisions with linkrev >= minlink, because
2034 2026 the equivalent changelog revisions will be renumbered after the
2035 2027 strip.
2036 2028
2037 2029 So we truncate the revlog on the first of these revisions, and
2038 2030 trust that the caller has saved the revisions that shouldn't be
2039 2031 removed and that it'll re-add them after this truncation.
2040 2032 """
2041 2033 if len(self) == 0:
2042 2034 return
2043 2035
2044 2036 rev, _ = self.getstrippoint(minlink)
2045 2037 if rev == len(self):
2046 2038 return
2047 2039
2048 2040 # first truncate the files on disk
2049 2041 end = self.start(rev)
2050 2042 if not self._inline:
2051 2043 transaction.add(self.datafile, end)
2052 2044 end = rev * self._io.size
2053 2045 else:
2054 2046 end += rev * self._io.size
2055 2047
2056 2048 transaction.add(self.indexfile, end)
2057 2049
2058 2050 # then reset internal state in memory to forget those revisions
2059 2051 self._cache = None
2060 2052 self._chaininfocache = {}
2061 2053 self._chunkclear()
2062 2054 for x in xrange(rev, len(self)):
2063 2055 del self.nodemap[self.node(x)]
2064 2056
2065 2057 del self.index[rev:-1]
2066 2058
2067 2059 def checksize(self):
2068 2060 expected = 0
2069 2061 if len(self):
2070 2062 expected = max(0, self.end(len(self) - 1))
2071 2063
2072 2064 try:
2073 2065 f = self.opener(self.datafile)
2074 2066 f.seek(0, 2)
2075 2067 actual = f.tell()
2076 2068 f.close()
2077 2069 dd = actual - expected
2078 2070 except IOError as inst:
2079 2071 if inst.errno != errno.ENOENT:
2080 2072 raise
2081 2073 dd = 0
2082 2074
2083 2075 try:
2084 2076 f = self.opener(self.indexfile)
2085 2077 f.seek(0, 2)
2086 2078 actual = f.tell()
2087 2079 f.close()
2088 2080 s = self._io.size
2089 2081 i = max(0, actual // s)
2090 2082 di = actual - (i * s)
2091 2083 if self._inline:
2092 2084 databytes = 0
2093 2085 for r in self:
2094 2086 databytes += max(0, self.length(r))
2095 2087 dd = 0
2096 2088 di = actual - len(self) * s - databytes
2097 2089 except IOError as inst:
2098 2090 if inst.errno != errno.ENOENT:
2099 2091 raise
2100 2092 di = 0
2101 2093
2102 2094 return (dd, di)
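
The (dd, di) pair returned above is the number of surplus (or, if negative, missing) bytes in the data and index files. A verify-style consumer might look roughly like the following sketch; the reportsize name and the messages are illustrative, not taken from this change:

# Hypothetical sketch: report on-disk size discrepancies found by checksize().
def reportsize(rl, ui):
    dd, di = rl.checksize()
    if dd:
        ui.warn("data length off by %d bytes\n" % dd)
    if di:
        ui.warn("index contains %d extra bytes\n" % di)
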
2103 2095
2104 2096 def files(self):
2105 2097 res = [self.indexfile]
2106 2098 if not self._inline:
2107 2099 res.append(self.datafile)
2108 2100 return res
2109 2101
2110 2102 DELTAREUSEALWAYS = 'always'
2111 2103 DELTAREUSESAMEREVS = 'samerevs'
2112 2104 DELTAREUSENEVER = 'never'
2113 2105
2114 2106 DELTAREUSEALL = {'always', 'samerevs', 'never'}
2115 2107
2116 2108 def clone(self, tr, destrevlog, addrevisioncb=None,
2117 2109 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2118 2110 """Copy this revlog to another, possibly with format changes.
2119 2111
2120 2112 The destination revlog will contain the same revisions and nodes.
2121 2113 However, it may not be bit-for-bit identical due to e.g. delta encoding
2122 2114 differences.
2123 2115
2124 2116 The ``deltareuse`` argument controls how deltas from the existing revlog
2125 2117 are preserved in the destination revlog. The argument can have the
2126 2118 following values:
2127 2119
2128 2120 DELTAREUSEALWAYS
2129 2121 Deltas will always be reused (if possible), even if the destination
2130 2122 revlog would not select the same revisions for the delta. This is the
2131 2123 fastest mode of operation.
2132 2124 DELTAREUSESAMEREVS
2133 2125 Deltas will be reused if the destination revlog would pick the same
2134 2126 revisions for the delta. This mode strikes a balance between speed
2135 2127 and optimization.
2136 2128 DELTAREUSENEVER
2137 2129 Deltas will never be reused. This is the slowest mode of execution.
2138 2130 This mode can be used to recompute deltas (e.g. if the diff/delta
2139 2131 algorithm changes).
2140 2132
2141 2133 Delta computation can be slow, so the choice of delta reuse policy can
2142 2134 significantly affect run time.
2143 2135
2144 2136 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2145 2137 two extremes. Deltas will be reused if they are appropriate. But if the
2146 2138 delta could choose a better revision, it will do so. This means if you
2147 2139 are converting a non-generaldelta revlog to a generaldelta revlog,
2148 2140 deltas will be recomputed if the delta's parent isn't a parent of the
2149 2141 revision.
2150 2142
2151 2143 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2152 2144 controls whether to compute deltas against both parents for merges.
2153 2145 If it is not specified, the destination revlog's current setting is used.
2154 2146 """
2155 2147 if deltareuse not in self.DELTAREUSEALL:
2156 2148 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2157 2149
2158 2150 if len(destrevlog):
2159 2151 raise ValueError(_('destination revlog is not empty'))
2160 2152
2161 2153 if getattr(self, 'filteredrevs', None):
2162 2154 raise ValueError(_('source revlog has filtered revisions'))
2163 2155 if getattr(destrevlog, 'filteredrevs', None):
2164 2156 raise ValueError(_('destination revlog has filtered revisions'))
2165 2157
2166 2158 # lazydeltabase controls whether to reuse a cached delta, if possible.
2167 2159 oldlazydeltabase = destrevlog._lazydeltabase
2168 2160 oldamd = destrevlog._aggressivemergedeltas
2169 2161
2170 2162 try:
2171 2163 if deltareuse == self.DELTAREUSEALWAYS:
2172 2164 destrevlog._lazydeltabase = True
2173 2165 elif deltareuse == self.DELTAREUSESAMEREVS:
2174 2166 destrevlog._lazydeltabase = False
2175 2167
2176 2168 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2177 2169
2178 2170 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2179 2171 self.DELTAREUSESAMEREVS)
2180 2172
2181 2173 index = self.index
2182 2174 for rev in self:
2183 2175 entry = index[rev]
2184 2176
2185 2177 # Some classes override linkrev to take filtered revs into
2186 2178 # account. Use raw entry from index.
2187 2179 flags = entry[0] & 0xffff
2188 2180 linkrev = entry[4]
2189 2181 p1 = index[entry[5]][7]
2190 2182 p2 = index[entry[6]][7]
2191 2183 node = entry[7]
2192 2184
2193 2185 # (Possibly) reuse the delta from the revlog if allowed and
2194 2186 # the revlog chunk is a delta.
2195 2187 cachedelta = None
2196 2188 rawtext = None
2197 2189 if populatecachedelta:
2198 2190 dp = self.deltaparent(rev)
2199 2191 if dp != nullrev:
2200 2192 cachedelta = (dp, str(self._chunk(rev)))
2201 2193
2202 2194 if not cachedelta:
2203 2195 rawtext = self.revision(rev, raw=True)
2204 2196
2205 2197 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2206 2198 checkambig=False)
2207 2199 dfh = None
2208 2200 if not destrevlog._inline:
2209 2201 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2210 2202 try:
2211 2203 destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
2212 2204 flags, cachedelta, ifh, dfh)
2213 2205 finally:
2214 2206 if dfh:
2215 2207 dfh.close()
2216 2208 ifh.close()
2217 2209
2218 2210 if addrevisioncb:
2219 2211 addrevisioncb(self, rev, node)
2220 2212 finally:
2221 2213 destrevlog._lazydeltabase = oldlazydeltabase
2222 2214 destrevlog._aggressivemergedeltas = oldamd
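
As an illustration of the policies above, a caller that wants every delta recomputed while copying a revlog could drive clone() roughly as follows; the repo plumbing, the recopy name, and the destination file name are assumptions for the sketch, not part of this change:

# Hypothetical sketch: copy a filelog into a fresh revlog, recomputing all
# deltas instead of reusing the existing ones.
from mercurial import revlog

def recopy(repo, path):
    src = repo.file(path)                        # source filelog (a revlog)
    dst = revlog.revlog(repo.svfs, b'cloned.i')  # empty destination revlog
    with repo.transaction(b'recopy') as tr:
        src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
    return dst
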
@@ -1,299 +1,316 b''
1 1 # test revlog interaction about raw data (flagprocessor)
2 2
3 3 from __future__ import absolute_import, print_function
4 4
5 5 import sys
6 6
7 7 from mercurial import (
8 8 encoding,
9 9 node,
10 10 revlog,
11 11 transaction,
12 12 vfs,
13 13 )
14 14
15 15 # TESTTMP is optional. This makes it convenient to run without run-tests.py
16 16 tvfs = vfs.vfs(encoding.environ.get('TESTTMP', b'/tmp'))
17 17
18 18 # Enable generaldelta otherwise revlog won't use delta as expected by the test
19 19 tvfs.options = {'generaldelta': True, 'revlogv1': True}
20 20
21 21 # The test wants to control whether to use delta explicitly, based on
22 22 # "storedeltachains".
23 23 revlog.revlog._isgooddelta = lambda self, d, textlen: self.storedeltachains
24 24
25 25 def abort(msg):
26 26 print('abort: %s' % msg)
27 27 # Return 0 so run-tests.py could compare the output.
28 28 sys.exit()
29 29
30 30 # Register a revlog processor for flag EXTSTORED.
31 31 #
32 32 # It simply prepends a fixed header and replaces '1' with 'i'. So it performs
33 33 # both insertion and replacement, which makes it interesting for exercising
34 34 # revlog's line-based deltas.
35 35 _extheader = b'E\n'
36 36
37 37 def readprocessor(self, rawtext):
38 38 # True: the returned text could be used to verify hash
39 39 text = rawtext[len(_extheader):].replace(b'i', b'1')
40 40 return text, True
41 41
42 42 def writeprocessor(self, text):
43 43 # False: the returned rawtext shouldn't be used to verify hash
44 44 rawtext = _extheader + text.replace(b'1', b'i')
45 45 return rawtext, False
46 46
47 47 def rawprocessor(self, rawtext):
48 48 # False: do not verify hash. Only the content returned by "readprocessor"
49 49 # can be used to verify hash.
50 50 return False
51 51
52 52 revlog.addflagprocessor(revlog.REVIDX_EXTSTORED,
53 53 (readprocessor, writeprocessor, rawprocessor))
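
A quick standalone check of how these two processors round-trip (None stands in for the unused self argument):

# The stored rawtext gains the header and the '1' -> 'i' substitution;
# reading it back recovers the original text and allows hash verification.
_t = b'1\n2\n'
_raw, _useraw = writeprocessor(None, _t)
assert (_raw, _useraw) == (b'E\ni\n2\n', False)
assert readprocessor(None, _raw) == (_t, True)
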
54 54
55 55 # Utilities about reading and appending revlog
56 56
57 57 def newtransaction():
58 58 # A transaction is required to write revlogs
59 59 report = lambda msg: None
60 60 return transaction.transaction(report, tvfs, {'plain': tvfs}, b'journal')
61 61
62 62 def newrevlog(name=b'_testrevlog.i', recreate=False):
63 63 if recreate:
64 64 tvfs.tryunlink(name)
65 65 rlog = revlog.revlog(tvfs, name)
66 66 return rlog
67 67
68 68 def appendrev(rlog, text, tr, isext=False, isdelta=True):
69 69 '''Append a revision. If isext is True, set the EXTSTORED flag so flag
70 70 processor will be used (and rawtext is different from text). If isdelta is
71 71 True, force the revision to be a delta, otherwise it's full text.
72 72 '''
73 73 nextrev = len(rlog)
74 74 p1 = rlog.node(nextrev - 1)
75 75 p2 = node.nullid
76 76 if isext:
77 77 flags = revlog.REVIDX_EXTSTORED
78 78 else:
79 79 flags = revlog.REVIDX_DEFAULT_FLAGS
80 80 # Change storedeltachains temporarily, to override revlog's delta decision
81 81 rlog.storedeltachains = isdelta
82 82 try:
83 83 rlog.addrevision(text, tr, nextrev, p1, p2, flags=flags)
84 84 return nextrev
85 85 except Exception as ex:
86 86 abort('rev %d: failed to append: %s' % (nextrev, ex))
87 87 finally:
88 88 # Restore storedeltachains. It is always True, see revlog.__init__
89 89 rlog.storedeltachains = True
90 90
91 91 def addgroupcopy(rlog, tr, destname=b'_destrevlog.i', optimaldelta=True):
92 92 '''Copy revlog to destname using revlog.addgroup. Return the copied revlog.
93 93
94 94 This emulates push or pull. They use changegroup. Changegroup requires
95 95 a repo to work. We don't have a repo, so a dummy changegroup is used.
96 96
97 97 If optimaldelta is True, use an optimized delta parent, so the destination
98 98 revlog can probably reuse it. Otherwise a sub-optimal delta is built, and
99 99 the destination revlog needs more work to use it.
100 100
101 101 This exercises some revlog.addgroup (and revlog._addrevision(text=None))
102 102 code paths, which are not covered by "appendrev" alone.
103 103 '''
104 104 class dummychangegroup(object):
105 105 @staticmethod
106 106 def deltachunk(pnode):
107 107 pnode = pnode or node.nullid
108 108 parentrev = rlog.rev(pnode)
109 109 r = parentrev + 1
110 110 if r >= len(rlog):
111 111 return {}
112 112 if optimaldelta:
113 113 deltaparent = parentrev
114 114 else:
115 115 # suboptimal deltaparent
116 116 deltaparent = min(0, parentrev)
117 117 return {'node': rlog.node(r), 'p1': pnode, 'p2': node.nullid,
118 118 'cs': rlog.node(rlog.linkrev(r)), 'flags': rlog.flags(r),
119 119 'deltabase': rlog.node(deltaparent),
120 120 'delta': rlog.revdiff(deltaparent, r)}
121 121
122 def deltaiter(self, linkmapper):
123 chain = None
124 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
125 node = chunkdata['node']
126 p1 = chunkdata['p1']
127 p2 = chunkdata['p2']
128 cs = chunkdata['cs']
129 deltabase = chunkdata['deltabase']
130 delta = chunkdata['delta']
131 flags = chunkdata['flags']
132
133 link = linkmapper(cs)
134 chain = node
135
136 yield (node, p1, p2, link, deltabase, delta, flags)
137
122 138 def linkmap(lnode):
123 139 return rlog.rev(lnode)
124 140
125 141 dlog = newrevlog(destname, recreate=True)
126 dlog.addgroup(dummychangegroup(), linkmap, tr)
142 dummydeltas = dummychangegroup().deltaiter(linkmap)
143 dlog.addgroup(dummydeltas, tr)
127 144 return dlog
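
With this change, revlog.addgroup no longer understands changegroup objects: it consumes an iterator of (node, p1, p2, linkrev, deltabase, delta, flags) tuples, as deltaiter above produces, plus a transaction. A minimal hand-built group for an empty revlog could look like the sketch below; the addone name is made up, and it leans on the usual revlog.hash and mdiff.trivialdiffheader helpers:

# Hypothetical sketch: append one full-text revision through the new
# addgroup interface. Against nullid, the "delta" is a trivial patch that
# inserts the whole text at offset 0.
from mercurial import mdiff, node, revlog

def addone(dlog, tr, text, linkrev=0):
    n = revlog.hash(text, node.nullid, node.nullid)
    delta = mdiff.trivialdiffheader(len(text)) + text
    deltas = iter([(n, node.nullid, node.nullid, linkrev,
                    node.nullid, delta, revlog.REVIDX_DEFAULT_FLAGS)])
    dlog.addgroup(deltas, tr)
    return n
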
128 145
129 146 def lowlevelcopy(rlog, tr, destname=b'_destrevlog.i'):
130 147 '''Like addgroupcopy, but use the low level revlog._addrevision directly.
131 148
132 149 It exercises some code paths that are hard to reach easily otherwise.
133 150 '''
134 151 dlog = newrevlog(destname, recreate=True)
135 152 for r in rlog:
136 153 p1 = rlog.node(r - 1)
137 154 p2 = node.nullid
138 155 if r == 0:
139 156 text = rlog.revision(r, raw=True)
140 157 cachedelta = None
141 158 else:
142 159 # deltaparent is more interesting if it has the EXTSTORED flag.
143 160 deltaparent = max([0] + [p for p in range(r - 2) if rlog.flags(p)])
144 161 text = None
145 162 cachedelta = (deltaparent, rlog.revdiff(deltaparent, r))
146 163 flags = rlog.flags(r)
147 164 ifh = dfh = None
148 165 try:
149 166 ifh = dlog.opener(dlog.indexfile, 'a+')
150 167 if not dlog._inline:
151 168 dfh = dlog.opener(dlog.datafile, 'a+')
152 169 dlog._addrevision(rlog.node(r), text, tr, r, p1, p2, flags,
153 170 cachedelta, ifh, dfh)
154 171 finally:
155 172 if dfh is not None:
156 173 dfh.close()
157 174 if ifh is not None:
158 175 ifh.close()
159 176 return dlog
160 177
161 178 # Utilities to generate revisions for testing
162 179
163 180 def genbits(n):
164 181 '''Given a number n, generate (2 ** (n * 2) + 1) numbers in range(2 ** n).
165 182 i.e. the generated numbers have a width of n bits.
166 183
167 184 The combination of two adjacent numbers will cover all possible cases.
168 185 That is to say, given any x, y where both x and y are in range(2 ** n),
169 186 there is an x followed immediately by y in the generated sequence.
170 187 '''
171 188 m = 2 ** n
172 189
173 190 # Gray Code. See https://en.wikipedia.org/wiki/Gray_code
174 191 gray = lambda x: x ^ (x >> 1)
175 192 reversegray = dict((gray(i), i) for i in range(m))
176 193
177 194 # Generate (n * 2) bit gray code, yield lower n bits as X, and look for
178 195 # the next unused gray code where higher n bits equal to X.
179 196
180 197 # For gray codes whose higher bits are X, a[X] of them have been used.
181 198 a = [0] * m
182 199
183 200 # Iterate from 0.
184 201 x = 0
185 202 yield x
186 203 for i in range(m * m):
187 204 x = reversegray[x]
188 205 y = gray(a[x] + x * m) & (m - 1)
189 206 assert a[x] < m
190 207 a[x] += 1
191 208 x = y
192 209 yield x
193 210
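
The coverage claim in the docstring can be checked directly for a small width, e.g. n = 2:

# For n = 2, genbits yields 2 ** 4 + 1 = 17 values whose 16 adjacent pairs
# are exactly all ordered pairs over range(4).
seq = list(genbits(2))
assert len(seq) == 2 ** (2 * 2) + 1
pairs = {(seq[i], seq[i + 1]) for i in range(len(seq) - 1)}
assert pairs == {(x, y) for x in range(4) for y in range(4)}
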
194 211 def gentext(rev):
195 212 '''Given a revision number, generate dummy text'''
196 213 return b''.join(b'%d\n' % j for j in range(-1, rev % 5))
197 214
198 215 def writecases(rlog, tr):
199 216 '''Write some revisions of interest to the test.
200 217
201 218 The test is interested in 3 properties of a revision:
202 219
203 220 - Is it a delta or a full text? (isdelta)
204 221 This is to catch some delta application issues.
205 222 - Does it have a flag of EXTSTORED? (isext)
206 223 This is to catch some flag processor issues. Especially when
207 224 interacted with revlog deltas.
208 225 - Is its text empty? (isempty)
209 226 This is less important. It is intended to try to catch some careless
210 227 checks like "if text" instead of "if text is None". Note: if a flag
211 228 processor is involved, the raw text may not be empty.
212 229
213 230 Write 65 revisions, so that all combinations of the above flags for
214 231 adjacent revisions are covered. That is to say,
215 232
216 233 len(set(
217 234 (r.delta, r.ext, r.empty, (r+1).delta, (r+1).ext, (r+1).empty)
218 235 for r in range(len(rlog) - 1)
219 236 )) is 64.
220 237
221 238 Where "r.delta", "r.ext", and "r.empty" are booleans matching properties
222 239 mentioned above.
223 240
224 241 Return expected [(text, rawtext)].
225 242 '''
226 243 result = []
227 244 for i, x in enumerate(genbits(3)):
228 245 isdelta, isext, isempty = bool(x & 1), bool(x & 2), bool(x & 4)
229 246 if isempty:
230 247 text = b''
231 248 else:
232 249 text = gentext(i)
233 250 rev = appendrev(rlog, text, tr, isext=isext, isdelta=isdelta)
234 251
235 252 # Verify text, rawtext, and rawsize
236 253 if isext:
237 254 rawtext = writeprocessor(None, text)[0]
238 255 else:
239 256 rawtext = text
240 257 if rlog.rawsize(rev) != len(rawtext):
241 258 abort('rev %d: wrong rawsize' % rev)
242 259 if rlog.revision(rev, raw=False) != text:
243 260 abort('rev %d: wrong text' % rev)
244 261 if rlog.revision(rev, raw=True) != rawtext:
245 262 abort('rev %d: wrong rawtext' % rev)
246 263 result.append((text, rawtext))
247 264
248 265 # Verify flags like isdelta, isext work as expected
249 266 if bool(rlog.deltaparent(rev) > -1) != isdelta:
250 267 abort('rev %d: isdelta is ineffective' % rev)
251 268 if bool(rlog.flags(rev)) != isext:
252 269 abort('rev %d: isext is ineffective' % rev)
253 270 return result
254 271
255 272 # Main test and checking
256 273
257 274 def checkrevlog(rlog, expected):
258 275 '''Check if revlog has expected contents. expected is [(text, rawtext)]'''
259 276 # Test using different access orders. This could expose some issues
260 277 # depending on revlog caching (see revlog._cache).
261 278 for r0 in range(len(rlog) - 1):
262 279 r1 = r0 + 1
263 280 for revorder in [[r0, r1], [r1, r0]]:
264 281 for raworder in [[True], [False], [True, False], [False, True]]:
265 282 nlog = newrevlog()
266 283 for rev in revorder:
267 284 for raw in raworder:
268 285 t = nlog.revision(rev, raw=raw)
269 286 if t != expected[rev][int(raw)]:
270 287 abort('rev %d: corrupted %stext'
271 288 % (rev, raw and 'raw' or ''))
272 289
273 290 def maintest():
274 291 expected = rl = None
275 292 with newtransaction() as tr:
276 293 rl = newrevlog(recreate=True)
277 294 expected = writecases(rl, tr)
278 295 checkrevlog(rl, expected)
279 296 print('local test passed')
280 297 # Copy via revlog.addgroup
281 298 rl1 = addgroupcopy(rl, tr)
282 299 checkrevlog(rl1, expected)
283 300 rl2 = addgroupcopy(rl, tr, optimaldelta=False)
284 301 checkrevlog(rl2, expected)
285 302 print('addgroupcopy test passed')
286 303 # Copy via revlog.clone
287 304 rl3 = newrevlog(name=b'_destrevlog3.i', recreate=True)
288 305 rl.clone(tr, rl3)
289 306 checkrevlog(rl3, expected)
290 307 print('clone test passed')
291 308 # Copy via low-level revlog._addrevision
292 309 rl4 = lowlevelcopy(rl, tr)
293 310 checkrevlog(rl4, expected)
294 311 print('lowlevelcopy test passed')
295 312
296 313 try:
297 314 maintest()
298 315 except Exception as ex:
299 316 abort('crashed: %s' % ex)