revlog: add revmap back to revlog.addgroup...
Durham Goode
r34292:1db9abf4 default
mercurial/changegroup.py
@@ -1,1015 +1,1014 @@
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import tempfile
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 hex,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from . import (
23 23 dagutil,
24 24 error,
25 25 mdiff,
26 26 phases,
27 27 pycompat,
28 28 util,
29 29 )
30 30
31 31 _CHANGEGROUPV1_DELTA_HEADER = "20s20s20s20s"
32 32 _CHANGEGROUPV2_DELTA_HEADER = "20s20s20s20s20s"
33 33 _CHANGEGROUPV3_DELTA_HEADER = ">20s20s20s20s20sH"
34 34
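As a quick sanity sketch (not part of the original file): cg1's delta header is four 20-byte nodes (node, p1, p2, linknode), cg2 adds a fifth node for the delta base, and cg3 appends a big-endian 16-bit flags field.

```python
# Sketch: sizes implied by the three header formats above.
import struct

assert struct.calcsize("20s20s20s20s") == 80        # cg1
assert struct.calcsize("20s20s20s20s20s") == 100    # cg2: + deltabase
assert struct.calcsize(">20s20s20s20s20sH") == 102  # cg3: + flags
```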
35 35 def readexactly(stream, n):
36 36 '''read n bytes from stream.read and abort if less was available'''
37 37 s = stream.read(n)
38 38 if len(s) < n:
39 39 raise error.Abort(_("stream ended unexpectedly"
40 40 " (got %d bytes, expected %d)")
41 41 % (len(s), n))
42 42 return s
43 43
44 44 def getchunk(stream):
45 45 """return the next chunk from stream as a string"""
46 46 d = readexactly(stream, 4)
47 47 l = struct.unpack(">l", d)[0]
48 48 if l <= 4:
49 49 if l:
50 50 raise error.Abort(_("invalid chunk length %d") % l)
51 51 return ""
52 52 return readexactly(stream, l - 4)
53 53
54 54 def chunkheader(length):
55 55 """return a changegroup chunk header (string)"""
56 56 return struct.pack(">l", length + 4)
57 57
58 58 def closechunk():
59 59 """return a changegroup chunk header (string) for a zero-length chunk"""
60 60 return struct.pack(">l", 0)
61 61
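For readers new to the wire format, a minimal sketch of the framing that getchunk(), chunkheader(), and closechunk() implement: each chunk is a 4-byte big-endian length that counts itself, followed by the payload, and a zero length terminates a group.

```python
# Framing sketch; encodechunk is a hypothetical helper mirroring
# chunkheader() followed by the payload.
import struct

def encodechunk(payload):
    # the length field includes its own 4 bytes
    return struct.pack(">l", len(payload) + 4) + payload

frame = encodechunk(b'abc') + struct.pack(">l", 0)  # one chunk + terminator
length = struct.unpack(">l", frame[:4])[0]          # 7: header included
assert frame[4:length] == b'abc'
assert struct.unpack(">l", frame[length:length + 4])[0] == 0
```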
62 62 def writechunks(ui, chunks, filename, vfs=None):
63 63 """Write chunks to a file and return its filename.
64 64
65 65 The stream is assumed to be a bundle file.
66 66 Existing files will not be overwritten.
67 67 If no filename is specified, a temporary file is created.
68 68 """
69 69 fh = None
70 70 cleanup = None
71 71 try:
72 72 if filename:
73 73 if vfs:
74 74 fh = vfs.open(filename, "wb")
75 75 else:
76 76 # Increase default buffer size because default is usually
77 77 # small (4k is common on Linux).
78 78 fh = open(filename, "wb", 131072)
79 79 else:
80 80 fd, filename = tempfile.mkstemp(prefix="hg-bundle-", suffix=".hg")
81 81 fh = os.fdopen(fd, pycompat.sysstr("wb"))
82 82 cleanup = filename
83 83 for c in chunks:
84 84 fh.write(c)
85 85 cleanup = None
86 86 return filename
87 87 finally:
88 88 if fh is not None:
89 89 fh.close()
90 90 if cleanup is not None:
91 91 if filename and vfs:
92 92 vfs.unlink(cleanup)
93 93 else:
94 94 os.unlink(cleanup)
95 95
96 96 class cg1unpacker(object):
97 97 """Unpacker for cg1 changegroup streams.
98 98
99 99 A changegroup unpacker handles the framing of the revision data in
100 100 the wire format. Most consumers will want to use the apply()
101 101 method to add the changes from the changegroup to a repository.
102 102
103 103 If you're forwarding a changegroup unmodified to another consumer,
104 104 use getchunks(), which returns an iterator of changegroup
105 105 chunks. This is mostly useful for cases where you need to know the
106 106 data stream has ended by observing the end of the changegroup.
107 107
108 108 deltachunk() is useful only if you're applying delta data. Most
109 109 consumers should prefer apply() instead.
110 110
111 111 A few other public methods exist. Those are used only for
112 112 bundlerepo and some debug commands - their use is discouraged.
113 113 """
114 114 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
115 115 deltaheadersize = struct.calcsize(deltaheader)
116 116 version = '01'
117 117 _grouplistcount = 1 # One list of files after the manifests
118 118
119 119 def __init__(self, fh, alg, extras=None):
120 120 if alg is None:
121 121 alg = 'UN'
122 122 if alg not in util.compengines.supportedbundletypes:
123 123 raise error.Abort(_('unknown stream compression type: %s')
124 124 % alg)
125 125 if alg == 'BZ':
126 126 alg = '_truncatedBZ'
127 127
128 128 compengine = util.compengines.forbundletype(alg)
129 129 self._stream = compengine.decompressorreader(fh)
130 130 self._type = alg
131 131 self.extras = extras or {}
132 132 self.callback = None
133 133
134 134 # These methods (compressed, read, seek, tell) all appear to only
135 135 # be used by bundlerepo, but it's a little hard to tell.
136 136 def compressed(self):
137 137 return self._type is not None and self._type != 'UN'
138 138 def read(self, l):
139 139 return self._stream.read(l)
140 140 def seek(self, pos):
141 141 return self._stream.seek(pos)
142 142 def tell(self):
143 143 return self._stream.tell()
144 144 def close(self):
145 145 return self._stream.close()
146 146
147 147 def _chunklength(self):
148 148 d = readexactly(self._stream, 4)
149 149 l = struct.unpack(">l", d)[0]
150 150 if l <= 4:
151 151 if l:
152 152 raise error.Abort(_("invalid chunk length %d") % l)
153 153 return 0
154 154 if self.callback:
155 155 self.callback()
156 156 return l - 4
157 157
158 158 def changelogheader(self):
159 159 """v10 does not have a changelog header chunk"""
160 160 return {}
161 161
162 162 def manifestheader(self):
163 163 """v10 does not have a manifest header chunk"""
164 164 return {}
165 165
166 166 def filelogheader(self):
167 167 """return the header of the filelogs chunk, v10 only has the filename"""
168 168 l = self._chunklength()
169 169 if not l:
170 170 return {}
171 171 fname = readexactly(self._stream, l)
172 172 return {'filename': fname}
173 173
174 174 def _deltaheader(self, headertuple, prevnode):
175 175 node, p1, p2, cs = headertuple
176 176 if prevnode is None:
177 177 deltabase = p1
178 178 else:
179 179 deltabase = prevnode
180 180 flags = 0
181 181 return node, p1, p2, deltabase, cs, flags
182 182
183 183 def deltachunk(self, prevnode):
184 184 l = self._chunklength()
185 185 if not l:
186 186 return {}
187 187 headerdata = readexactly(self._stream, self.deltaheadersize)
188 188 header = struct.unpack(self.deltaheader, headerdata)
189 189 delta = readexactly(self._stream, l - self.deltaheadersize)
190 190 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
191 191 return {'node': node, 'p1': p1, 'p2': p2, 'cs': cs,
192 192 'deltabase': deltabase, 'delta': delta, 'flags': flags}
193 193
194 194 def getchunks(self):
195 195 """returns all the chunks contains in the bundle
196 196
197 197 Used when you need to forward the binary stream to a file or another
198 198 network API. To do so, it parses the changegroup data; otherwise it would
199 199 block on an sshrepo because it doesn't know where the stream ends.
200 200 """
201 201 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
202 202 # and a list of filelogs. For changegroup 3, we expect 4 parts:
203 203 # changelog, manifestlog, a list of tree manifestlogs, and a list of
204 204 # filelogs.
205 205 #
206 206 # Changelog and manifestlog parts are terminated with empty chunks. The
207 207 # tree and file parts are a list of entry sections. Each entry section
208 208 # is a series of chunks terminating in an empty chunk. The list of these
209 209 # entry sections is terminated in yet another empty chunk, so we know
210 210 # we've reached the end of the tree/file list when we reach an empty
211 211 # chunk that was preceded by no non-empty chunks.
212 212
213 213 parts = 0
214 214 while parts < 2 + self._grouplistcount:
215 215 noentries = True
216 216 while True:
217 217 chunk = getchunk(self)
218 218 if not chunk:
219 219 # The first two empty chunks represent the end of the
220 220 # changelog and the manifestlog portions. The remaining
221 221 # empty chunks represent either A) the end of individual
222 222 # tree or file entries in the file list, or B) the end of
223 223 # the entire list. It's the end of the entire list if there
224 224 # were no entries (i.e. noentries is True).
225 225 if parts < 2:
226 226 parts += 1
227 227 elif noentries:
228 228 parts += 1
229 229 break
230 230 noentries = False
231 231 yield chunkheader(len(chunk))
232 232 pos = 0
233 233 while pos < len(chunk):
234 234 next = pos + 2**20
235 235 yield chunk[pos:next]
236 236 pos = next
237 237 yield closechunk()
238 238
239 239 def _unpackmanifests(self, repo, revmap, trp, prog, numchanges):
240 240 # We know that we'll never have more manifests than we had
241 241 # changesets.
242 242 self.callback = prog(_('manifests'), numchanges)
243 243 # no need to check for empty manifest group here:
244 244 # if the result of the merge of 1 and 2 is the same in 3 and 4,
245 245 # no new manifest will be created and the manifest group will
246 246 # be empty during the pull
247 247 self.manifestheader()
248 deltas = self.deltaiter(revmap)
249 repo.manifestlog._revlog.addgroup(deltas, trp)
248 deltas = self.deltaiter()
249 repo.manifestlog._revlog.addgroup(deltas, revmap, trp)
250 250 repo.ui.progress(_('manifests'), None)
251 251 self.callback = None
252 252
253 253 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
254 254 expectedtotal=None):
255 255 """Add the changegroup returned by source.read() to this repo.
256 256 srctype is a string like 'push', 'pull', or 'unbundle'. url is
257 257 the URL of the repo where this changegroup is coming from.
258 258
259 259 Return an integer summarizing the change to this repo:
260 260 - nothing changed or no source: 0
261 261 - more heads than before: 1+added heads (2..n)
262 262 - fewer heads than before: -1-removed heads (-2..-n)
263 263 - number of heads stays the same: 1
264 264 """
265 265 repo = repo.unfiltered()
266 266 def csmap(x):
267 267 repo.ui.debug("add changeset %s\n" % short(x))
268 268 return len(cl)
269 269
270 270 def revmap(x):
271 271 return cl.rev(x)
272 272
273 273 changesets = files = revisions = 0
274 274
275 275 try:
276 276 # The transaction may already carry source information. In this
277 277 # case we use the top level data. We overwrite the argument
278 278 # because we need to use the top level value (if they exist)
279 279 # in this function.
280 280 srctype = tr.hookargs.setdefault('source', srctype)
281 281 url = tr.hookargs.setdefault('url', url)
282 282 repo.hook('prechangegroup',
283 283 throw=True, **pycompat.strkwargs(tr.hookargs))
284 284
285 285 # write changelog data to temp files so concurrent readers
286 286 # will not see an inconsistent view
287 287 cl = repo.changelog
288 288 cl.delayupdate(tr)
289 289 oldheads = set(cl.heads())
290 290
291 291 trp = weakref.proxy(tr)
292 292 # pull off the changeset group
293 293 repo.ui.status(_("adding changesets\n"))
294 294 clstart = len(cl)
295 295 class prog(object):
296 296 def __init__(self, step, total):
297 297 self._step = step
298 298 self._total = total
299 299 self._count = 1
300 300 def __call__(self):
301 301 repo.ui.progress(self._step, self._count, unit=_('chunks'),
302 302 total=self._total)
303 303 self._count += 1
304 304 self.callback = prog(_('changesets'), expectedtotal)
305 305
306 306 efiles = set()
307 307 def onchangelog(cl, node):
308 308 efiles.update(cl.readfiles(node))
309 309
310 310 self.changelogheader()
311 deltas = self.deltaiter(csmap)
312 cgnodes = cl.addgroup(deltas, trp, addrevisioncb=onchangelog)
311 deltas = self.deltaiter()
312 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
313 313 efiles = len(efiles)
314 314
315 315 if not cgnodes:
316 316 repo.ui.develwarn('applied empty changegroup',
317 317 config='empty-changegroup')
318 318 clend = len(cl)
319 319 changesets = clend - clstart
320 320 repo.ui.progress(_('changesets'), None)
321 321 self.callback = None
322 322
323 323 # pull off the manifest group
324 324 repo.ui.status(_("adding manifests\n"))
325 325 self._unpackmanifests(repo, revmap, trp, prog, changesets)
326 326
327 327 needfiles = {}
328 328 if repo.ui.configbool('server', 'validate'):
329 329 cl = repo.changelog
330 330 ml = repo.manifestlog
331 331 # validate incoming csets have their manifests
332 332 for cset in xrange(clstart, clend):
333 333 mfnode = cl.changelogrevision(cset).manifest
334 334 mfest = ml[mfnode].readdelta()
335 335 # store file cgnodes we must see
336 336 for f, n in mfest.iteritems():
337 337 needfiles.setdefault(f, set()).add(n)
338 338
339 339 # process the files
340 340 repo.ui.status(_("adding file changes\n"))
341 341 newrevs, newfiles = _addchangegroupfiles(
342 342 repo, self, revmap, trp, efiles, needfiles)
343 343 revisions += newrevs
344 344 files += newfiles
345 345
346 346 deltaheads = 0
347 347 if oldheads:
348 348 heads = cl.heads()
349 349 deltaheads = len(heads) - len(oldheads)
350 350 for h in heads:
351 351 if h not in oldheads and repo[h].closesbranch():
352 352 deltaheads -= 1
353 353 htext = ""
354 354 if deltaheads:
355 355 htext = _(" (%+d heads)") % deltaheads
356 356
357 357 repo.ui.status(_("added %d changesets"
358 358 " with %d changes to %d files%s\n")
359 359 % (changesets, revisions, files, htext))
360 360 repo.invalidatevolatilesets()
361 361
362 362 if changesets > 0:
363 363 if 'node' not in tr.hookargs:
364 364 tr.hookargs['node'] = hex(cl.node(clstart))
365 365 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
366 366 hookargs = dict(tr.hookargs)
367 367 else:
368 368 hookargs = dict(tr.hookargs)
369 369 hookargs['node'] = hex(cl.node(clstart))
370 370 hookargs['node_last'] = hex(cl.node(clend - 1))
371 371 repo.hook('pretxnchangegroup',
372 372 throw=True, **pycompat.strkwargs(hookargs))
373 373
374 374 added = [cl.node(r) for r in xrange(clstart, clend)]
375 375 phaseall = None
376 376 if srctype in ('push', 'serve'):
377 377 # Old servers can not push the boundary themselves.
378 378 # New servers won't push the boundary if changeset already
379 379 # exists locally as secret
380 380 #
381 381 # We should not use 'added' here but the list of all changes in
382 382 # the bundle
383 383 if repo.publishing():
384 384 targetphase = phaseall = phases.public
385 385 else:
386 386 # closer target phase computation
387 387
388 388 # Those changesets have been pushed from the
389 389 # outside, their phases are going to be pushed
390 390 # alongside. Therefore `targetphase` is
391 391 # ignored.
392 392 targetphase = phaseall = phases.draft
393 393 if added:
394 394 phases.registernew(repo, tr, targetphase, added)
395 395 if phaseall is not None:
396 396 phases.advanceboundary(repo, tr, phaseall, cgnodes)
397 397
398 398 if changesets > 0:
399 399
400 400 def runhooks():
401 401 # These hooks run when the lock releases, not when the
402 402 # transaction closes. So it's possible for the changelog
403 403 # to have changed since we last saw it.
404 404 if clstart >= len(repo):
405 405 return
406 406
407 407 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
408 408
409 409 for n in added:
410 410 args = hookargs.copy()
411 411 args['node'] = hex(n)
412 412 del args['node_last']
413 413 repo.hook("incoming", **pycompat.strkwargs(args))
414 414
415 415 newheads = [h for h in repo.heads()
416 416 if h not in oldheads]
417 417 repo.ui.log("incoming",
418 418 "%s incoming changes - new heads: %s\n",
419 419 len(added),
420 420 ', '.join([hex(c[:6]) for c in newheads]))
421 421
422 422 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
423 423 lambda tr: repo._afterlock(runhooks))
424 424 finally:
425 425 repo.ui.flush()
426 426 # never return 0 here:
427 427 if deltaheads < 0:
428 428 ret = deltaheads - 1
429 429 else:
430 430 ret = deltaheads + 1
431 431 return ret
432 432
433 def deltaiter(self, linkmapper):
433 def deltaiter(self):
434 434 """
435 435 returns an iterator of the deltas in this changegroup
436 436
437 437 Useful for passing to the underlying storage system to be stored.
438 438 """
439 439 chain = None
440 440 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
441 441 node = chunkdata['node']
442 442 p1 = chunkdata['p1']
443 443 p2 = chunkdata['p2']
444 444 cs = chunkdata['cs']
445 445 deltabase = chunkdata['deltabase']
446 446 delta = chunkdata['delta']
447 447 flags = chunkdata['flags']
448 448
449 link = linkmapper(cs)
450 449 chain = node
451 450
452 yield (node, p1, p2, link, deltabase, delta, flags)
451 yield (node, p1, p2, cs, deltabase, delta, flags)
453 452
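To restate the API change this commit makes (mirroring the added lines in apply() above): the linkmapper moves from deltaiter() to addgroup(), so deltaiter() now yields the raw cs linknode and the storage layer resolves it.

```python
# Sketch of the new calling convention, using the names from apply():
deltas = self.deltaiter()
cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
```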
454 453 class cg2unpacker(cg1unpacker):
455 454 """Unpacker for cg2 streams.
456 455
457 456 cg2 streams add support for generaldelta, so the delta header
458 457 format is slightly different. All other features about the data
459 458 remain the same.
460 459 """
461 460 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
462 461 deltaheadersize = struct.calcsize(deltaheader)
463 462 version = '02'
464 463
465 464 def _deltaheader(self, headertuple, prevnode):
466 465 node, p1, p2, deltabase, cs = headertuple
467 466 flags = 0
468 467 return node, p1, p2, deltabase, cs, flags
469 468
470 469 class cg3unpacker(cg2unpacker):
471 470 """Unpacker for cg3 streams.
472 471
473 472 cg3 streams add support for exchanging treemanifests and revlog
474 473 flags. It adds the revlog flags to the delta header and an empty chunk
475 474 separating manifests and files.
476 475 """
477 476 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
478 477 deltaheadersize = struct.calcsize(deltaheader)
479 478 version = '03'
480 479 _grouplistcount = 2 # One list of manifests and one list of files
481 480
482 481 def _deltaheader(self, headertuple, prevnode):
483 482 node, p1, p2, deltabase, cs, flags = headertuple
484 483 return node, p1, p2, deltabase, cs, flags
485 484
486 485 def _unpackmanifests(self, repo, revmap, trp, prog, numchanges):
487 486 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog,
488 487 numchanges)
489 488 for chunkdata in iter(self.filelogheader, {}):
490 489 # If we get here, there are directory manifests in the changegroup
491 490 d = chunkdata["filename"]
492 491 repo.ui.debug("adding %s revisions\n" % d)
493 492 dirlog = repo.manifestlog._revlog.dirlog(d)
494 deltas = self.deltaiter(revmap)
495 if not dirlog.addgroup(deltas, trp):
493 deltas = self.deltaiter()
494 if not dirlog.addgroup(deltas, revmap, trp):
496 495 raise error.Abort(_("received dir revlog group is empty"))
497 496
498 497 class headerlessfixup(object):
499 498 def __init__(self, fh, h):
500 499 self._h = h
501 500 self._fh = fh
502 501 def read(self, n):
503 502 if self._h:
504 503 d, self._h = self._h[:n], self._h[n:]
505 504 if len(d) < n:
506 505 d += readexactly(self._fh, n - len(d))
507 506 return d
508 507 return readexactly(self._fh, n)
509 508
510 509 class cg1packer(object):
511 510 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
512 511 version = '01'
513 512 def __init__(self, repo, bundlecaps=None):
514 513 """Given a source repo, construct a bundler.
515 514
516 515 bundlecaps is optional and can be used to specify the set of
517 516 capabilities which can be used to build the bundle. While bundlecaps is
518 517 unused in core Mercurial, extensions rely on this feature to communicate
519 518 capabilities to customize the changegroup packer.
520 519 """
521 520 # Set of capabilities we can use to build the bundle.
522 521 if bundlecaps is None:
523 522 bundlecaps = set()
524 523 self._bundlecaps = bundlecaps
525 524 # experimental config: bundle.reorder
526 525 reorder = repo.ui.config('bundle', 'reorder')
527 526 if reorder == 'auto':
528 527 reorder = None
529 528 else:
530 529 reorder = util.parsebool(reorder)
531 530 self._repo = repo
532 531 self._reorder = reorder
533 532 self._progress = repo.ui.progress
534 533 if self._repo.ui.verbose and not self._repo.ui.debugflag:
535 534 self._verbosenote = self._repo.ui.note
536 535 else:
537 536 self._verbosenote = lambda s: None
538 537
539 538 def close(self):
540 539 return closechunk()
541 540
542 541 def fileheader(self, fname):
543 542 return chunkheader(len(fname)) + fname
544 543
545 544 # Extracted both for clarity and for overriding in extensions.
546 545 def _sortgroup(self, revlog, nodelist, lookup):
547 546 """Sort nodes for change group and turn them into revnums."""
548 547 # for generaldelta revlogs, we linearize the revs; this will both be
549 548 # much quicker and generate a much smaller bundle
550 549 if (revlog._generaldelta and self._reorder is None) or self._reorder:
551 550 dag = dagutil.revlogdag(revlog)
552 551 return dag.linearize(set(revlog.rev(n) for n in nodelist))
553 552 else:
554 553 return sorted([revlog.rev(n) for n in nodelist])
555 554
556 555 def group(self, nodelist, revlog, lookup, units=None):
557 556 """Calculate a delta group, yielding a sequence of changegroup chunks
558 557 (strings).
559 558
560 559 Given a list of changeset revs, return a set of deltas and
561 560 metadata corresponding to nodes. The first delta is
562 561 first parent(nodelist[0]) -> nodelist[0], the receiver is
563 562 guaranteed to have this parent as it has all history before
564 563 these changesets. In the case firstparent is nullrev the
565 564 changegroup starts with a full revision.
566 565
567 566 If units is not None, progress detail will be generated, units specifies
568 567 the type of revlog that is touched (changelog, manifest, etc.).
569 568 """
570 569 # if we don't have any revisions touched by these changesets, bail
571 570 if len(nodelist) == 0:
572 571 yield self.close()
573 572 return
574 573
575 574 revs = self._sortgroup(revlog, nodelist, lookup)
576 575
577 576 # add the parent of the first rev
578 577 p = revlog.parentrevs(revs[0])[0]
579 578 revs.insert(0, p)
580 579
581 580 # build deltas
582 581 total = len(revs) - 1
583 582 msgbundling = _('bundling')
584 583 for r in xrange(len(revs) - 1):
585 584 if units is not None:
586 585 self._progress(msgbundling, r + 1, unit=units, total=total)
587 586 prev, curr = revs[r], revs[r + 1]
588 587 linknode = lookup(revlog.node(curr))
589 588 for c in self.revchunk(revlog, curr, prev, linknode):
590 589 yield c
591 590
592 591 if units is not None:
593 592 self._progress(msgbundling, None)
594 593 yield self.close()
595 594
596 595 # filter any nodes that claim to be part of the known set
597 596 def prune(self, revlog, missing, commonrevs):
598 597 rr, rl = revlog.rev, revlog.linkrev
599 598 return [n for n in missing if rl(rr(n)) not in commonrevs]
600 599
601 600 def _packmanifests(self, dir, mfnodes, lookuplinknode):
602 601 """Pack flat manifests into a changegroup stream."""
603 602 assert not dir
604 603 for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
605 604 lookuplinknode, units=_('manifests')):
606 605 yield chunk
607 606
608 607 def _manifestsdone(self):
609 608 return ''
610 609
611 610 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
612 611 '''yield a sequence of changegroup chunks (strings)'''
613 612 repo = self._repo
614 613 cl = repo.changelog
615 614
616 615 clrevorder = {}
617 616 mfs = {} # needed manifests
618 617 fnodes = {} # needed file nodes
619 618 changedfiles = set()
620 619
621 620 # Callback for the changelog, used to collect changed files and manifest
622 621 # nodes.
623 622 # Returns the linkrev node (identity in the changelog case).
624 623 def lookupcl(x):
625 624 c = cl.read(x)
626 625 clrevorder[x] = len(clrevorder)
627 626 n = c[0]
628 627 # record the first changeset introducing this manifest version
629 628 mfs.setdefault(n, x)
630 629 # Record a complete list of potentially-changed files in
631 630 # this manifest.
632 631 changedfiles.update(c[3])
633 632 return x
634 633
635 634 self._verbosenote(_('uncompressed size of bundle content:\n'))
636 635 size = 0
637 636 for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
638 637 size += len(chunk)
639 638 yield chunk
640 639 self._verbosenote(_('%8.i (changelog)\n') % size)
641 640
642 641 # We need to make sure that the linkrev in the changegroup refers to
643 642 # the first changeset that introduced the manifest or file revision.
644 643 # The fastpath is usually safer than the slowpath, because the filelogs
645 644 # are walked in revlog order.
646 645 #
647 646 # When taking the slowpath with reorder=None and the manifest revlog
648 647 # uses generaldelta, the manifest may be walked in the "wrong" order.
649 648 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
650 649 # cc0ff93d0c0c).
651 650 #
652 651 # When taking the fastpath, we are only vulnerable to reordering
653 652 # of the changelog itself. The changelog never uses generaldelta, so
654 653 # it is only reordered when reorder=True. To handle this case, we
655 654 # simply take the slowpath, which already has the 'clrevorder' logic.
656 655 # This was also fixed in cc0ff93d0c0c.
657 656 fastpathlinkrev = fastpathlinkrev and not self._reorder
658 657 # Treemanifests don't work correctly with fastpathlinkrev
659 658 # either, because we don't discover which directory nodes to
660 659 # send along with files. This could probably be fixed.
661 660 fastpathlinkrev = fastpathlinkrev and (
662 661 'treemanifest' not in repo.requirements)
663 662
664 663 for chunk in self.generatemanifests(commonrevs, clrevorder,
665 664 fastpathlinkrev, mfs, fnodes, source):
666 665 yield chunk
667 666 mfs.clear()
668 667 clrevs = set(cl.rev(x) for x in clnodes)
669 668
670 669 if not fastpathlinkrev:
671 670 def linknodes(unused, fname):
672 671 return fnodes.get(fname, {})
673 672 else:
674 673 cln = cl.node
675 674 def linknodes(filerevlog, fname):
676 675 llr = filerevlog.linkrev
677 676 fln = filerevlog.node
678 677 revs = ((r, llr(r)) for r in filerevlog)
679 678 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
680 679
681 680 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
682 681 source):
683 682 yield chunk
684 683
685 684 yield self.close()
686 685
687 686 if clnodes:
688 687 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
689 688
690 689 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
691 690 fnodes, source):
692 691 """Returns an iterator of changegroup chunks containing manifests.
693 692
694 693 `source` is unused here, but is used by extensions like remotefilelog to
695 694 change what is sent based on pulls vs pushes, etc.
696 695 """
697 696 repo = self._repo
698 697 mfl = repo.manifestlog
699 698 dirlog = mfl._revlog.dirlog
700 699 tmfnodes = {'': mfs}
701 700
702 701 # Callback for the manifest, used to collect linkrevs for filelog
703 702 # revisions.
704 703 # Returns the linkrev node (collected in lookupcl).
705 704 def makelookupmflinknode(dir):
706 705 if fastpathlinkrev:
707 706 assert not dir
708 707 return mfs.__getitem__
709 708
710 709 def lookupmflinknode(x):
711 710 """Callback for looking up the linknode for manifests.
712 711
713 712 Returns the linkrev node for the specified manifest.
714 713
715 714 SIDE EFFECT:
716 715
717 716 1) fclnodes gets populated with the list of relevant
718 717 file nodes if we're not using fastpathlinkrev
719 718 2) When treemanifests are in use, collects treemanifest nodes
720 719 to send
721 720
722 721 Note that this means manifests must be completely sent to
723 722 the client before you can trust the list of files and
724 723 treemanifests to send.
725 724 """
726 725 clnode = tmfnodes[dir][x]
727 726 mdata = mfl.get(dir, x).readfast(shallow=True)
728 727 for p, n, fl in mdata.iterentries():
729 728 if fl == 't': # subdirectory manifest
730 729 subdir = dir + p + '/'
731 730 tmfclnodes = tmfnodes.setdefault(subdir, {})
732 731 tmfclnode = tmfclnodes.setdefault(n, clnode)
733 732 if clrevorder[clnode] < clrevorder[tmfclnode]:
734 733 tmfclnodes[n] = clnode
735 734 else:
736 735 f = dir + p
737 736 fclnodes = fnodes.setdefault(f, {})
738 737 fclnode = fclnodes.setdefault(n, clnode)
739 738 if clrevorder[clnode] < clrevorder[fclnode]:
740 739 fclnodes[n] = clnode
741 740 return clnode
742 741 return lookupmflinknode
743 742
744 743 size = 0
745 744 while tmfnodes:
746 745 dir = min(tmfnodes)
747 746 nodes = tmfnodes[dir]
748 747 prunednodes = self.prune(dirlog(dir), nodes, commonrevs)
749 748 if not dir or prunednodes:
750 749 for x in self._packmanifests(dir, prunednodes,
751 750 makelookupmflinknode(dir)):
752 751 size += len(x)
753 752 yield x
754 753 del tmfnodes[dir]
755 754 self._verbosenote(_('%8.i (manifests)\n') % size)
756 755 yield self._manifestsdone()
757 756
758 757 # The 'source' parameter is useful for extensions
759 758 def generatefiles(self, changedfiles, linknodes, commonrevs, source):
760 759 repo = self._repo
761 760 progress = self._progress
762 761 msgbundling = _('bundling')
763 762
764 763 total = len(changedfiles)
765 764 # for progress output
766 765 msgfiles = _('files')
767 766 for i, fname in enumerate(sorted(changedfiles)):
768 767 filerevlog = repo.file(fname)
769 768 if not filerevlog:
770 769 raise error.Abort(_("empty or missing revlog for %s") % fname)
771 770
772 771 linkrevnodes = linknodes(filerevlog, fname)
773 772 # Lookup for filenodes, we collected the linkrev nodes above in the
774 773 # fastpath case and with lookupmf in the slowpath case.
775 774 def lookupfilelog(x):
776 775 return linkrevnodes[x]
777 776
778 777 filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
779 778 if filenodes:
780 779 progress(msgbundling, i + 1, item=fname, unit=msgfiles,
781 780 total=total)
782 781 h = self.fileheader(fname)
783 782 size = len(h)
784 783 yield h
785 784 for chunk in self.group(filenodes, filerevlog, lookupfilelog):
786 785 size += len(chunk)
787 786 yield chunk
788 787 self._verbosenote(_('%8.i %s\n') % (size, fname))
789 788 progress(msgbundling, None)
790 789
791 790 def deltaparent(self, revlog, rev, p1, p2, prev):
792 791 return prev
793 792
794 793 def revchunk(self, revlog, rev, prev, linknode):
795 794 node = revlog.node(rev)
796 795 p1, p2 = revlog.parentrevs(rev)
797 796 base = self.deltaparent(revlog, rev, p1, p2, prev)
798 797
799 798 prefix = ''
800 799 if revlog.iscensored(base) or revlog.iscensored(rev):
801 800 try:
802 801 delta = revlog.revision(node, raw=True)
803 802 except error.CensoredNodeError as e:
804 803 delta = e.tombstone
805 804 if base == nullrev:
806 805 prefix = mdiff.trivialdiffheader(len(delta))
807 806 else:
808 807 baselen = revlog.rawsize(base)
809 808 prefix = mdiff.replacediffheader(baselen, len(delta))
810 809 elif base == nullrev:
811 810 delta = revlog.revision(node, raw=True)
812 811 prefix = mdiff.trivialdiffheader(len(delta))
813 812 else:
814 813 delta = revlog.revdiff(base, rev)
815 814 p1n, p2n = revlog.parents(node)
816 815 basenode = revlog.node(base)
817 816 flags = revlog.flags(rev)
818 817 meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags)
819 818 meta += prefix
820 819 l = len(meta) + len(delta)
821 820 yield chunkheader(l)
822 821 yield meta
823 822 yield delta
824 823 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
825 824 # do nothing with basenode, it is implicitly the previous one in HG10
826 825 # do nothing with flags, it is implicitly 0 for cg1 and cg2
827 826 return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
828 827
829 828 class cg2packer(cg1packer):
830 829 version = '02'
831 830 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
832 831
833 832 def __init__(self, repo, bundlecaps=None):
834 833 super(cg2packer, self).__init__(repo, bundlecaps)
835 834 if self._reorder is None:
836 835 # Since generaldelta is directly supported by cg2, reordering
837 836 # generally doesn't help, so we disable it by default (treating
838 837 # bundle.reorder=auto just like bundle.reorder=False).
839 838 self._reorder = False
840 839
841 840 def deltaparent(self, revlog, rev, p1, p2, prev):
842 841 dp = revlog.deltaparent(rev)
843 842 if dp == nullrev and revlog.storedeltachains:
844 843 # Avoid sending full revisions when delta parent is null. Pick prev
845 844 # in that case. It's tempting to pick p1 in this case, as p1 will
846 845 # be smaller in the common case. However, computing a delta against
847 846 # p1 may require resolving the raw text of p1, which could be
848 847 # expensive. The revlog caches should have prev cached, meaning
849 848 # less CPU for changegroup generation. There is likely room to add
850 849 # a flag and/or config option to control this behavior.
851 850 return prev
852 851 elif dp == nullrev:
853 852 # revlog is configured to use full snapshot for a reason,
854 853 # stick to full snapshot.
855 854 return nullrev
856 855 elif dp not in (p1, p2, prev):
857 856 # Pick prev when we can't be sure remote has the base revision.
858 857 return prev
859 858 else:
860 859 return dp
861 860
862 861 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
863 862 # Do nothing with flags, it is implicitly 0 in cg1 and cg2
864 863 return struct.pack(self.deltaheader, node, p1n, p2n, basenode, linknode)
865 864
866 865 class cg3packer(cg2packer):
867 866 version = '03'
868 867 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
869 868
870 869 def _packmanifests(self, dir, mfnodes, lookuplinknode):
871 870 if dir:
872 871 yield self.fileheader(dir)
873 872
874 873 dirlog = self._repo.manifestlog._revlog.dirlog(dir)
875 874 for chunk in self.group(mfnodes, dirlog, lookuplinknode,
876 875 units=_('manifests')):
877 876 yield chunk
878 877
879 878 def _manifestsdone(self):
880 879 return self.close()
881 880
882 881 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
883 882 return struct.pack(
884 883 self.deltaheader, node, p1n, p2n, basenode, linknode, flags)
885 884
886 885 _packermap = {'01': (cg1packer, cg1unpacker),
887 886 # cg2 adds support for exchanging generaldelta
888 887 '02': (cg2packer, cg2unpacker),
889 888 # cg3 adds support for exchanging revlog flags and treemanifests
890 889 '03': (cg3packer, cg3unpacker),
891 890 }
892 891
893 892 def allsupportedversions(repo):
894 893 versions = set(_packermap.keys())
895 894 if not (repo.ui.configbool('experimental', 'changegroup3') or
896 895 repo.ui.configbool('experimental', 'treemanifest') or
897 896 'treemanifest' in repo.requirements):
898 897 versions.discard('03')
899 898 return versions
900 899
901 900 # Changegroup versions that can be applied to the repo
902 901 def supportedincomingversions(repo):
903 902 return allsupportedversions(repo)
904 903
905 904 # Changegroup versions that can be created from the repo
906 905 def supportedoutgoingversions(repo):
907 906 versions = allsupportedversions(repo)
908 907 if 'treemanifest' in repo.requirements:
909 908 # Versions 01 and 02 support only flat manifests and it's just too
910 909 # expensive to convert between the flat manifest and tree manifest on
911 910 # the fly. Since tree manifests are hashed differently, all of history
912 911 # would have to be converted. Instead, we simply don't even pretend to
913 912 # support versions 01 and 02.
914 913 versions.discard('01')
915 914 versions.discard('02')
916 915 return versions
917 916
918 917 def localversion(repo):
919 918 # Finds the best version to use for bundles that are meant to be used
920 919 # locally, such as those from strip and shelve, and temporary bundles.
921 920 return max(supportedoutgoingversions(repo))
922 921
923 922 def safeversion(repo):
924 923 # Finds the smallest version that it's safe to assume clients of the repo
925 924 # will support. For example, all hg versions that support generaldelta also
926 925 # support changegroup 02.
927 926 versions = supportedoutgoingversions(repo)
928 927 if 'generaldelta' in repo.requirements:
929 928 versions.discard('01')
930 929 assert versions
931 930 return min(versions)
932 931
933 932 def getbundler(version, repo, bundlecaps=None):
934 933 assert version in supportedoutgoingversions(repo)
935 934 return _packermap[version][0](repo, bundlecaps)
936 935
937 936 def getunbundler(version, fh, alg, extras=None):
938 937 return _packermap[version][1](fh, alg, extras=extras)
939 938
940 939 def _changegroupinfo(repo, nodes, source):
941 940 if repo.ui.verbose or source == 'bundle':
942 941 repo.ui.status(_("%d changesets found\n") % len(nodes))
943 942 if repo.ui.debugflag:
944 943 repo.ui.debug("list of changesets:\n")
945 944 for node in nodes:
946 945 repo.ui.debug("%s\n" % hex(node))
947 946
948 947 def makechangegroup(repo, outgoing, version, source, fastpath=False,
949 948 bundlecaps=None):
950 949 cgstream = makestream(repo, outgoing, version, source,
951 950 fastpath=fastpath, bundlecaps=bundlecaps)
952 951 return getunbundler(version, util.chunkbuffer(cgstream), None,
953 952 {'clcount': len(outgoing.missing) })
954 953
955 954 def makestream(repo, outgoing, version, source, fastpath=False,
956 955 bundlecaps=None):
957 956 bundler = getbundler(version, repo, bundlecaps=bundlecaps)
958 957
959 958 repo = repo.unfiltered()
960 959 commonrevs = outgoing.common
961 960 csets = outgoing.missing
962 961 heads = outgoing.missingheads
963 962 # We go through the fast path if we get told to, or if all (unfiltered)
964 963 # heads have been requested (since we then know all linkrevs will
965 964 # be pulled by the client).
966 965 heads.sort()
967 966 fastpathlinkrev = fastpath or (
968 967 repo.filtername is None and heads == sorted(repo.heads()))
969 968
970 969 repo.hook('preoutgoing', throw=True, source=source)
971 970 _changegroupinfo(repo, csets, source)
972 971 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
973 972
974 973 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
975 974 revisions = 0
976 975 files = 0
977 976 for chunkdata in iter(source.filelogheader, {}):
978 977 files += 1
979 978 f = chunkdata["filename"]
980 979 repo.ui.debug("adding %s revisions\n" % f)
981 980 repo.ui.progress(_('files'), files, unit=_('files'),
982 981 total=expectedfiles)
983 982 fl = repo.file(f)
984 983 o = len(fl)
985 984 try:
986 deltas = source.deltaiter(revmap)
987 if not fl.addgroup(deltas, trp):
985 deltas = source.deltaiter()
986 if not fl.addgroup(deltas, revmap, trp):
988 987 raise error.Abort(_("received file revlog group is empty"))
989 988 except error.CensoredBaseError as e:
990 989 raise error.Abort(_("received delta base is censored: %s") % e)
991 990 revisions += len(fl) - o
992 991 if f in needfiles:
993 992 needs = needfiles[f]
994 993 for new in xrange(o, len(fl)):
995 994 n = fl.node(new)
996 995 if n in needs:
997 996 needs.remove(n)
998 997 else:
999 998 raise error.Abort(
1000 999 _("received spurious file revlog entry"))
1001 1000 if not needs:
1002 1001 del needfiles[f]
1003 1002 repo.ui.progress(_('files'), None)
1004 1003
1005 1004 for f, needs in needfiles.iteritems():
1006 1005 fl = repo.file(f)
1007 1006 for n in needs:
1008 1007 try:
1009 1008 fl.rev(n)
1010 1009 except error.LookupError:
1011 1010 raise error.Abort(
1012 1011 _('missing file data for %s:%s - run hg verify') %
1013 1012 (f, hex(n)))
1014 1013
1015 1014 return revisions, files
mercurial/revlog.py
@@ -1,2252 +1,2253 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import errno
19 19 import hashlib
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullid,
29 29 nullrev,
30 30 wdirhex,
31 31 wdirid,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from . import (
36 36 ancestor,
37 37 error,
38 38 mdiff,
39 39 policy,
40 40 pycompat,
41 41 templatefilters,
42 42 util,
43 43 )
44 44
45 45 parsers = policy.importmod(r'parsers')
46 46
47 47 # Aliased for performance.
48 48 _zlibdecompress = zlib.decompress
49 49
50 50 # revlog header flags
51 51 REVLOGV0 = 0
52 52 REVLOGV1 = 1
53 53 # Dummy value until file format is finalized.
54 54 # Reminder: change the bounds check in revlog.__init__ when this is changed.
55 55 REVLOGV2 = 0xDEAD
56 56 FLAG_INLINE_DATA = (1 << 16)
57 57 FLAG_GENERALDELTA = (1 << 17)
58 58 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
59 59 REVLOG_DEFAULT_FORMAT = REVLOGV1
60 60 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
61 61 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
62 62 REVLOGV2_FLAGS = REVLOGV1_FLAGS
63 63
64 64 # revlog index flags
65 65 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
66 66 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
67 67 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
68 68 REVIDX_DEFAULT_FLAGS = 0
69 69 # stable order in which flags need to be processed and their processors applied
70 70 REVIDX_FLAGS_ORDER = [
71 71 REVIDX_ISCENSORED,
72 72 REVIDX_ELLIPSIS,
73 73 REVIDX_EXTSTORED,
74 74 ]
75 75 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
76 76
77 77 # max size of revlog with inline data
78 78 _maxinline = 131072
79 79 _chunksize = 1048576
80 80
81 81 RevlogError = error.RevlogError
82 82 LookupError = error.LookupError
83 83 CensoredNodeError = error.CensoredNodeError
84 84 ProgrammingError = error.ProgrammingError
85 85
86 86 # Store flag processors (cf. 'addflagprocessor()' to register)
87 87 _flagprocessors = {
88 88 REVIDX_ISCENSORED: None,
89 89 }
90 90
91 91 def addflagprocessor(flag, processor):
92 92 """Register a flag processor on a revision data flag.
93 93
94 94 Invariant:
95 95 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
96 96 - Only one flag processor can be registered on a specific flag.
97 97 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
98 98 following signatures:
99 99 - (read) f(self, rawtext) -> text, bool
100 100 - (write) f(self, text) -> rawtext, bool
101 101 - (raw) f(self, rawtext) -> bool
102 102 "text" is presented to the user. "rawtext" is stored in revlog data, not
103 103 directly visible to the user.
104 104 The boolean returned by these transforms is used to determine whether
105 105 the returned text can be used for hash integrity checking. For example,
106 106 if "write" returns False, then "text" is used to generate hash. If
107 107 "write" returns True, that basically means "rawtext" returned by "write"
108 108 should be used to generate hash. Usually, "write" and "read" return
109 109 different booleans. And "raw" returns a same boolean as "write".
110 110
111 111 Note: The 'raw' transform is used for changegroup generation and in some
112 112 debug commands. In this case the transform only indicates whether the
113 113 contents can be used for hash integrity checks.
114 114 """
115 115 if not flag & REVIDX_KNOWN_FLAGS:
116 116 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
117 117 raise ProgrammingError(msg)
118 118 if flag not in REVIDX_FLAGS_ORDER:
119 119 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
120 120 raise ProgrammingError(msg)
121 121 if flag in _flagprocessors:
122 122 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
123 123 raise error.Abort(msg)
124 124 _flagprocessors[flag] = processor
125 125
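A hypothetical registration, purely illustrative (the processor bodies and the choice of REVIDX_EXTSTORED are assumptions): the 3-tuple supplies the read, write, and raw transforms described in the docstring.

```python
# Hypothetical flag processor that stores text unchanged; every
# transform therefore reports the result as safe for hash checking.
def _readext(self, rawtext):
    return rawtext, True      # (read)  rawtext -> text, hashable

def _writeext(self, text):
    return text, True         # (write) text -> rawtext, hashable

def _rawext(self, rawtext):
    return True               # (raw)   rawtext usable for hashing

addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))
```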
126 126 def getoffset(q):
127 127 return int(q >> 16)
128 128
129 129 def gettype(q):
130 130 return int(q & 0xFFFF)
131 131
132 132 def offset_type(offset, type):
133 133 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
134 134 raise ValueError('unknown revlog index flags')
135 135 return int(int(offset) << 16 | type)
136 136
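A small round-trip sketch of the helpers above: the low 16 bits of an index entry's first field carry the flags, and the remaining high bits carry the byte offset.

```python
# Round-trip sketch for offset_type()/getoffset()/gettype().
v = offset_type(1024, REVIDX_ISCENSORED)
assert getoffset(v) == 1024
assert gettype(v) == REVIDX_ISCENSORED
```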
137 137 _nullhash = hashlib.sha1(nullid)
138 138
139 139 def hash(text, p1, p2):
140 140 """generate a hash from the given text and its parent hashes
141 141
142 142 This hash combines both the current file contents and its history
143 143 in a manner that makes it easy to distinguish nodes with the same
144 144 content in the revision graph.
145 145 """
146 146 # As of now, if one of the parent nodes is null, p2 is null
147 147 if p2 == nullid:
148 148 # deep copy of a hash is faster than creating one
149 149 s = _nullhash.copy()
150 150 s.update(p1)
151 151 else:
152 152 # none of the parent nodes are nullid
153 153 if p1 < p2:
154 154 a = p1
155 155 b = p2
156 156 else:
157 157 a = p2
158 158 b = p1
159 159 s = hashlib.sha1(a)
160 160 s.update(b)
161 161 s.update(text)
162 162 return s.digest()
163 163
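A worked sketch of the non-null branch of hash() (the parent values are made up): the parents are concatenated smaller-first, then the text is appended.

```python
# Worked example for hash() with two non-null parents.
import hashlib
p1, p2 = b'\x11' * 20, b'\x22' * 20
expected = hashlib.sha1(min(p1, p2) + max(p1, p2) + b'data').digest()
assert hash(b'data', p1, p2) == expected
```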
164 164 # index v0:
165 165 # 4 bytes: offset
166 166 # 4 bytes: compressed length
167 167 # 4 bytes: base rev
168 168 # 4 bytes: link rev
169 169 # 20 bytes: parent 1 nodeid
170 170 # 20 bytes: parent 2 nodeid
171 171 # 20 bytes: nodeid
172 172 indexformatv0 = struct.Struct(">4l20s20s20s")
173 173 indexformatv0_pack = indexformatv0.pack
174 174 indexformatv0_unpack = indexformatv0.unpack
175 175
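A quick cross-check of the layout comment above: four 4-byte fields plus three 20-byte nodeids give a 76-byte record.

```python
# v0 index record size: 4*4 + 3*20 = 76 bytes.
assert indexformatv0.size == 76
```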
176 176 class revlogoldio(object):
177 177 def __init__(self):
178 178 self.size = indexformatv0.size
179 179
180 180 def parseindex(self, data, inline):
181 181 s = self.size
182 182 index = []
183 183 nodemap = {nullid: nullrev}
184 184 n = off = 0
185 185 l = len(data)
186 186 while off + s <= l:
187 187 cur = data[off:off + s]
188 188 off += s
189 189 e = indexformatv0_unpack(cur)
190 190 # transform to revlogv1 format
191 191 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
192 192 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
193 193 index.append(e2)
194 194 nodemap[e[6]] = n
195 195 n += 1
196 196
197 197 # add the magic null revision at -1
198 198 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
199 199
200 200 return index, nodemap, None
201 201
202 202 def packentry(self, entry, node, version, rev):
203 203 if gettype(entry[0]):
204 204 raise RevlogError(_('index entry flags need revlog version 1'))
205 205 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
206 206 node(entry[5]), node(entry[6]), entry[7])
207 207 return indexformatv0_pack(*e2)
208 208
209 209 # index ng:
210 210 # 6 bytes: offset
211 211 # 2 bytes: flags
212 212 # 4 bytes: compressed length
213 213 # 4 bytes: uncompressed length
214 214 # 4 bytes: base rev
215 215 # 4 bytes: link rev
216 216 # 4 bytes: parent 1 rev
217 217 # 4 bytes: parent 2 rev
218 218 # 32 bytes: nodeid
219 219 indexformatng = struct.Struct(">Qiiiiii20s12x")
220 220 indexformatng_pack = indexformatng.pack
221 221 versionformat = struct.Struct(">I")
222 222 versionformat_pack = versionformat.pack
223 223 versionformat_unpack = versionformat.unpack
224 224
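The matching check for the v1 ("ng") layout: 8 + 6*4 + 20 + 12 padding bytes is 64 per entry; note that packentry() below overlays the first entry's leading 4 bytes with the version header.

```python
# v1 ("ng") index record size: 8 + 6*4 + 20 + 12x padding = 64 bytes.
assert indexformatng.size == 64
```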
225 225 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
226 226 # signed integer)
227 227 _maxentrysize = 0x7fffffff
228 228
229 229 class revlogio(object):
230 230 def __init__(self):
231 231 self.size = indexformatng.size
232 232
233 233 def parseindex(self, data, inline):
234 234 # call the C implementation to parse the index data
235 235 index, cache = parsers.parse_index2(data, inline)
236 236 return index, getattr(index, 'nodemap', None), cache
237 237
238 238 def packentry(self, entry, node, version, rev):
239 239 p = indexformatng_pack(*entry)
240 240 if rev == 0:
241 241 p = versionformat_pack(version) + p[4:]
242 242 return p
243 243
244 244 class revlog(object):
245 245 """
246 246 the underlying revision storage object
247 247
248 248 A revlog consists of two parts, an index and the revision data.
249 249
250 250 The index is a file with a fixed record size containing
251 251 information on each revision, including its nodeid (hash), the
252 252 nodeids of its parents, the position and offset of its data within
253 253 the data file, and the revision it's based on. Finally, each entry
254 254 contains a linkrev entry that can serve as a pointer to external
255 255 data.
256 256
257 257 The revision data itself is a linear collection of data chunks.
258 258 Each chunk represents a revision and is usually represented as a
259 259 delta against the previous chunk. To bound lookup time, runs of
260 260 deltas are limited to about 2 times the length of the original
261 261 version data. This makes retrieval of a version proportional to
262 262 its size, or O(1) relative to the number of revisions.
263 263
264 264 Both pieces of the revlog are written to in an append-only
265 265 fashion, which means we never need to rewrite a file to insert or
266 266 remove data, and can use some simple techniques to avoid the need
267 267 for locking while reading.
268 268
269 269 If checkambig, indexfile is opened with checkambig=True at
270 270 writing, to avoid file stat ambiguity.
271 271 """
272 272 def __init__(self, opener, indexfile, datafile=None, checkambig=False):
273 273 """
274 274 create a revlog object
275 275
276 276 opener is a function that abstracts the file opening operation
277 277 and can be used to implement COW semantics or the like.
278 278 """
279 279 self.indexfile = indexfile
280 280 self.datafile = datafile or (indexfile[:-2] + ".d")
281 281 self.opener = opener
282 282 # When True, indexfile is opened with checkambig=True at writing, to
283 283 # avoid file stat ambiguity.
284 284 self._checkambig = checkambig
285 285 # 3-tuple of (node, rev, text) for a raw revision.
286 286 self._cache = None
287 287 # Maps rev to chain base rev.
288 288 self._chainbasecache = util.lrucachedict(100)
289 289 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
290 290 self._chunkcache = (0, '')
291 291 # How much data to read and cache into the raw revlog data cache.
292 292 self._chunkcachesize = 65536
293 293 self._maxchainlen = None
294 294 self._aggressivemergedeltas = False
295 295 self.index = []
296 296 # Mapping of partial identifiers to full nodes.
297 297 self._pcache = {}
298 298 # Mapping of revision integer to full node.
299 299 self._nodecache = {nullid: nullrev}
300 300 self._nodepos = None
301 301 self._compengine = 'zlib'
302 302 self._maxdeltachainspan = -1
303 303
304 304 v = REVLOG_DEFAULT_VERSION
305 305 opts = getattr(opener, 'options', None)
306 306 if opts is not None:
307 307 if 'revlogv2' in opts:
308 308 # version 2 revlogs always use generaldelta.
309 309 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
310 310 elif 'revlogv1' in opts:
311 311 if 'generaldelta' in opts:
312 312 v |= FLAG_GENERALDELTA
313 313 else:
314 314 v = 0
315 315 if 'chunkcachesize' in opts:
316 316 self._chunkcachesize = opts['chunkcachesize']
317 317 if 'maxchainlen' in opts:
318 318 self._maxchainlen = opts['maxchainlen']
319 319 if 'aggressivemergedeltas' in opts:
320 320 self._aggressivemergedeltas = opts['aggressivemergedeltas']
321 321 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
322 322 if 'compengine' in opts:
323 323 self._compengine = opts['compengine']
324 324 if 'maxdeltachainspan' in opts:
325 325 self._maxdeltachainspan = opts['maxdeltachainspan']
326 326
327 327 if self._chunkcachesize <= 0:
328 328 raise RevlogError(_('revlog chunk cache size %r is not greater '
329 329 'than 0') % self._chunkcachesize)
330 330 elif self._chunkcachesize & (self._chunkcachesize - 1):
331 331 raise RevlogError(_('revlog chunk cache size %r is not a power '
332 332 'of 2') % self._chunkcachesize)
333 333
334 334 indexdata = ''
335 335 self._initempty = True
336 336 try:
337 337 f = self.opener(self.indexfile)
338 338 indexdata = f.read()
339 339 f.close()
340 340 if len(indexdata) > 0:
341 341 v = versionformat_unpack(indexdata[:4])[0]
342 342 self._initempty = False
343 343 except IOError as inst:
344 344 if inst.errno != errno.ENOENT:
345 345 raise
346 346
347 347 self.version = v
348 348 self._inline = v & FLAG_INLINE_DATA
349 349 self._generaldelta = v & FLAG_GENERALDELTA
350 350 flags = v & ~0xFFFF
351 351 fmt = v & 0xFFFF
352 352 if fmt == REVLOGV0:
353 353 if flags:
354 354 raise RevlogError(_('unknown flags (%#04x) in version %d '
355 355 'revlog %s') %
356 356 (flags >> 16, fmt, self.indexfile))
357 357 elif fmt == REVLOGV1:
358 358 if flags & ~REVLOGV1_FLAGS:
359 359 raise RevlogError(_('unknown flags (%#04x) in version %d '
360 360 'revlog %s') %
361 361 (flags >> 16, fmt, self.indexfile))
362 362 elif fmt == REVLOGV2:
363 363 if flags & ~REVLOGV2_FLAGS:
364 364 raise RevlogError(_('unknown flags (%#04x) in version %d '
365 365 'revlog %s') %
366 366 (flags >> 16, fmt, self.indexfile))
367 367 else:
368 368 raise RevlogError(_('unknown version (%d) in revlog %s') %
369 369 (fmt, self.indexfile))
370 370
371 371 self.storedeltachains = True
372 372
373 373 self._io = revlogio()
374 374 if self.version == REVLOGV0:
375 375 self._io = revlogoldio()
376 376 try:
377 377 d = self._io.parseindex(indexdata, self._inline)
378 378 except (ValueError, IndexError):
379 379 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
380 380 self.index, nodemap, self._chunkcache = d
381 381 if nodemap is not None:
382 382 self.nodemap = self._nodecache = nodemap
383 383 if not self._chunkcache:
384 384 self._chunkclear()
385 385 # revnum -> (chain-length, sum-delta-length)
386 386 self._chaininfocache = {}
387 387 # revlog header -> revlog compressor
388 388 self._decompressors = {}
389 389
390 390 @util.propertycache
391 391 def _compressor(self):
392 392 return util.compengines[self._compengine].revlogcompressor()
393 393
394 394 def tip(self):
395 395 return self.node(len(self.index) - 2)
396 396 def __contains__(self, rev):
397 397 return 0 <= rev < len(self)
398 398 def __len__(self):
399 399 return len(self.index) - 1
400 400 def __iter__(self):
401 401 return iter(xrange(len(self)))
402 402 def revs(self, start=0, stop=None):
403 403 """iterate over all rev in this revlog (from start to stop)"""
404 404 step = 1
405 405 if stop is not None:
406 406 if start > stop:
407 407 step = -1
408 408 stop += step
409 409 else:
410 410 stop = len(self)
411 411 return xrange(start, stop, step)
412 412
413 413 @util.propertycache
414 414 def nodemap(self):
415 415 self.rev(self.node(0))
416 416 return self._nodecache
417 417
418 418 def hasnode(self, node):
419 419 try:
420 420 self.rev(node)
421 421 return True
422 422 except KeyError:
423 423 return False
424 424
425 425 def clearcaches(self):
426 426 self._cache = None
427 427 self._chainbasecache.clear()
428 428 self._chunkcache = (0, '')
429 429 self._pcache = {}
430 430
431 431 try:
432 432 self._nodecache.clearcaches()
433 433 except AttributeError:
434 434 self._nodecache = {nullid: nullrev}
435 435 self._nodepos = None
436 436
437 437 def rev(self, node):
438 438 try:
439 439 return self._nodecache[node]
440 440 except TypeError:
441 441 raise
442 442 except RevlogError:
443 443 # parsers.c radix tree lookup failed
444 444 if node == wdirid:
445 445 raise error.WdirUnsupported
446 446 raise LookupError(node, self.indexfile, _('no node'))
447 447 except KeyError:
448 448 # pure python cache lookup failed
449 449 n = self._nodecache
450 450 i = self.index
451 451 p = self._nodepos
452 452 if p is None:
453 453 p = len(i) - 2
454 454 for r in xrange(p, -1, -1):
455 455 v = i[r][7]
456 456 n[v] = r
457 457 if v == node:
458 458 self._nodepos = r - 1
459 459 return r
460 460 if node == wdirid:
461 461 raise error.WdirUnsupported
462 462 raise LookupError(node, self.indexfile, _('no node'))
463 463
464 464 # Accessors for index entries.
465 465
466 466 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
467 467 # are flags.
468 468 def start(self, rev):
469 469 return int(self.index[rev][0] >> 16)
470 470
471 471 def flags(self, rev):
472 472 return self.index[rev][0] & 0xFFFF
473 473
474 474 def length(self, rev):
475 475 return self.index[rev][1]
476 476
477 477 def rawsize(self, rev):
478 478 """return the length of the uncompressed text for a given revision"""
479 479 l = self.index[rev][2]
480 480 if l >= 0:
481 481 return l
482 482
483 483 t = self.revision(rev, raw=True)
484 484 return len(t)
485 485
486 486 def size(self, rev):
487 487 """length of non-raw text (processed by a "read" flag processor)"""
488 488 # fast path: if no "read" flag processor could change the content,
489 489 # size is rawsize. note: ELLIPSIS is known to not change the content.
490 490 flags = self.flags(rev)
491 491 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
492 492 return self.rawsize(rev)
493 493
494 494 return len(self.revision(rev, raw=False))
495 495
496 496 def chainbase(self, rev):
497 497 base = self._chainbasecache.get(rev)
498 498 if base is not None:
499 499 return base
500 500
501 501 index = self.index
502 502 base = index[rev][3]
503 503 while base != rev:
504 504 rev = base
505 505 base = index[rev][3]
506 506
507 507 self._chainbasecache[rev] = base
508 508 return base
509 509
510 510 def linkrev(self, rev):
511 511 return self.index[rev][4]
512 512
513 513 def parentrevs(self, rev):
514 514 try:
515 515 return self.index[rev][5:7]
516 516 except IndexError:
517 517 if rev == wdirrev:
518 518 raise error.WdirUnsupported
519 519 raise
520 520
521 521 def node(self, rev):
522 522 try:
523 523 return self.index[rev][7]
524 524 except IndexError:
525 525 if rev == wdirrev:
526 526 raise error.WdirUnsupported
527 527 raise
528 528
529 529 # Derived from index values.
530 530
531 531 def end(self, rev):
532 532 return self.start(rev) + self.length(rev)
533 533
534 534 def parents(self, node):
535 535 i = self.index
536 536 d = i[self.rev(node)]
537 537 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
538 538
539 539 def chainlen(self, rev):
540 540 return self._chaininfo(rev)[0]
541 541
542 542 def _chaininfo(self, rev):
543 543 chaininfocache = self._chaininfocache
544 544 if rev in chaininfocache:
545 545 return chaininfocache[rev]
546 546 index = self.index
547 547 generaldelta = self._generaldelta
548 548 iterrev = rev
549 549 e = index[iterrev]
550 550 clen = 0
551 551 compresseddeltalen = 0
552 552 while iterrev != e[3]:
553 553 clen += 1
554 554 compresseddeltalen += e[1]
555 555 if generaldelta:
556 556 iterrev = e[3]
557 557 else:
558 558 iterrev -= 1
559 559 if iterrev in chaininfocache:
560 560 t = chaininfocache[iterrev]
561 561 clen += t[0]
562 562 compresseddeltalen += t[1]
563 563 break
564 564 e = index[iterrev]
565 565 else:
566 566 # Add text length of base since decompressing that also takes
567 567 # work. For cache hits the length is already included.
568 568 compresseddeltalen += e[1]
569 569 r = (clen, compresseddeltalen)
570 570 chaininfocache[rev] = r
571 571 return r
572 572
573 573 def _deltachain(self, rev, stoprev=None):
574 574 """Obtain the delta chain for a revision.
575 575
576 576 ``stoprev`` specifies a revision to stop at. If not specified, we
577 577 stop at the base of the chain.
578 578
579 579 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
580 580 revs in ascending order and ``stopped`` is a bool indicating whether
581 581 ``stoprev`` was hit.
582 582 """
583 583 # Try C implementation.
584 584 try:
585 585 return self.index.deltachain(rev, stoprev, self._generaldelta)
586 586 except AttributeError:
587 587 pass
588 588
589 589 chain = []
590 590
591 591 # Alias to prevent attribute lookup in tight loop.
592 592 index = self.index
593 593 generaldelta = self._generaldelta
594 594
595 595 iterrev = rev
596 596 e = index[iterrev]
597 597 while iterrev != e[3] and iterrev != stoprev:
598 598 chain.append(iterrev)
599 599 if generaldelta:
600 600 iterrev = e[3]
601 601 else:
602 602 iterrev -= 1
603 603 e = index[iterrev]
604 604
605 605 if iterrev == stoprev:
606 606 stopped = True
607 607 else:
608 608 chain.append(iterrev)
609 609 stopped = False
610 610
611 611 chain.reverse()
612 612 return chain, stopped
613 613
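# Illustrative sketch: the pure-Python fallback of _deltachain() above,
# run against a toy mapping where each rev points at its delta-base rev
# (the generaldelta case) and a self-referencing rev is a full snapshot.
_toybases = {0: 0, 1: 0, 2: 1, 3: 3}  # rev -> delta base rev

def _toychain(rev):
    chain = []
    while _toybases[rev] != rev:
        chain.append(rev)
        rev = _toybases[rev]
    chain.append(rev)  # the chain base, stored as a full text
    chain.reverse()
    return chain

# _toychain(2) -> [0, 1, 2]: patch rev 0's fulltext with deltas 1 and 2
# _toychain(3) -> [3]: rev 3 is itself a snapshot, no patching needed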
614 614 def ancestors(self, revs, stoprev=0, inclusive=False):
615 615 """Generate the ancestors of 'revs' in reverse topological order.
616 616 Does not generate revs lower than stoprev.
617 617
618 618 See the documentation for ancestor.lazyancestors for more details."""
619 619
620 620 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
621 621 inclusive=inclusive)
622 622
623 623 def descendants(self, revs):
624 624 """Generate the descendants of 'revs' in revision order.
625 625
626 626 Yield a sequence of revision numbers starting with a child of
627 627 some rev in revs, i.e., each revision is *not* considered a
628 628 descendant of itself. Results are ordered by revision number (a
629 629 topological sort)."""
630 630 first = min(revs)
631 631 if first == nullrev:
632 632 for i in self:
633 633 yield i
634 634 return
635 635
636 636 seen = set(revs)
637 637 for i in self.revs(start=first + 1):
638 638 for x in self.parentrevs(i):
639 639 if x != nullrev and x in seen:
640 640 seen.add(i)
641 641 yield i
642 642 break
643 643
644 644 def findcommonmissing(self, common=None, heads=None):
645 645 """Return a tuple of the ancestors of common and the ancestors of heads
646 646 that are not ancestors of common. In revset terminology, we return the
647 647 tuple:
648 648
649 649 ::common, (::heads) - (::common)
650 650
651 651 The list is sorted by revision number, meaning it is
652 652 topologically sorted.
653 653
654 654 'heads' and 'common' are both lists of node IDs. If heads is
655 655 not supplied, uses all of the revlog's heads. If common is not
656 656 supplied, uses nullid."""
657 657 if common is None:
658 658 common = [nullid]
659 659 if heads is None:
660 660 heads = self.heads()
661 661
662 662 common = [self.rev(n) for n in common]
663 663 heads = [self.rev(n) for n in heads]
664 664
665 665 # we want the ancestors, but inclusive
666 666 class lazyset(object):
667 667 def __init__(self, lazyvalues):
668 668 self.addedvalues = set()
669 669 self.lazyvalues = lazyvalues
670 670
671 671 def __contains__(self, value):
672 672 return value in self.addedvalues or value in self.lazyvalues
673 673
674 674 def __iter__(self):
675 675 added = self.addedvalues
676 676 for r in added:
677 677 yield r
678 678 for r in self.lazyvalues:
679 679 if r not in added:
680 680 yield r
681 681
682 682 def add(self, value):
683 683 self.addedvalues.add(value)
684 684
685 685 def update(self, values):
686 686 self.addedvalues.update(values)
687 687
688 688 has = lazyset(self.ancestors(common))
689 689 has.add(nullrev)
690 690 has.update(common)
691 691
692 692 # take all ancestors from heads that aren't in has
693 693 missing = set()
694 694 visit = collections.deque(r for r in heads if r not in has)
695 695 while visit:
696 696 r = visit.popleft()
697 697 if r in missing:
698 698 continue
699 699 else:
700 700 missing.add(r)
701 701 for p in self.parentrevs(r):
702 702 if p not in has:
703 703 visit.append(p)
704 704 missing = list(missing)
705 705 missing.sort()
706 706 return has, [self.node(miss) for miss in missing]
707 707
708 708 def incrementalmissingrevs(self, common=None):
709 709 """Return an object that can be used to incrementally compute the
710 710 revision numbers of the ancestors of arbitrary sets that are not
711 711 ancestors of common. This is an ancestor.incrementalmissingancestors
712 712 object.
713 713
714 714 'common' is a list of revision numbers. If common is not supplied, uses
715 715 nullrev.
716 716 """
717 717 if common is None:
718 718 common = [nullrev]
719 719
720 720 return ancestor.incrementalmissingancestors(self.parentrevs, common)
721 721
722 722 def findmissingrevs(self, common=None, heads=None):
723 723 """Return the revision numbers of the ancestors of heads that
724 724 are not ancestors of common.
725 725
726 726 More specifically, return a list of revision numbers corresponding to
727 727 nodes N such that every N satisfies the following constraints:
728 728
729 729 1. N is an ancestor of some node in 'heads'
730 730 2. N is not an ancestor of any node in 'common'
731 731
732 732 The list is sorted by revision number, meaning it is
733 733 topologically sorted.
734 734
735 735 'heads' and 'common' are both lists of revision numbers. If heads is
736 736 not supplied, uses all of the revlog's heads. If common is not
737 737 supplied, uses nullrev."""
738 738 if common is None:
739 739 common = [nullrev]
740 740 if heads is None:
741 741 heads = self.headrevs()
742 742
743 743 inc = self.incrementalmissingrevs(common=common)
744 744 return inc.missingancestors(heads)
745 745
746 746 def findmissing(self, common=None, heads=None):
747 747 """Return the ancestors of heads that are not ancestors of common.
748 748
749 749 More specifically, return a list of nodes N such that every N
750 750 satisfies the following constraints:
751 751
752 752 1. N is an ancestor of some node in 'heads'
753 753 2. N is not an ancestor of any node in 'common'
754 754
755 755 The list is sorted by revision number, meaning it is
756 756 topologically sorted.
757 757
758 758 'heads' and 'common' are both lists of node IDs. If heads is
759 759 not supplied, uses all of the revlog's heads. If common is not
760 760 supplied, uses nullid."""
761 761 if common is None:
762 762 common = [nullid]
763 763 if heads is None:
764 764 heads = self.heads()
765 765
766 766 common = [self.rev(n) for n in common]
767 767 heads = [self.rev(n) for n in heads]
768 768
769 769 inc = self.incrementalmissingrevs(common=common)
770 770 return [self.node(r) for r in inc.missingancestors(heads)]
771 771
772 772 def nodesbetween(self, roots=None, heads=None):
773 773 """Return a topological path from 'roots' to 'heads'.
774 774
775 775 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
776 776 topologically sorted list of all nodes N that satisfy both of
777 777 these constraints:
778 778
779 779 1. N is a descendant of some node in 'roots'
780 780 2. N is an ancestor of some node in 'heads'
781 781
782 782 Every node is considered to be both a descendant and an ancestor
783 783 of itself, so every reachable node in 'roots' and 'heads' will be
784 784 included in 'nodes'.
785 785
786 786 'outroots' is the list of reachable nodes in 'roots', i.e., the
787 787 subset of 'roots' that is returned in 'nodes'. Likewise,
788 788 'outheads' is the subset of 'heads' that is also in 'nodes'.
789 789
790 790 'roots' and 'heads' are both lists of node IDs. If 'roots' is
791 791 unspecified, uses nullid as the only root. If 'heads' is
792 792 unspecified, uses list of all of the revlog's heads."""
793 793 nonodes = ([], [], [])
794 794 if roots is not None:
795 795 roots = list(roots)
796 796 if not roots:
797 797 return nonodes
798 798 lowestrev = min([self.rev(n) for n in roots])
799 799 else:
800 800 roots = [nullid] # Everybody's a descendant of nullid
801 801 lowestrev = nullrev
802 802 if (lowestrev == nullrev) and (heads is None):
803 803 # We want _all_ the nodes!
804 804 return ([self.node(r) for r in self], [nullid], list(self.heads()))
805 805 if heads is None:
806 806 # All nodes are ancestors, so the latest ancestor is the last
807 807 # node.
808 808 highestrev = len(self) - 1
809 809 # Set ancestors to None to signal that every node is an ancestor.
810 810 ancestors = None
811 811 # Set heads to an empty dictionary for later discovery of heads
812 812 heads = {}
813 813 else:
814 814 heads = list(heads)
815 815 if not heads:
816 816 return nonodes
817 817 ancestors = set()
818 818 # Turn heads into a dictionary so we can remove 'fake' heads.
819 819 # Also, later we will be using it to filter out the heads we can't
820 820 # find from roots.
821 821 heads = dict.fromkeys(heads, False)
822 822 # Start at the top and keep marking parents until we're done.
823 823 nodestotag = set(heads)
824 824 # Remember where the top was so we can use it as a limit later.
825 825 highestrev = max([self.rev(n) for n in nodestotag])
826 826 while nodestotag:
827 827 # grab a node to tag
828 828 n = nodestotag.pop()
829 829 # Never tag nullid
830 830 if n == nullid:
831 831 continue
832 832 # A node's revision number represents its place in a
833 833 # topologically sorted list of nodes.
834 834 r = self.rev(n)
835 835 if r >= lowestrev:
836 836 if n not in ancestors:
837 837 # If we are possibly a descendant of one of the roots
838 838 # and we haven't already been marked as an ancestor
839 839 ancestors.add(n) # Mark as ancestor
840 840 # Add non-nullid parents to list of nodes to tag.
841 841 nodestotag.update([p for p in self.parents(n) if
842 842 p != nullid])
843 843 elif n in heads: # We've seen it before, is it a fake head?
844 844 # So it is, real heads should not be the ancestors of
845 845 # any other heads.
846 846 heads.pop(n)
847 847 if not ancestors:
848 848 return nonodes
849 849 # Now that we have our set of ancestors, we want to remove any
850 850 # roots that are not ancestors.
851 851
852 852 # If one of the roots was nullid, everything is included anyway.
853 853 if lowestrev > nullrev:
854 854 # But, since we weren't, let's recompute the lowest rev to not
855 855 # include roots that aren't ancestors.
856 856
857 857 # Filter out roots that aren't ancestors of heads
858 858 roots = [root for root in roots if root in ancestors]
859 859 # Recompute the lowest revision
860 860 if roots:
861 861 lowestrev = min([self.rev(root) for root in roots])
862 862 else:
863 863 # No more roots? Return empty list
864 864 return nonodes
865 865 else:
866 866 # We are descending from nullid, and don't need to care about
867 867 # any other roots.
868 868 lowestrev = nullrev
869 869 roots = [nullid]
870 870 # Transform our roots list into a set.
871 871 descendants = set(roots)
872 872 # Also, keep the original roots so we can filter out roots that aren't
873 873 # 'real' roots (i.e. are descended from other roots).
874 874 roots = descendants.copy()
875 875 # Our topologically sorted list of output nodes.
876 876 orderedout = []
877 877 # Don't start at nullid since we don't want nullid in our output list,
878 878 # and if nullid shows up in descendants, empty parents will look like
879 879 # they're descendants.
880 880 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
881 881 n = self.node(r)
882 882 isdescendant = False
883 883 if lowestrev == nullrev: # Everybody is a descendant of nullid
884 884 isdescendant = True
885 885 elif n in descendants:
886 886 # n is already a descendant
887 887 isdescendant = True
888 888 # This check only needs to be done here because all the roots
889 889 # will start being marked as descendants before the loop.
890 890 if n in roots:
891 891 # If n was a root, check if it's a 'real' root.
892 892 p = tuple(self.parents(n))
893 893 # If any of its parents are descendants, it's not a root.
894 894 if (p[0] in descendants) or (p[1] in descendants):
895 895 roots.remove(n)
896 896 else:
897 897 p = tuple(self.parents(n))
898 898 # A node is a descendant if either of its parents are
899 899 # descendants. (We seeded the descendants set with the roots
900 900 # up there, remember?)
901 901 if (p[0] in descendants) or (p[1] in descendants):
902 902 descendants.add(n)
903 903 isdescendant = True
904 904 if isdescendant and ((ancestors is None) or (n in ancestors)):
905 905 # Only include nodes that are both descendants and ancestors.
906 906 orderedout.append(n)
907 907 if (ancestors is not None) and (n in heads):
908 908 # We're trying to figure out which heads are reachable
909 909 # from roots.
910 910 # Mark this head as having been reached
911 911 heads[n] = True
912 912 elif ancestors is None:
913 913 # Otherwise, we're trying to discover the heads.
914 914 # Assume this is a head because if it isn't, the next step
915 915 # will eventually remove it.
916 916 heads[n] = True
917 917 # But, obviously its parents aren't.
918 918 for p in self.parents(n):
919 919 heads.pop(p, None)
920 920 heads = [head for head, flag in heads.iteritems() if flag]
921 921 roots = list(roots)
922 922 assert orderedout
923 923 assert roots
924 924 assert heads
925 925 return (orderedout, roots, heads)
926 926
927 927 def headrevs(self):
928 928 try:
929 929 return self.index.headrevs()
930 930 except AttributeError:
931 931 return self._headrevs()
932 932
933 933 def computephases(self, roots):
934 934 return self.index.computephasesmapsets(roots)
935 935
936 936 def _headrevs(self):
937 937 count = len(self)
938 938 if not count:
939 939 return [nullrev]
940 940 # we won't iter over filtered revs, so nobody is a head at start
941 941 ishead = [0] * (count + 1)
942 942 index = self.index
943 943 for r in self:
944 944 ishead[r] = 1 # I may be a head
945 945 e = index[r]
946 946 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
947 947 return [r for r, val in enumerate(ishead) if val]
948 948
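# Illustrative sketch: the sweep in _headrevs() above, restated over a
# plain parent table. Every rev starts as a candidate head and is cleared
# the moment it shows up as someone's parent; the extra trailing slot
# quietly absorbs nullrev (-1) parents.
def _toyheadrevs(parents):
    # parents: list indexed by rev of (p1, p2) rev pairs, -1 if absent
    ishead = [1] * len(parents) + [0]
    for p1, p2 in parents:
        ishead[p1] = ishead[p2] = 0
    return [r for r in xrange(len(parents)) if ishead[r]]

# History 0 <- 1 <- 2 with a branch 1 <- 3: the heads are 2 and 3.
# _toyheadrevs([(-1, -1), (0, -1), (1, -1), (1, -1)]) -> [2, 3]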
949 949 def heads(self, start=None, stop=None):
950 950 """return the list of all nodes that have no children
951 951
952 952 if start is specified, only heads that are descendants of
953 953 start will be returned
954 954 if stop is specified, it will consider all the revs from stop
955 955 as if they had no children
956 956 """
957 957 if start is None and stop is None:
958 958 if not len(self):
959 959 return [nullid]
960 960 return [self.node(r) for r in self.headrevs()]
961 961
962 962 if start is None:
963 963 start = nullid
964 964 if stop is None:
965 965 stop = []
966 966 stoprevs = set([self.rev(n) for n in stop])
967 967 startrev = self.rev(start)
968 968 reachable = {startrev}
969 969 heads = {startrev}
970 970
971 971 parentrevs = self.parentrevs
972 972 for r in self.revs(start=startrev + 1):
973 973 for p in parentrevs(r):
974 974 if p in reachable:
975 975 if r not in stoprevs:
976 976 reachable.add(r)
977 977 heads.add(r)
978 978 if p in heads and p not in stoprevs:
979 979 heads.remove(p)
980 980
981 981 return [self.node(r) for r in heads]
982 982
983 983 def children(self, node):
984 984 """find the children of a given node"""
985 985 c = []
986 986 p = self.rev(node)
987 987 for r in self.revs(start=p + 1):
988 988 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
989 989 if prevs:
990 990 for pr in prevs:
991 991 if pr == p:
992 992 c.append(self.node(r))
993 993 elif p == nullrev:
994 994 c.append(self.node(r))
995 995 return c
996 996
997 997 def descendant(self, start, end):
998 998 if start == nullrev:
999 999 return True
1000 1000 for i in self.descendants([start]):
1001 1001 if i == end:
1002 1002 return True
1003 1003 elif i > end:
1004 1004 break
1005 1005 return False
1006 1006
1007 1007 def commonancestorsheads(self, a, b):
1008 1008 """calculate all the heads of the common ancestors of nodes a and b"""
1009 1009 a, b = self.rev(a), self.rev(b)
1010 1010 try:
1011 1011 ancs = self.index.commonancestorsheads(a, b)
1012 1012 except (AttributeError, OverflowError): # C implementation failed
1013 1013 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1014 1014 return pycompat.maplist(self.node, ancs)
1015 1015
1016 1016 def isancestor(self, a, b):
1017 1017 """return True if node a is an ancestor of node b
1018 1018
1019 1019 The implementation of this is trivial but the use of
1020 1020 commonancestorsheads is not."""
1021 1021 return a in self.commonancestorsheads(a, b)
1022 1022
1023 1023 def ancestor(self, a, b):
1024 1024 """calculate the "best" common ancestor of nodes a and b"""
1025 1025
1026 1026 a, b = self.rev(a), self.rev(b)
1027 1027 try:
1028 1028 ancs = self.index.ancestors(a, b)
1029 1029 except (AttributeError, OverflowError):
1030 1030 ancs = ancestor.ancestors(self.parentrevs, a, b)
1031 1031 if ancs:
1032 1032 # choose a consistent winner when there's a tie
1033 1033 return min(map(self.node, ancs))
1034 1034 return nullid
1035 1035
1036 1036 def _match(self, id):
1037 1037 if isinstance(id, int):
1038 1038 # rev
1039 1039 return self.node(id)
1040 1040 if len(id) == 20:
1041 1041 # possibly a binary node
1042 1042 # odds of a binary node being all hex in ASCII are 1 in 10**25
1043 1043 try:
1044 1044 node = id
1045 1045 self.rev(node) # quick search the index
1046 1046 return node
1047 1047 except LookupError:
1048 1048 pass # may be partial hex id
1049 1049 try:
1050 1050 # str(rev)
1051 1051 rev = int(id)
1052 1052 if str(rev) != id:
1053 1053 raise ValueError
1054 1054 if rev < 0:
1055 1055 rev = len(self) + rev
1056 1056 if rev < 0 or rev >= len(self):
1057 1057 raise ValueError
1058 1058 return self.node(rev)
1059 1059 except (ValueError, OverflowError):
1060 1060 pass
1061 1061 if len(id) == 40:
1062 1062 try:
1063 1063 # a full hex nodeid?
1064 1064 node = bin(id)
1065 1065 self.rev(node)
1066 1066 return node
1067 1067 except (TypeError, LookupError):
1068 1068 pass
1069 1069
1070 1070 def _partialmatch(self, id):
1071 1071 maybewdir = wdirhex.startswith(id)
1072 1072 try:
1073 1073 partial = self.index.partialmatch(id)
1074 1074 if partial and self.hasnode(partial):
1075 1075 if maybewdir:
1076 1076 # single 'ff...' match in radix tree, ambiguous with wdir
1077 1077 raise RevlogError
1078 1078 return partial
1079 1079 if maybewdir:
1080 1080 # no 'ff...' match in radix tree, wdir identified
1081 1081 raise error.WdirUnsupported
1082 1082 return None
1083 1083 except RevlogError:
1084 1084 # parsers.c radix tree lookup gave multiple matches
1085 1085 # fast path: for unfiltered changelog, radix tree is accurate
1086 1086 if not getattr(self, 'filteredrevs', None):
1087 1087 raise LookupError(id, self.indexfile,
1088 1088 _('ambiguous identifier'))
1089 1089 # fall through to slow path that filters hidden revisions
1090 1090 except (AttributeError, ValueError):
1091 1091 # we are pure python, or key was too short to search radix tree
1092 1092 pass
1093 1093
1094 1094 if id in self._pcache:
1095 1095 return self._pcache[id]
1096 1096
1097 1097 if len(id) < 40:
1098 1098 try:
1099 1099 # hex(node)[:...]
1100 1100 l = len(id) // 2 # grab an even number of digits
1101 1101 prefix = bin(id[:l * 2])
1102 1102 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1103 1103 nl = [n for n in nl if hex(n).startswith(id) and
1104 1104 self.hasnode(n)]
1105 1105 if len(nl) > 0:
1106 1106 if len(nl) == 1 and not maybewdir:
1107 1107 self._pcache[id] = nl[0]
1108 1108 return nl[0]
1109 1109 raise LookupError(id, self.indexfile,
1110 1110 _('ambiguous identifier'))
1111 1111 if maybewdir:
1112 1112 raise error.WdirUnsupported
1113 1113 return None
1114 1114 except (TypeError, binascii.Error):
1115 1115 pass
1116 1116
1117 1117 def lookup(self, id):
1118 1118 """locate a node based on:
1119 1119 - revision number or str(revision number)
1120 1120 - nodeid or subset of hex nodeid
1121 1121 """
1122 1122 n = self._match(id)
1123 1123 if n is not None:
1124 1124 return n
1125 1125 n = self._partialmatch(id)
1126 1126 if n:
1127 1127 return n
1128 1128
1129 1129 raise LookupError(id, self.indexfile, _('no match found'))
1130 1130
1131 1131 def shortest(self, hexnode, minlength=1):
1132 1132 """Find the shortest unambiguous prefix that matches hexnode."""
1133 1133 def isvalid(test):
1134 1134 try:
1135 1135 if self._partialmatch(test) is None:
1136 1136 return False
1137 1137
1138 1138 try:
1139 1139 i = int(test)
1140 1140 # if we are a pure int, then starting with zero will not be
1141 1141 # confused as a rev; or, obviously, if the int is larger
1142 1142 # than the value of the tip rev
1143 1143 if test[0] == '0' or i > len(self):
1144 1144 return True
1145 1145 return False
1146 1146 except ValueError:
1147 1147 return True
1148 1148 except error.RevlogError:
1149 1149 return False
1150 1150 except error.WdirUnsupported:
1151 1151 # single 'ff...' match
1152 1152 return True
1153 1153
1154 1154 shortest = hexnode
1155 1155 startlength = max(6, minlength)
1156 1156 length = startlength
1157 1157 while True:
1158 1158 test = hexnode[:length]
1159 1159 if isvalid(test):
1160 1160 shortest = test
1161 1161 if length == minlength or length > startlength:
1162 1162 return shortest
1163 1163 length -= 1
1164 1164 else:
1165 1165 length += 1
1166 1166 if len(shortest) <= length:
1167 1167 return shortest
1168 1168
1169 1169 def cmp(self, node, text):
1170 1170 """compare text with a given file revision
1171 1171
1172 1172 returns True if text is different from what is stored.
1173 1173 """
1174 1174 p1, p2 = self.parents(node)
1175 1175 return hash(text, p1, p2) != node
1176 1176
1177 1177 def _cachesegment(self, offset, data):
1178 1178 """Add a segment to the revlog cache.
1179 1179
1180 1180 Accepts an absolute offset and the data that is at that location.
1181 1181 """
1182 1182 o, d = self._chunkcache
1183 1183 # try to add to existing cache
1184 1184 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1185 1185 self._chunkcache = o, d + data
1186 1186 else:
1187 1187 self._chunkcache = offset, data
1188 1188
1189 1189 def _readsegment(self, offset, length, df=None):
1190 1190 """Load a segment of raw data from the revlog.
1191 1191
1192 1192 Accepts an absolute offset, length to read, and an optional existing
1193 1193 file handle to read from.
1194 1194
1195 1195 If an existing file handle is passed, it will be seeked and the
1196 1196 original seek position will NOT be restored.
1197 1197
1198 1198 Returns a str or buffer of raw byte data.
1199 1199 """
1200 1200 if df is not None:
1201 1201 closehandle = False
1202 1202 else:
1203 1203 if self._inline:
1204 1204 df = self.opener(self.indexfile)
1205 1205 else:
1206 1206 df = self.opener(self.datafile)
1207 1207 closehandle = True
1208 1208
1209 1209 # Cache data both forward and backward around the requested
1210 1210 # data, in a fixed size window. This helps speed up operations
1211 1211 # involving reading the revlog backwards.
1212 1212 cachesize = self._chunkcachesize
1213 1213 realoffset = offset & ~(cachesize - 1)
1214 1214 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1215 1215 - realoffset)
1216 1216 df.seek(realoffset)
1217 1217 d = df.read(reallength)
1218 1218 if closehandle:
1219 1219 df.close()
1220 1220 self._cachesegment(realoffset, d)
1221 1221 if offset != realoffset or reallength != length:
1222 1222 return util.buffer(d, offset - realoffset, length)
1223 1223 return d
1224 1224
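# Illustrative sketch: the window rounding _readsegment() above applies.
# The chunk cache size is assumed to be a power of two, so masking with
# ~(cachesize - 1) aligns the start down, and the length is rounded up
# far enough that the requested span is always covered.
def _alignedwindow(offset, length, cachesize=65536):
    realoffset = offset & ~(cachesize - 1)
    reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                  - realoffset)
    return realoffset, reallength

# _alignedwindow(70000, 100) -> (65536, 65536): a single aligned 64k read
# that covers bytes 70000..70100 with slack on both sides for reuse.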
1225 1225 def _getsegment(self, offset, length, df=None):
1226 1226 """Obtain a segment of raw data from the revlog.
1227 1227
1228 1228 Accepts an absolute offset, length of bytes to obtain, and an
1229 1229 optional file handle to the already-opened revlog. If the file
1230 1230 handle is used, its original seek position will not be preserved.
1231 1231
1232 1232 Requests for data may be returned from a cache.
1233 1233
1234 1234 Returns a str or a buffer instance of raw byte data.
1235 1235 """
1236 1236 o, d = self._chunkcache
1237 1237 l = len(d)
1238 1238
1239 1239 # is it in the cache?
1240 1240 cachestart = offset - o
1241 1241 cacheend = cachestart + length
1242 1242 if cachestart >= 0 and cacheend <= l:
1243 1243 if cachestart == 0 and cacheend == l:
1244 1244 return d # avoid a copy
1245 1245 return util.buffer(d, cachestart, cacheend - cachestart)
1246 1246
1247 1247 return self._readsegment(offset, length, df=df)
1248 1248
1249 1249 def _getsegmentforrevs(self, startrev, endrev, df=None):
1250 1250 """Obtain a segment of raw data corresponding to a range of revisions.
1251 1251
1252 1252 Accepts the start and end revisions and an optional already-open
1253 1253 file handle to be used for reading. If the file handle is read, its
1254 1254 seek position will not be preserved.
1255 1255
1256 1256 Requests for data may be satisfied by a cache.
1257 1257
1258 1258 Returns a 2-tuple of (offset, data) for the requested range of
1259 1259 revisions. Offset is the integer offset from the beginning of the
1260 1260 revlog and data is a str or buffer of the raw byte data.
1261 1261
1262 1262 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1263 1263 to determine where each revision's data begins and ends.
1264 1264 """
1265 1265 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1266 1266 # (functions are expensive).
1267 1267 index = self.index
1268 1268 istart = index[startrev]
1269 1269 start = int(istart[0] >> 16)
1270 1270 if startrev == endrev:
1271 1271 end = start + istart[1]
1272 1272 else:
1273 1273 iend = index[endrev]
1274 1274 end = int(iend[0] >> 16) + iend[1]
1275 1275
1276 1276 if self._inline:
1277 1277 start += (startrev + 1) * self._io.size
1278 1278 end += (endrev + 1) * self._io.size
1279 1279 length = end - start
1280 1280
1281 1281 return start, self._getsegment(start, length, df=df)
1282 1282
1283 1283 def _chunk(self, rev, df=None):
1284 1284 """Obtain a single decompressed chunk for a revision.
1285 1285
1286 1286 Accepts an integer revision and an optional already-open file handle
1287 1287 to be used for reading. If used, the seek position of the file will not
1288 1288 be preserved.
1289 1289
1290 1290 Returns a str holding uncompressed data for the requested revision.
1291 1291 """
1292 1292 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1293 1293
1294 1294 def _chunks(self, revs, df=None):
1295 1295 """Obtain decompressed chunks for the specified revisions.
1296 1296
1297 1297 Accepts an iterable of numeric revisions that are assumed to be in
1298 1298 ascending order. Also accepts an optional already-open file handle
1299 1299 to be used for reading. If used, the seek position of the file will
1300 1300 not be preserved.
1301 1301
1302 1302 This function is similar to calling ``self._chunk()`` multiple times,
1303 1303 but is faster.
1304 1304
1305 1305 Returns a list with decompressed data for each requested revision.
1306 1306 """
1307 1307 if not revs:
1308 1308 return []
1309 1309 start = self.start
1310 1310 length = self.length
1311 1311 inline = self._inline
1312 1312 iosize = self._io.size
1313 1313 buffer = util.buffer
1314 1314
1315 1315 l = []
1316 1316 ladd = l.append
1317 1317
1318 1318 try:
1319 1319 offset, data = self._getsegmentforrevs(revs[0], revs[-1], df=df)
1320 1320 except OverflowError:
1321 1321 # issue4215 - we can't cache a run of chunks greater than
1322 1322 # 2G on Windows
1323 1323 return [self._chunk(rev, df=df) for rev in revs]
1324 1324
1325 1325 decomp = self.decompress
1326 1326 for rev in revs:
1327 1327 chunkstart = start(rev)
1328 1328 if inline:
1329 1329 chunkstart += (rev + 1) * iosize
1330 1330 chunklength = length(rev)
1331 1331 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1332 1332
1333 1333 return l
1334 1334
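# Illustrative sketch: how _chunks() above carves per-revision chunks out
# of one contiguous read. 'segstart' is the file offset the segment began
# at; each rev's absolute start then becomes a relative slice into 'data'.
# The real method additionally skews chunkstart for inline revlogs and
# decompresses every slice.
def _slicechunks(data, segstart, spans):
    # spans: list of (chunkstart, chunklength) absolute file positions
    return [data[s - segstart:s - segstart + l] for s, l in spans]

# One 12-byte read starting at file offset 100, holding three chunks:
# _slicechunks('aaaabbbbcccc', 100, [(100, 4), (104, 4), (108, 4)])
#   -> ['aaaa', 'bbbb', 'cccc']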
1335 1335 def _chunkclear(self):
1336 1336 """Clear the raw chunk cache."""
1337 1337 self._chunkcache = (0, '')
1338 1338
1339 1339 def deltaparent(self, rev):
1340 1340 """return deltaparent of the given revision"""
1341 1341 base = self.index[rev][3]
1342 1342 if base == rev:
1343 1343 return nullrev
1344 1344 elif self._generaldelta:
1345 1345 return base
1346 1346 else:
1347 1347 return rev - 1
1348 1348
1349 1349 def revdiff(self, rev1, rev2):
1350 1350 """return or calculate a delta between two revisions
1351 1351
1352 1352 The delta calculated is in binary form and is intended to be written to
1353 1353 revlog data directly. So this function needs raw revision data.
1354 1354 """
1355 1355 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1356 1356 return bytes(self._chunk(rev2))
1357 1357
1358 1358 return mdiff.textdiff(self.revision(rev1, raw=True),
1359 1359 self.revision(rev2, raw=True))
1360 1360
1361 1361 def revision(self, nodeorrev, _df=None, raw=False):
1362 1362 """return an uncompressed revision of a given node or revision
1363 1363 number.
1364 1364
1365 1365 _df - an existing file handle to read from. (internal-only)
1366 1366 raw - an optional argument specifying if the revision data is to be
1367 1367 treated as raw data when applying flag transforms. 'raw' should be set
1368 1368 to True when generating changegroups or in debug commands.
1369 1369 """
1370 1370 if isinstance(nodeorrev, int):
1371 1371 rev = nodeorrev
1372 1372 node = self.node(rev)
1373 1373 else:
1374 1374 node = nodeorrev
1375 1375 rev = None
1376 1376
1377 1377 cachedrev = None
1378 1378 flags = None
1379 1379 rawtext = None
1380 1380 if node == nullid:
1381 1381 return ""
1382 1382 if self._cache:
1383 1383 if self._cache[0] == node:
1384 1384 # _cache only stores rawtext
1385 1385 if raw:
1386 1386 return self._cache[2]
1387 1387 # duplicated, but good for perf
1388 1388 if rev is None:
1389 1389 rev = self.rev(node)
1390 1390 if flags is None:
1391 1391 flags = self.flags(rev)
1392 1392 # no extra flags set, no flag processor runs, text = rawtext
1393 1393 if flags == REVIDX_DEFAULT_FLAGS:
1394 1394 return self._cache[2]
1395 1395 # rawtext is reusable. need to run flag processor
1396 1396 rawtext = self._cache[2]
1397 1397
1398 1398 cachedrev = self._cache[1]
1399 1399
1400 1400 # look up what we need to read
1401 1401 if rawtext is None:
1402 1402 if rev is None:
1403 1403 rev = self.rev(node)
1404 1404
1405 1405 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1406 1406 if stopped:
1407 1407 rawtext = self._cache[2]
1408 1408
1409 1409 # drop cache to save memory
1410 1410 self._cache = None
1411 1411
1412 1412 bins = self._chunks(chain, df=_df)
1413 1413 if rawtext is None:
1414 1414 rawtext = bytes(bins[0])
1415 1415 bins = bins[1:]
1416 1416
1417 1417 rawtext = mdiff.patches(rawtext, bins)
1418 1418 self._cache = (node, rev, rawtext)
1419 1419
1420 1420 if flags is None:
1421 1421 if rev is None:
1422 1422 rev = self.rev(node)
1423 1423 flags = self.flags(rev)
1424 1424
1425 1425 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1426 1426 if validatehash:
1427 1427 self.checkhash(text, node, rev=rev)
1428 1428
1429 1429 return text
1430 1430
1431 1431 def hash(self, text, p1, p2):
1432 1432 """Compute a node hash.
1433 1433
1434 1434 Available as a function so that subclasses can replace the hash
1435 1435 as needed.
1436 1436 """
1437 1437 return hash(text, p1, p2)
1438 1438
1439 1439 def _processflags(self, text, flags, operation, raw=False):
1440 1440 """Inspect revision data flags and applies transforms defined by
1441 1441 registered flag processors.
1442 1442
1443 1443 ``text`` - the revision data to process
1444 1444 ``flags`` - the revision flags
1445 1445 ``operation`` - the operation being performed (read or write)
1446 1446 ``raw`` - an optional argument describing if the raw transform should be
1447 1447 applied.
1448 1448
1449 1449 This method processes the flags in the order (or reverse order if
1450 1450 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1451 1451 flag processors registered for present flags. The order of flags defined
1452 1452 in REVIDX_FLAGS_ORDER needs to be stable because the transforms may not commute.
1453 1453
1454 1454 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1455 1455 processed text and ``validatehash`` is a bool indicating whether the
1456 1456 returned text should be checked for hash integrity.
1457 1457
1458 1458 Note: If the ``raw`` argument is set, it has precedence over the
1459 1459 operation and will only update the value of ``validatehash``.
1460 1460 """
1461 1461 # fast path: no flag processors will run
1462 1462 if flags == 0:
1463 1463 return text, True
1464 1464 if operation not in ('read', 'write'):
1465 1465 raise ProgrammingError(_("invalid '%s' operation") % operation)
1466 1466 # Check all flags are known.
1467 1467 if flags & ~REVIDX_KNOWN_FLAGS:
1468 1468 raise RevlogError(_("incompatible revision flag '%#x'") %
1469 1469 (flags & ~REVIDX_KNOWN_FLAGS))
1470 1470 validatehash = True
1471 1471 # Depending on the operation (read or write), the order might be
1472 1472 # reversed due to non-commutative transforms.
1473 1473 orderedflags = REVIDX_FLAGS_ORDER
1474 1474 if operation == 'write':
1475 1475 orderedflags = reversed(orderedflags)
1476 1476
1477 1477 for flag in orderedflags:
1478 1478 # If a flagprocessor has been registered for a known flag, apply the
1479 1479 # related operation transform and update result tuple.
1480 1480 if flag & flags:
1481 1481 vhash = True
1482 1482
1483 1483 if flag not in _flagprocessors:
1484 1484 message = _("missing processor for flag '%#x'") % (flag)
1485 1485 raise RevlogError(message)
1486 1486
1487 1487 processor = _flagprocessors[flag]
1488 1488 if processor is not None:
1489 1489 readtransform, writetransform, rawtransform = processor
1490 1490
1491 1491 if raw:
1492 1492 vhash = rawtransform(self, text)
1493 1493 elif operation == 'read':
1494 1494 text, vhash = readtransform(self, text)
1495 1495 else: # write operation
1496 1496 text, vhash = writetransform(self, text)
1497 1497 validatehash = validatehash and vhash
1498 1498
1499 1499 return text, validatehash
1500 1500
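# Illustrative sketch: the shape of a processor entry as consumed by
# _processflags() above -- a (read, write, raw) triple where read/write
# return (newtext, validatehash) and raw only reports whether the stored
# text can be checked against the node hash. The base64 transform is a
# made-up example, not a real registered flag.
import base64

def _samplereadtransform(rl, text):
    return base64.b64decode(text), False  # stored form -> working form

def _samplewritetransform(rl, text):
    return base64.b64encode(text), False  # working form -> stored form

def _samplerawtransform(rl, text):
    return False  # stored text does not hash to the node

_sampleprocessor = (_samplereadtransform,
                    _samplewritetransform,
                    _samplerawtransform)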
1501 1501 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1502 1502 """Check node hash integrity.
1503 1503
1504 1504 Available as a function so that subclasses can extend hash mismatch
1505 1505 behaviors as needed.
1506 1506 """
1507 1507 if p1 is None and p2 is None:
1508 1508 p1, p2 = self.parents(node)
1509 1509 if node != self.hash(text, p1, p2):
1510 1510 revornode = rev
1511 1511 if revornode is None:
1512 1512 revornode = templatefilters.short(hex(node))
1513 1513 raise RevlogError(_("integrity check failed on %s:%s")
1514 1514 % (self.indexfile, pycompat.bytestr(revornode)))
1515 1515
1516 1516 def checkinlinesize(self, tr, fp=None):
1517 1517 """Check if the revlog is too big for inline and convert if so.
1518 1518
1519 1519 This should be called after revisions are added to the revlog. If the
1520 1520 revlog has grown too large to be an inline revlog, it will convert it
1521 1521 to use separate index and data files.
1522 1522 """
1523 1523 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1524 1524 return
1525 1525
1526 1526 trinfo = tr.find(self.indexfile)
1527 1527 if trinfo is None:
1528 1528 raise RevlogError(_("%s not found in the transaction")
1529 1529 % self.indexfile)
1530 1530
1531 1531 trindex = trinfo[2]
1532 1532 if trindex is not None:
1533 1533 dataoff = self.start(trindex)
1534 1534 else:
1535 1535 # revlog was stripped at start of transaction, use all leftover data
1536 1536 trindex = len(self) - 1
1537 1537 dataoff = self.end(-2)
1538 1538
1539 1539 tr.add(self.datafile, dataoff)
1540 1540
1541 1541 if fp:
1542 1542 fp.flush()
1543 1543 fp.close()
1544 1544
1545 1545 df = self.opener(self.datafile, 'w')
1546 1546 try:
1547 1547 for r in self:
1548 1548 df.write(self._getsegmentforrevs(r, r)[1])
1549 1549 finally:
1550 1550 df.close()
1551 1551
1552 1552 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1553 1553 checkambig=self._checkambig)
1554 1554 self.version &= ~FLAG_INLINE_DATA
1555 1555 self._inline = False
1556 1556 for i in self:
1557 1557 e = self._io.packentry(self.index[i], self.node, self.version, i)
1558 1558 fp.write(e)
1559 1559
1560 1560 # if we don't call close, the temp file will never replace the
1561 1561 # real index
1562 1562 fp.close()
1563 1563
1564 1564 tr.replace(self.indexfile, trindex * self._io.size)
1565 1565 self._chunkclear()
1566 1566
1567 1567 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1568 1568 node=None, flags=REVIDX_DEFAULT_FLAGS):
1569 1569 """add a revision to the log
1570 1570
1571 1571 text - the revision data to add
1572 1572 transaction - the transaction object used for rollback
1573 1573 link - the linkrev data to add
1574 1574 p1, p2 - the parent nodeids of the revision
1575 1575 cachedelta - an optional precomputed delta
1576 1576 node - nodeid of revision; typically node is not specified, and it is
1577 1577 computed by default as hash(text, p1, p2), however subclasses might
1578 1578 use different hashing method (and override checkhash() in such case)
1579 1579 flags - the known flags to set on the revision
1580 1580 """
1581 1581 if link == nullrev:
1582 1582 raise RevlogError(_("attempted to add linkrev -1 to %s")
1583 1583 % self.indexfile)
1584 1584
1585 1585 if flags:
1586 1586 node = node or self.hash(text, p1, p2)
1587 1587
1588 1588 rawtext, validatehash = self._processflags(text, flags, 'write')
1589 1589
1590 1590 # If the flag processor modifies the revision data, ignore any provided
1591 1591 # cachedelta.
1592 1592 if rawtext != text:
1593 1593 cachedelta = None
1594 1594
1595 1595 if len(rawtext) > _maxentrysize:
1596 1596 raise RevlogError(
1597 1597 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1598 1598 % (self.indexfile, len(rawtext)))
1599 1599
1600 1600 node = node or self.hash(rawtext, p1, p2)
1601 1601 if node in self.nodemap:
1602 1602 return node
1603 1603
1604 1604 if validatehash:
1605 1605 self.checkhash(rawtext, node, p1=p1, p2=p2)
1606 1606
1607 1607 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1608 1608 flags, cachedelta=cachedelta)
1609 1609
1610 1610 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1611 1611 cachedelta=None):
1612 1612 """add a raw revision with known flags, node and parents
1613 1613 useful when reusing a revision not stored in this revlog (ex: received
1614 1614 over wire, or read from an external bundle).
1615 1615 """
1616 1616 dfh = None
1617 1617 if not self._inline:
1618 1618 dfh = self.opener(self.datafile, "a+")
1619 1619 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1620 1620 try:
1621 1621 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1622 1622 flags, cachedelta, ifh, dfh)
1623 1623 finally:
1624 1624 if dfh:
1625 1625 dfh.close()
1626 1626 ifh.close()
1627 1627
1628 1628 def compress(self, data):
1629 1629 """Generate a possibly-compressed representation of data."""
1630 1630 if not data:
1631 1631 return '', data
1632 1632
1633 1633 compressed = self._compressor.compress(data)
1634 1634
1635 1635 if compressed:
1636 1636 # The revlog compressor added the header in the returned data.
1637 1637 return '', compressed
1638 1638
1639 1639 if data[0:1] == '\0':
1640 1640 return '', data
1641 1641 return 'u', data
1642 1642
1643 1643 def decompress(self, data):
1644 1644 """Decompress a revlog chunk.
1645 1645
1646 1646 The chunk is expected to begin with a header identifying the
1647 1647 format type so it can be routed to an appropriate decompressor.
1648 1648 """
1649 1649 if not data:
1650 1650 return data
1651 1651
1652 1652 # Revlogs are read much more frequently than they are written and many
1653 1653 # chunks only take microseconds to decompress, so performance is
1654 1654 # important here.
1655 1655 #
1656 1656 # We can make a few assumptions about revlogs:
1657 1657 #
1658 1658 # 1) the majority of chunks will be compressed (as opposed to inline
1659 1659 # raw data).
1660 1660 # 2) decompressing *any* data will likely by at least 10x slower than
1661 1661 # returning raw inline data.
1662 1662 # 3) we want to prioritize common and officially supported compression
1663 1663 # engines
1664 1664 #
1665 1665 # It follows that we want to optimize for "decompress compressed data
1666 1666 # when encoded with common and officially supported compression engines"
1667 1667 # case over "raw data" and "data encoded by less common or non-official
1668 1668 # compression engines." That is why we have the inline lookup first
1669 1669 # followed by the compengines lookup.
1670 1670 #
1671 1671 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1672 1672 # compressed chunks. And this matters for changelog and manifest reads.
1673 1673 t = data[0:1]
1674 1674
1675 1675 if t == 'x':
1676 1676 try:
1677 1677 return _zlibdecompress(data)
1678 1678 except zlib.error as e:
1679 1679 raise RevlogError(_('revlog decompress error: %s') % str(e))
1680 1680 # '\0' is more common than 'u' so it goes first.
1681 1681 elif t == '\0':
1682 1682 return data
1683 1683 elif t == 'u':
1684 1684 return util.buffer(data, 1)
1685 1685
1686 1686 try:
1687 1687 compressor = self._decompressors[t]
1688 1688 except KeyError:
1689 1689 try:
1690 1690 engine = util.compengines.forrevlogheader(t)
1691 1691 compressor = engine.revlogcompressor()
1692 1692 self._decompressors[t] = compressor
1693 1693 except KeyError:
1694 1694 raise RevlogError(_('unknown compression type %r') % t)
1695 1695
1696 1696 return compressor.decompress(data)
1697 1697
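# Illustrative sketch: the first-byte routing decompress() above performs,
# reduced to a classifier. Anything outside the built-in markers is handed
# to a compression engine looked up by its revlog header byte.
def _classifychunk(data):
    if not data:
        return 'empty'
    t = data[0:1]
    if t == '\0':
        return 'stored raw (fulltext happens to start with NUL)'
    if t == 'u':
        return 'stored raw (explicit marker, strip the leading u)'
    if t == 'x':
        return 'zlib compressed'
    return 'custom engine for header %r' % t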
1698 1698 def _isgooddelta(self, d, textlen):
1699 1699 """Returns True if the given delta is good. Good means that it is within
1700 1700 the disk span, disk size, and chain length bounds that we know to be
1701 1701 performant."""
1702 1702 if d is None:
1703 1703 return False
1704 1704
1705 1705 # - 'dist' is the distance from the base revision -- bounding it limits
1706 1706 # the amount of I/O we need to do.
1707 1707 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1708 1708 # to apply -- bounding it limits the amount of CPU we consume.
1709 1709 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1710 1710
1711 1711 defaultmax = textlen * 4
1712 1712 maxdist = self._maxdeltachainspan
1713 1713 if not maxdist:
1714 1714 maxdist = dist # ensure the conditional passes
1715 1715 maxdist = max(maxdist, defaultmax)
1716 1716 if (dist > maxdist or l > textlen or
1717 1717 compresseddeltalen > textlen * 2 or
1718 1718 (self._maxchainlen and chainlen > self._maxchainlen)):
1719 1719 return False
1720 1720
1721 1721 return True
1722 1722
1723 1723 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1724 1724 cachedelta, ifh, dfh, alwayscache=False):
1725 1725 """internal function to add revisions to the log
1726 1726
1727 1727 see addrevision for argument descriptions.
1728 1728
1729 1729 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1730 1730
1731 1731 invariants:
1732 1732 - rawtext is optional (can be None); if not set, cachedelta must be set.
1733 1733 if both are set, they must correspond to each other.
1734 1734 """
1735 1735 if node == nullid:
1736 1736 raise RevlogError(_("%s: attempt to add null revision") %
1737 1737 (self.indexfile))
1738 1738 if node == wdirid:
1739 1739 raise RevlogError(_("%s: attempt to add wdir revision") %
1740 1740 (self.indexfile))
1741 1741
1742 1742 btext = [rawtext]
1743 1743 def buildtext():
1744 1744 if btext[0] is not None:
1745 1745 return btext[0]
1746 1746 baserev = cachedelta[0]
1747 1747 delta = cachedelta[1]
1748 1748 # special case deltas which replace entire base; no need to decode
1749 1749 # base revision. this neatly avoids censored bases, which throw when
1750 1750 # they're decoded.
1751 1751 hlen = struct.calcsize(">lll")
1752 1752 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1753 1753 len(delta) - hlen):
1754 1754 btext[0] = delta[hlen:]
1755 1755 else:
1756 1756 if self._inline:
1757 1757 fh = ifh
1758 1758 else:
1759 1759 fh = dfh
1760 1760 basetext = self.revision(baserev, _df=fh, raw=True)
1761 1761 btext[0] = mdiff.patch(basetext, delta)
1762 1762
1763 1763 try:
1764 1764 res = self._processflags(btext[0], flags, 'read', raw=True)
1765 1765 btext[0], validatehash = res
1766 1766 if validatehash:
1767 1767 self.checkhash(btext[0], node, p1=p1, p2=p2)
1768 1768 if flags & REVIDX_ISCENSORED:
1769 1769 raise RevlogError(_('node %s is not censored') % node)
1770 1770 except CensoredNodeError:
1771 1771 # must pass the censored index flag to add censored revisions
1772 1772 if not flags & REVIDX_ISCENSORED:
1773 1773 raise
1774 1774 return btext[0]
1775 1775
1776 1776 def builddelta(rev):
1777 1777 # can we use the cached delta?
1778 1778 if cachedelta and cachedelta[0] == rev:
1779 1779 delta = cachedelta[1]
1780 1780 else:
1781 1781 t = buildtext()
1782 1782 if self.iscensored(rev):
1783 1783 # deltas based on a censored revision must replace the
1784 1784 # full content in one patch, so delta works everywhere
1785 1785 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1786 1786 delta = header + t
1787 1787 else:
1788 1788 if self._inline:
1789 1789 fh = ifh
1790 1790 else:
1791 1791 fh = dfh
1792 1792 ptext = self.revision(rev, _df=fh, raw=True)
1793 1793 delta = mdiff.textdiff(ptext, t)
1794 1794 header, data = self.compress(delta)
1795 1795 deltalen = len(header) + len(data)
1796 1796 chainbase = self.chainbase(rev)
1797 1797 dist = deltalen + offset - self.start(chainbase)
1798 1798 if self._generaldelta:
1799 1799 base = rev
1800 1800 else:
1801 1801 base = chainbase
1802 1802 chainlen, compresseddeltalen = self._chaininfo(rev)
1803 1803 chainlen += 1
1804 1804 compresseddeltalen += deltalen
1805 1805 return (dist, deltalen, (header, data), base,
1806 1806 chainbase, chainlen, compresseddeltalen)
1807 1807
1808 1808 curr = len(self)
1809 1809 prev = curr - 1
1810 1810 offset = self.end(prev)
1811 1811 delta = None
1812 1812 p1r, p2r = self.rev(p1), self.rev(p2)
1813 1813
1814 1814 # full versions are inserted when the needed deltas
1815 1815 # become comparable to the uncompressed text
1816 1816 if rawtext is None:
1817 1817 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1818 1818 cachedelta[1])
1819 1819 else:
1820 1820 textlen = len(rawtext)
1821 1821
1822 1822 # should we try to build a delta?
1823 1823 if prev != nullrev and self.storedeltachains:
1824 1824 tested = set()
1825 1825 # This condition is true most of the time when processing
1826 1826 # changegroup data into a generaldelta repo. The only time it
1827 1827 # isn't true is if this is the first revision in a delta chain
1828 1828 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1829 1829 if cachedelta and self._generaldelta and self._lazydeltabase:
1830 1830 # Assume what we received from the server is a good choice
1831 1831 # builddelta() will reuse the cache
1832 1832 candidatedelta = builddelta(cachedelta[0])
1833 1833 tested.add(cachedelta[0])
1834 1834 if self._isgooddelta(candidatedelta, textlen):
1835 1835 delta = candidatedelta
1836 1836 if delta is None and self._generaldelta:
1837 1837 # exclude any base already tested via the lazy delta path above
1838 1838 parents = [p for p in (p1r, p2r)
1839 1839 if p != nullrev and p not in tested]
1840 1840 if parents and not self._aggressivemergedeltas:
1841 1841 # Pick whichever parent is closer to us (to minimize the
1842 1842 # chance of having to build a fulltext).
1843 1843 parents = [max(parents)]
1844 1844 tested.update(parents)
1845 1845 pdeltas = []
1846 1846 for p in parents:
1847 1847 pd = builddelta(p)
1848 1848 if self._isgooddelta(pd, textlen):
1849 1849 pdeltas.append(pd)
1850 1850 if pdeltas:
1851 1851 delta = min(pdeltas, key=lambda x: x[1])
1852 1852 if delta is None and prev not in tested:
1853 1853 # other approaches failed; try against prev to hopefully save us a
1854 1854 # fulltext.
1855 1855 candidatedelta = builddelta(prev)
1856 1856 if self._isgooddelta(candidatedelta, textlen):
1857 1857 delta = candidatedelta
1858 1858 if delta is not None:
1859 1859 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1860 1860 else:
1861 1861 rawtext = buildtext()
1862 1862 data = self.compress(rawtext)
1863 1863 l = len(data[1]) + len(data[0])
1864 1864 base = chainbase = curr
1865 1865
1866 1866 e = (offset_type(offset, flags), l, textlen,
1867 1867 base, link, p1r, p2r, node)
1868 1868 self.index.insert(-1, e)
1869 1869 self.nodemap[node] = curr
1870 1870
1871 1871 entry = self._io.packentry(e, self.node, self.version, curr)
1872 1872 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1873 1873
1874 1874 if alwayscache and rawtext is None:
1875 1875 rawtext = buildtext()
1876 1876
1877 1877 if type(rawtext) == str: # only accept immutable objects
1878 1878 self._cache = (node, curr, rawtext)
1879 1879 self._chainbasecache[curr] = chainbase
1880 1880 return node
1881 1881
1882 1882 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1883 1883 # Files opened in a+ mode have inconsistent behavior on various
1884 1884 # platforms. Windows requires that a file positioning call be made
1885 1885 # when the file handle transitions between reads and writes. See
1886 1886 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1887 1887 # platforms, Python or the platform itself can be buggy. Some versions
1888 1888 # of Solaris have been observed to not append at the end of the file
1889 1889 # if the file was seeked to before the end. See issue4943 for more.
1890 1890 #
1891 1891 # We work around this issue by inserting a seek() before writing.
1892 1892 # Note: This is likely not necessary on Python 3.
1893 1893 ifh.seek(0, os.SEEK_END)
1894 1894 if dfh:
1895 1895 dfh.seek(0, os.SEEK_END)
1896 1896
1897 1897 curr = len(self) - 1
1898 1898 if not self._inline:
1899 1899 transaction.add(self.datafile, offset)
1900 1900 transaction.add(self.indexfile, curr * len(entry))
1901 1901 if data[0]:
1902 1902 dfh.write(data[0])
1903 1903 dfh.write(data[1])
1904 1904 ifh.write(entry)
1905 1905 else:
1906 1906 offset += curr * self._io.size
1907 1907 transaction.add(self.indexfile, offset, curr)
1908 1908 ifh.write(entry)
1909 1909 ifh.write(data[0])
1910 1910 ifh.write(data[1])
1911 1911 self.checkinlinesize(transaction, ifh)
1912 1912
1913 def addgroup(self, deltas, transaction, addrevisioncb=None):
1913 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
1914 1914 """
1915 1915 add a delta group
1916 1916
1917 1917 given a set of deltas, add them to the revision log. the
1918 1918 first delta is against its parent, which should be in our
1919 1919 log, the rest are against the previous delta.
1920 1920
1921 1921 If ``addrevisioncb`` is defined, it will be called with arguments of
1922 1922 this revlog and the node that was added.
1923 1923 """
1924 1924
1925 1925 nodes = []
1926 1926
1927 1927 r = len(self)
1928 1928 end = 0
1929 1929 if r:
1930 1930 end = self.end(r - 1)
1931 1931 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1932 1932 isize = r * self._io.size
1933 1933 if self._inline:
1934 1934 transaction.add(self.indexfile, end + isize, r)
1935 1935 dfh = None
1936 1936 else:
1937 1937 transaction.add(self.indexfile, isize, r)
1938 1938 transaction.add(self.datafile, end)
1939 1939 dfh = self.opener(self.datafile, "a+")
1940 1940 def flush():
1941 1941 if dfh:
1942 1942 dfh.flush()
1943 1943 ifh.flush()
1944 1944 try:
1945 1945 # loop through our set of deltas
1946 1946 for data in deltas:
1947 node, p1, p2, link, deltabase, delta, flags = data
1947 node, p1, p2, linknode, deltabase, delta, flags = data
1948 link = linkmapper(linknode)
1948 1949 flags = flags or REVIDX_DEFAULT_FLAGS
1949 1950
1950 1951 nodes.append(node)
1951 1952
1952 1953 if node in self.nodemap:
1953 1954 # this can happen if two branches make the same change
1954 1955 continue
1955 1956
1956 1957 for p in (p1, p2):
1957 1958 if p not in self.nodemap:
1958 1959 raise LookupError(p, self.indexfile,
1959 1960 _('unknown parent'))
1960 1961
1961 1962 if deltabase not in self.nodemap:
1962 1963 raise LookupError(deltabase, self.indexfile,
1963 1964 _('unknown delta base'))
1964 1965
1965 1966 baserev = self.rev(deltabase)
1966 1967
1967 1968 if baserev != nullrev and self.iscensored(baserev):
1968 1969 # if base is censored, delta must be full replacement in a
1969 1970 # single patch operation
1970 1971 hlen = struct.calcsize(">lll")
1971 1972 oldlen = self.rawsize(baserev)
1972 1973 newlen = len(delta) - hlen
1973 1974 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1974 1975 raise error.CensoredBaseError(self.indexfile,
1975 1976 self.node(baserev))
1976 1977
1977 1978 if not flags and self._peek_iscensored(baserev, delta, flush):
1978 1979 flags |= REVIDX_ISCENSORED
1979 1980
1980 1981 # We assume consumers of addrevisioncb will want to retrieve
1981 1982 # the added revision, which will require a call to
1982 1983 # revision(). revision() will fast path if there is a cache
1983 1984 # hit. So, we tell _addrevision() to always cache in this case.
1984 1985 # We're only using addgroup() in the context of changegroup
1985 1986 # generation so the revision data can always be handled as raw
1986 1987 # by the flagprocessor.
1987 1988 self._addrevision(node, None, transaction, link,
1988 1989 p1, p2, flags, (baserev, delta),
1989 1990 ifh, dfh,
1990 1991 alwayscache=bool(addrevisioncb))
1991 1992
1992 1993 if addrevisioncb:
1993 1994 addrevisioncb(self, node)
1994 1995
1995 1996 if not dfh and not self._inline:
1996 1997 # addrevision switched from inline to conventional
1997 1998 # reopen the index
1998 1999 ifh.close()
1999 2000 dfh = self.opener(self.datafile, "a+")
2000 2001 ifh = self.opener(self.indexfile, "a+",
2001 2002 checkambig=self._checkambig)
2002 2003 finally:
2003 2004 if dfh:
2004 2005 dfh.close()
2005 2006 ifh.close()
2006 2007
2007 2008 return nodes
2008 2009
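# Illustrative sketch of the calling convention introduced by this change:
# each delta tuple now carries a linknode, and the caller supplies a
# 'linkmapper' callable that resolves it to the linkrev stored in the
# index. When applying a changegroup that is typically the changelog's
# rev(); the names below are illustrative, not real objects in this file.
def _makelinkmapper(changelog):
    def linkmapper(linknode):
        return changelog.rev(linknode)
    return linkmapper

# filelog.addgroup(deltas, _makelinkmapper(repo.changelog), transaction,
#                  addrevisioncb=callback)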
2009 2010 def iscensored(self, rev):
2010 2011 """Check if a file revision is censored."""
2011 2012 return False
2012 2013
2013 2014 def _peek_iscensored(self, baserev, delta, flush):
2014 2015 """Quickly check if a delta produces a censored revision."""
2015 2016 return False
2016 2017
2017 2018 def getstrippoint(self, minlink):
2018 2019 """find the minimum rev that must be stripped to strip the linkrev
2019 2020
2020 2021 Returns a tuple containing the minimum rev and a set of all revs that
2021 2022 have linkrevs that will be broken by this strip.
2022 2023 """
2023 2024 brokenrevs = set()
2024 2025 strippoint = len(self)
2025 2026
2026 2027 heads = {}
2027 2028 futurelargelinkrevs = set()
2028 2029 for head in self.headrevs():
2029 2030 headlinkrev = self.linkrev(head)
2030 2031 heads[head] = headlinkrev
2031 2032 if headlinkrev >= minlink:
2032 2033 futurelargelinkrevs.add(headlinkrev)
2033 2034
2034 2035 # This algorithm involves walking down the rev graph, starting at the
2035 2036 # heads. Since the revs are topologically sorted according to linkrev,
2036 2037 # once all head linkrevs are below the minlink, we know there are
2037 2038 # no more revs that could have a linkrev greater than minlink.
2038 2039 # So we can stop walking.
2039 2040 while futurelargelinkrevs:
2040 2041 strippoint -= 1
2041 2042 linkrev = heads.pop(strippoint)
2042 2043
2043 2044 if linkrev < minlink:
2044 2045 brokenrevs.add(strippoint)
2045 2046 else:
2046 2047 futurelargelinkrevs.remove(linkrev)
2047 2048
2048 2049 for p in self.parentrevs(strippoint):
2049 2050 if p != nullrev:
2050 2051 plinkrev = self.linkrev(p)
2051 2052 heads[p] = plinkrev
2052 2053 if plinkrev >= minlink:
2053 2054 futurelargelinkrevs.add(plinkrev)
2054 2055
2055 2056 return strippoint, brokenrevs
2056 2057
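# Usage sketch for getstrippoint (hypothetical values; rl is a revlog
# and 5 the minimum changelog rev being stripped):
#
#   strippoint, brokenrevs = rl.getstrippoint(5)
#   # rl must then be truncated at strippoint; strip() below does that.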
2057 2058 def strip(self, minlink, transaction):
2058 2059 """truncate the revlog on the first revision with a linkrev >= minlink
2059 2060
2060 2061 This function is called when we're stripping revision minlink and
2061 2062 its descendants from the repository.
2062 2063
2063 2064 We have to remove all revisions with linkrev >= minlink, because
2064 2065 the equivalent changelog revisions will be renumbered after the
2065 2066 strip.
2066 2067
2067 2068 So we truncate the revlog on the first of these revisions, and
2068 2069 trust that the caller has saved the revisions that shouldn't be
2069 2070 removed and that it'll re-add them after this truncation.
2070 2071 """
2071 2072 if len(self) == 0:
2072 2073 return
2073 2074
2074 2075 rev, _ = self.getstrippoint(minlink)
2075 2076 if rev == len(self):
2076 2077 return
2077 2078
2078 2079 # first truncate the files on disk
2079 2080 end = self.start(rev)
2080 2081 if not self._inline:
2081 2082 transaction.add(self.datafile, end)
2082 2083 end = rev * self._io.size
2083 2084 else:
2084 2085 end += rev * self._io.size
2085 2086
2086 2087 transaction.add(self.indexfile, end)
2087 2088
2088 2089 # then reset internal state in memory to forget those revisions
2089 2090 self._cache = None
2090 2091 self._chaininfocache = {}
2091 2092 self._chunkclear()
2092 2093 for x in xrange(rev, len(self)):
2093 2094 del self.nodemap[self.node(x)]
2094 2095
2095 2096 del self.index[rev:-1]
2096 2097
2097 2098 def checksize(self):
2098 2099 expected = 0
2099 2100 if len(self):
2100 2101 expected = max(0, self.end(len(self) - 1))
2101 2102
2102 2103 try:
2103 2104 f = self.opener(self.datafile)
2104 2105 f.seek(0, 2)
2105 2106 actual = f.tell()
2106 2107 f.close()
2107 2108 dd = actual - expected
2108 2109 except IOError as inst:
2109 2110 if inst.errno != errno.ENOENT:
2110 2111 raise
2111 2112 dd = 0
2112 2113
2113 2114 try:
2114 2115 f = self.opener(self.indexfile)
2115 2116 f.seek(0, 2)
2116 2117 actual = f.tell()
2117 2118 f.close()
2118 2119 s = self._io.size
2119 2120 i = max(0, actual // s)
2120 2121 di = actual - (i * s)
2121 2122 if self._inline:
2122 2123 databytes = 0
2123 2124 for r in self:
2124 2125 databytes += max(0, self.length(r))
2125 2126 dd = 0
2126 2127 di = actual - len(self) * s - databytes
2127 2128 except IOError as inst:
2128 2129 if inst.errno != errno.ENOENT:
2129 2130 raise
2130 2131 di = 0
2131 2132
2132 2133 return (dd, di)
2133 2134
2134 2135 def files(self):
2135 2136 res = [self.indexfile]
2136 2137 if not self._inline:
2137 2138 res.append(self.datafile)
2138 2139 return res
2139 2140
2140 2141 DELTAREUSEALWAYS = 'always'
2141 2142 DELTAREUSESAMEREVS = 'samerevs'
2142 2143 DELTAREUSENEVER = 'never'
2143 2144
2144 2145 DELTAREUSEALL = {'always', 'samerevs', 'never'}
2145 2146
2146 2147 def clone(self, tr, destrevlog, addrevisioncb=None,
2147 2148 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2148 2149 """Copy this revlog to another, possibly with format changes.
2149 2150
2150 2151 The destination revlog will contain the same revisions and nodes.
2151 2152 However, it may not be bit-for-bit identical due to e.g. delta encoding
2152 2153 differences.
2153 2154
2154 2155 The ``deltareuse`` argument controls how deltas from the existing revlog
2155 2156 are preserved in the destination revlog. The argument can have the
2156 2157 following values:
2157 2158
2158 2159 DELTAREUSEALWAYS
2159 2160 Deltas will always be reused (if possible), even if the destination
2160 2161 revlog would not select the same revisions for the delta. This is the
2161 2162 fastest mode of operation.
2162 2163 DELTAREUSESAMEREVS
2163 2164 Deltas will be reused if the destination revlog would pick the same
2164 2165 revisions for the delta. This mode strikes a balance between speed
2165 2166 and optimization.
2166 2167 DELTAREUSENEVER
2167 2168 Deltas will never be reused. This is the slowest mode of execution.
2168 2169 This mode can be used to recompute deltas (e.g. if the diff/delta
2169 2170 algorithm changes).
2170 2171
2171 2172 Delta computation can be slow, so the choice of delta reuse policy can
2172 2173 significantly affect run time.
2173 2174
2174 2175 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2175 2176 two extremes. Deltas will be reused when appropriate. But if the
2176 2177 destination revlog would choose a better base revision, the delta is recomputed. This means if you
2177 2178 are converting a non-generaldelta revlog to a generaldelta revlog,
2178 2179 deltas will be recomputed if the delta's parent isn't a parent of the
2179 2180 revision.
2180 2181
2181 2182 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2182 2183 controls whether to compute deltas against both parents for merges.
2183 2184 If it is not set, the destination revlog's existing setting is kept.
2184 2185 """
2185 2186 if deltareuse not in self.DELTAREUSEALL:
2186 2187 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2187 2188
2188 2189 if len(destrevlog):
2189 2190 raise ValueError(_('destination revlog is not empty'))
2190 2191
2191 2192 if getattr(self, 'filteredrevs', None):
2192 2193 raise ValueError(_('source revlog has filtered revisions'))
2193 2194 if getattr(destrevlog, 'filteredrevs', None):
2194 2195 raise ValueError(_('destination revlog has filtered revisions'))
2195 2196
2196 2197 # lazydeltabase controls whether to reuse a cached delta, if possible.
2197 2198 oldlazydeltabase = destrevlog._lazydeltabase
2198 2199 oldamd = destrevlog._aggressivemergedeltas
2199 2200
2200 2201 try:
2201 2202 if deltareuse == self.DELTAREUSEALWAYS:
2202 2203 destrevlog._lazydeltabase = True
2203 2204 elif deltareuse == self.DELTAREUSESAMEREVS:
2204 2205 destrevlog._lazydeltabase = False
2205 2206
2206 2207 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2207 2208
2208 2209 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2209 2210 self.DELTAREUSESAMEREVS)
2210 2211
2211 2212 index = self.index
2212 2213 for rev in self:
2213 2214 entry = index[rev]
2214 2215
2215 2216 # Some classes override linkrev to take filtered revs into
2216 2217 # account. Use raw entry from index.
2217 2218 flags = entry[0] & 0xffff
2218 2219 linkrev = entry[4]
2219 2220 p1 = index[entry[5]][7]
2220 2221 p2 = index[entry[6]][7]
2221 2222 node = entry[7]
2222 2223
2223 2224 # (Possibly) reuse the delta from the revlog if allowed and
2224 2225 # the revlog chunk is a delta.
2225 2226 cachedelta = None
2226 2227 rawtext = None
2227 2228 if populatecachedelta:
2228 2229 dp = self.deltaparent(rev)
2229 2230 if dp != nullrev:
2230 2231 cachedelta = (dp, str(self._chunk(rev)))
2231 2232
2232 2233 if not cachedelta:
2233 2234 rawtext = self.revision(rev, raw=True)
2234 2235
2235 2236 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2236 2237 checkambig=False)
2237 2238 dfh = None
2238 2239 if not destrevlog._inline:
2239 2240 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2240 2241 try:
2241 2242 destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
2242 2243 flags, cachedelta, ifh, dfh)
2243 2244 finally:
2244 2245 if dfh:
2245 2246 dfh.close()
2246 2247 ifh.close()
2247 2248
2248 2249 if addrevisioncb:
2249 2250 addrevisioncb(self, rev, node)
2250 2251 finally:
2251 2252 destrevlog._lazydeltabase = oldlazydeltabase
2252 2253 destrevlog._aggressivemergedeltas = oldamd
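A short usage sketch for the clone API above, assuming a populated source revlog rl, an empty destination revlog dest, and an open transaction tr (all names hypothetical). Forcing DELTAREUSENEVER recomputes every delta, the slowest but most thorough policy:

    # Copy rl into dest, recomputing all deltas from scratch.
    rl.clone(tr, dest, deltareuse=rl.DELTAREUSENEVER)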
@@ -1,316 +1,315 b''
1 1 # test revlog interaction about raw data (flagprocessor)
2 2
3 3 from __future__ import absolute_import, print_function
4 4
5 5 import sys
6 6
7 7 from mercurial import (
8 8 encoding,
9 9 node,
10 10 revlog,
11 11 transaction,
12 12 vfs,
13 13 )
14 14
15 15 # TESTTMP is optional. This makes it convenient to run without run-tests.py
16 16 tvfs = vfs.vfs(encoding.environ.get('TESTTMP', b'/tmp'))
17 17
18 18 # Enable generaldelta otherwise revlog won't use delta as expected by the test
19 19 tvfs.options = {'generaldelta': True, 'revlogv1': True}
20 20
21 21 # The test wants to control whether to use delta explicitly, based on
22 22 # "storedeltachains".
23 23 revlog.revlog._isgooddelta = lambda self, d, textlen: self.storedeltachains
24 24
25 25 def abort(msg):
26 26 print('abort: %s' % msg)
27 27 # Exit with status 0 so run-tests.py can compare the output.
28 28 sys.exit()
29 29
30 30 # Register a revlog processor for flag EXTSTORED.
31 31 #
32 32 # It simply prepends a fixed header and replaces '1' with 'i'. So it does
33 33 # both insertion and replacement, which makes it interesting for testing
34 34 # revlog's line-based deltas.
35 35 _extheader = b'E\n'
36 36
37 37 def readprocessor(self, rawtext):
38 38 # True: the returned text could be used to verify hash
39 39 text = rawtext[len(_extheader):].replace(b'i', b'1')
40 40 return text, True
41 41
42 42 def writeprocessor(self, text):
43 43 # False: the returned rawtext shouldn't be used to verify hash
44 44 rawtext = _extheader + text.replace(b'1', b'i')
45 45 return rawtext, False
46 46
47 47 def rawprocessor(self, rawtext):
48 48 # False: do not verify hash. Only the content returned by "readprocessor"
49 49 # can be used to verify hash.
50 50 return False
51 51
52 52 revlog.addflagprocessor(revlog.REVIDX_EXTSTORED,
53 53 (readprocessor, writeprocessor, rawprocessor))
54 54
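# Round-trip sketch for the processors above (an illustrative snippet,
# not part of the test): writing prepends the header and maps '1' to
# 'i'; reading reverses both, so text survives a full write/read cycle.
#
#   rawtext = writeprocessor(None, b'a1b\n')[0]   # b'E\naib\n'
#   assert readprocessor(None, rawtext)[0] == b'a1b\n'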
55 55 # Utilities about reading and appending revlog
56 56
57 57 def newtransaction():
58 58 # A transaction is required to write revlogs
59 59 report = lambda msg: None
60 60 return transaction.transaction(report, tvfs, {'plain': tvfs}, b'journal')
61 61
62 62 def newrevlog(name=b'_testrevlog.i', recreate=False):
63 63 if recreate:
64 64 tvfs.tryunlink(name)
65 65 rlog = revlog.revlog(tvfs, name)
66 66 return rlog
67 67
68 68 def appendrev(rlog, text, tr, isext=False, isdelta=True):
69 69 '''Append a revision. If isext is True, set the EXTSTORED flag so flag
70 70 processor will be used (and rawtext is different from text). If isdelta is
71 71 True, force the revision to be a delta, otherwise it's full text.
72 72 '''
73 73 nextrev = len(rlog)
74 74 p1 = rlog.node(nextrev - 1)
75 75 p2 = node.nullid
76 76 if isext:
77 77 flags = revlog.REVIDX_EXTSTORED
78 78 else:
79 79 flags = revlog.REVIDX_DEFAULT_FLAGS
80 80 # Change storedeltachains temporarily, to override revlog's delta decision
81 81 rlog.storedeltachains = isdelta
82 82 try:
83 83 rlog.addrevision(text, tr, nextrev, p1, p2, flags=flags)
84 84 return nextrev
85 85 except Exception as ex:
86 86 abort('rev %d: failed to append: %s' % (nextrev, ex))
87 87 finally:
88 88 # Restore storedeltachains. It is always True, see revlog.__init__
89 89 rlog.storedeltachains = True
90 90
91 91 def addgroupcopy(rlog, tr, destname=b'_destrevlog.i', optimaldelta=True):
92 92 '''Copy revlog to destname using revlog.addgroup. Return the copied revlog.
93 93
94 94 This emulates push or pull, which use changegroups. A changegroup requires
95 95 a repo to work. We don't have a repo, so a dummy changegroup is used.
96 96
97 97 If optimaldelta is True, use an optimal delta parent, so the destination
98 98 revlog can probably reuse it. Otherwise it builds a sub-optimal delta, and
99 99 the destination revlog needs more work to use it.
100 100
101 101 This exercises the revlog.addgroup (and revlog._addrevision(text=None))
102 102 code paths, which are not covered by "appendrev" alone.
103 103 '''
104 104 class dummychangegroup(object):
105 105 @staticmethod
106 106 def deltachunk(pnode):
107 107 pnode = pnode or node.nullid
108 108 parentrev = rlog.rev(pnode)
109 109 r = parentrev + 1
110 110 if r >= len(rlog):
111 111 return {}
112 112 if optimaldelta:
113 113 deltaparent = parentrev
114 114 else:
115 115 # suboptimal deltaparent
116 116 deltaparent = min(0, parentrev)
117 117 return {'node': rlog.node(r), 'p1': pnode, 'p2': node.nullid,
118 118 'cs': rlog.node(rlog.linkrev(r)), 'flags': rlog.flags(r),
119 119 'deltabase': rlog.node(deltaparent),
120 120 'delta': rlog.revdiff(deltaparent, r)}
121 121
122 def deltaiter(self, linkmapper):
122 def deltaiter(self):
123 123 chain = None
124 124 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
125 125 node = chunkdata['node']
126 126 p1 = chunkdata['p1']
127 127 p2 = chunkdata['p2']
128 128 cs = chunkdata['cs']
129 129 deltabase = chunkdata['deltabase']
130 130 delta = chunkdata['delta']
131 131 flags = chunkdata['flags']
132 132
133 link = linkmapper(cs)
134 133 chain = node
135 134
136 yield (node, p1, p2, link, deltabase, delta, flags)
135 yield (node, p1, p2, cs, deltabase, delta, flags)
137 136
138 137 def linkmap(lnode):
139 138 return rlog.rev(lnode)
140 139
141 140 dlog = newrevlog(destname, recreate=True)
142 dummydeltas = dummychangegroup().deltaiter(linkmap)
143 dlog.addgroup(dummydeltas, tr)
141 dummydeltas = dummychangegroup().deltaiter()
142 dlog.addgroup(dummydeltas, linkmap, tr)
144 143 return dlog
145 144
146 145 def lowlevelcopy(rlog, tr, destname=b'_destrevlog.i'):
147 146 '''Like addgroupcopy, but use the low level revlog._addrevision directly.
148 147
149 148 It exercises some code paths that are hard to reach easily otherwise.
150 149 '''
151 150 dlog = newrevlog(destname, recreate=True)
152 151 for r in rlog:
153 152 p1 = rlog.node(r - 1)
154 153 p2 = node.nullid
155 154 if r == 0:
156 155 text = rlog.revision(r, raw=True)
157 156 cachedelta = None
158 157 else:
159 158 # deltaparent is more interesting if it has the EXTSTORED flag.
160 159 deltaparent = max([0] + [p for p in range(r - 2) if rlog.flags(p)])
161 160 text = None
162 161 cachedelta = (deltaparent, rlog.revdiff(deltaparent, r))
163 162 flags = rlog.flags(r)
164 163 ifh = dfh = None
165 164 try:
166 165 ifh = dlog.opener(dlog.indexfile, 'a+')
167 166 if not dlog._inline:
168 167 dfh = dlog.opener(dlog.datafile, 'a+')
169 168 dlog._addrevision(rlog.node(r), text, tr, r, p1, p2, flags,
170 169 cachedelta, ifh, dfh)
171 170 finally:
172 171 if dfh is not None:
173 172 dfh.close()
174 173 if ifh is not None:
175 174 ifh.close()
176 175 return dlog
177 176
178 177 # Utilities to generate revisions for testing
179 178
180 179 def genbits(n):
181 180 '''Given a number n, generate (2 ** (n * 2) + 1) numbers in range(2 ** n).
182 181 i.e. the generated numbers have a width of n bits.
183 182
184 183 The combination of two adjacent numbers will cover all possible cases.
185 184 That is to say, given any x, y where both x and y are in range(2 ** n),
186 185 there is an x followed immediately by y in the generated sequence.
187 186 '''
188 187 m = 2 ** n
189 188
190 189 # Gray Code. See https://en.wikipedia.org/wiki/Gray_code
191 190 gray = lambda x: x ^ (x >> 1)
192 191 reversegray = dict((gray(i), i) for i in range(m))
193 192
194 193 # Generate (n * 2) bit gray code, yield lower n bits as X, and look for
195 194 # the next unused gray code where higher n bits equal to X.
196 195
197 196 # For gray codes whose higher bits are X, a[X] of them have been used.
198 197 a = [0] * m
199 198
200 199 # Iterate from 0.
201 200 x = 0
202 201 yield x
203 202 for i in range(m * m):
204 203 x = reversegray[x]
205 204 y = gray(a[x] + x * m) & (m - 1)
206 205 assert a[x] < m
207 206 a[x] += 1
208 207 x = y
209 208 yield x
210 209
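# Sanity-check sketch for the coverage claim in the docstring above
# (run by hand, not part of the test): for n == 2 the 17 generated
# numbers form 16 adjacent pairs, one per possible (x, y) combination.
#
#   seq = list(genbits(2))
#   assert len(set(zip(seq, seq[1:]))) == 2 ** (2 * 2)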
211 210 def gentext(rev):
212 211 '''Given a revision number, generate dummy text'''
213 212 return b''.join(b'%d\n' % j for j in range(-1, rev % 5))
214 213
215 214 def writecases(rlog, tr):
216 215 '''Write some revisions that are of interest to the test.
217 216
218 217 The test is interested in 3 properties of a revision:
219 218
220 219 - Is it a delta or a full text? (isdelta)
221 220 This is to catch some delta application issues.
222 221 - Does it have a flag of EXTSTORED? (isext)
223 222 This is to catch some flag processor issues, especially when
224 223 they interact with revlog deltas.
225 224 - Is its text empty? (isempty)
226 225 This is less important. It is intended to catch careless
227 226 checks like "if text" instead of "if text is None". Note: if a flag
228 227 processor is involved, the raw text may not be empty.
229 228
230 229 Write 65 revisions, so that all combinations of the above flags for
231 230 adjacent revisions are covered. That is to say,
232 231
233 232 len(set(
234 233 (r.delta, r.ext, r.empty, (r+1).delta, (r+1).ext, (r+1).empty)
235 234 for r in range(len(rlog) - 1)
236 235 )) is 64.
237 236
238 237 Where "r.delta", "r.ext", and "r.empty" are booleans matching properties
239 238 mentioned above.
240 239
241 240 Return expected [(text, rawtext)].
242 241 '''
243 242 result = []
244 243 for i, x in enumerate(genbits(3)):
245 244 isdelta, isext, isempty = bool(x & 1), bool(x & 2), bool(x & 4)
246 245 if isempty:
247 246 text = b''
248 247 else:
249 248 text = gentext(i)
250 249 rev = appendrev(rlog, text, tr, isext=isext, isdelta=isdelta)
251 250
252 251 # Verify text, rawtext, and rawsize
253 252 if isext:
254 253 rawtext = writeprocessor(None, text)[0]
255 254 else:
256 255 rawtext = text
257 256 if rlog.rawsize(rev) != len(rawtext):
258 257 abort('rev %d: wrong rawsize' % rev)
259 258 if rlog.revision(rev, raw=False) != text:
260 259 abort('rev %d: wrong text' % rev)
261 260 if rlog.revision(rev, raw=True) != rawtext:
262 261 abort('rev %d: wrong rawtext' % rev)
263 262 result.append((text, rawtext))
264 263
265 264 # Verify flags like isdelta, isext work as expected
266 265 if bool(rlog.deltaparent(rev) > -1) != isdelta:
267 266 abort('rev %d: isdelta is ineffective' % rev)
268 267 if bool(rlog.flags(rev)) != isext:
269 268 abort('rev %d: isext is ineffective' % rev)
270 269 return result
271 270
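# Coverage-check sketch for the 64-combination claim in writecases'
# docstring (hypothetical; assumes writecases(rlog, tr) already ran):
#
#   props = [(rlog.deltaparent(r) > -1, bool(rlog.flags(r)),
#             rlog.revision(r, raw=False) == b'') for r in rlog]
#   assert len(set(zip(props, props[1:]))) == 64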
272 271 # Main test and checking
273 272
274 273 def checkrevlog(rlog, expected):
275 274 '''Check if revlog has expected contents. expected is [(text, rawtext)]'''
276 275 # Test using different access orders. This could expose some issues
277 276 # depending on revlog caching (see revlog._cache).
278 277 for r0 in range(len(rlog) - 1):
279 278 r1 = r0 + 1
280 279 for revorder in [[r0, r1], [r1, r0]]:
281 280 for raworder in [[True], [False], [True, False], [False, True]]:
282 281 nlog = newrevlog()
283 282 for rev in revorder:
284 283 for raw in raworder:
285 284 t = nlog.revision(rev, raw=raw)
286 285 if t != expected[rev][int(raw)]:
287 286 abort('rev %d: corrupted %stext'
288 287 % (rev, raw and 'raw' or ''))
289 288
290 289 def maintest():
291 290 expected = rl = None
292 291 with newtransaction() as tr:
293 292 rl = newrevlog(recreate=True)
294 293 expected = writecases(rl, tr)
295 294 checkrevlog(rl, expected)
296 295 print('local test passed')
297 296 # Copy via revlog.addgroup
298 297 rl1 = addgroupcopy(rl, tr)
299 298 checkrevlog(rl1, expected)
300 299 rl2 = addgroupcopy(rl, tr, optimaldelta=False)
301 300 checkrevlog(rl2, expected)
302 301 print('addgroupcopy test passed')
303 302 # Copy via revlog.clone
304 303 rl3 = newrevlog(name='_destrevlog3.i', recreate=True)
305 304 rl.clone(tr, rl3)
306 305 checkrevlog(rl3, expected)
307 306 print('clone test passed')
308 307 # Copy via low-level revlog._addrevision
309 308 rl4 = lowlevelcopy(rl, tr)
310 309 checkrevlog(rl4, expected)
311 310 print('lowlevelcopy test passed')
312 311
313 312 try:
314 313 maintest()
315 314 except Exception as ex:
316 315 abort('crashed: %s' % ex)