##// END OF EJS Templates
manifest: get rid of manifest.readshallowfast...
Durham Goode -
r30294:bce79dfc default
parent child Browse files
Show More
@@ -1,1043 +1,1044 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import tempfile
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 hex,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from . import (
23 23 branchmap,
24 24 dagutil,
25 25 discovery,
26 26 error,
27 27 mdiff,
28 28 phases,
29 29 util,
30 30 )
31 31
# struct format strings for the per-revision delta headers.
# cg1: node, p1, p2, linknode (four 20-byte nodes; delta base is implicit).
# cg2: adds an explicit deltabase node between p2 and linknode.
# cg3: same as cg2 plus a 16-bit flags field, explicitly big-endian.
_CHANGEGROUPV1_DELTA_HEADER = "20s20s20s20s"
_CHANGEGROUPV2_DELTA_HEADER = "20s20s20s20s20s"
_CHANGEGROUPV3_DELTA_HEADER = ">20s20s20s20s20sH"

def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    data = stream.read(n)
    if len(data) < n:
        # a short read means the peer hung up or the bundle is truncated
        raise error.Abort(
            _("stream ended unexpectedly"
              " (got %d bytes, expected %d)") % (len(data), n))
    return data
def getchunk(stream):
    """return the next chunk from stream as a string"""
    header = readexactly(stream, 4)
    length = struct.unpack(">l", header)[0]
    if length <= 4:
        # a length of 0 terminates the group; any other value smaller
        # than the 4-byte header itself is corrupt
        if length:
            raise error.Abort(_("invalid chunk length %d") % length)
        return ""
    # the on-wire length counts the header, so the payload is 4 shorter
    return readexactly(stream, length - 4)
def chunkheader(length):
    """return a changegroup chunk header (string)"""
    # the wire length field covers the 4-byte header plus the payload
    return struct.pack(">l", 4 + length)
def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk"""
    # an explicit zero length is the end-of-group marker on the wire
    return struct.pack(">l", 0)
def combineresults(results):
    """logic to combine 0 or more addchangegroup results into one

    Each input code encodes a head-count change (see cg1unpacker.apply):
    1 means unchanged, 2..n means n-1 heads added, -2..-n means heads
    removed, 0 means nothing changed. The combined code aggregates the
    net head delta across all results.
    """
    delta = 0  # net number of heads added/removed across all results
    combined = 1
    for code in results:
        # a zero result marks an empty/no-op changegroup
        if code == 0:
            combined = 0
            break
        if code < -1:
            delta += code + 1
        elif code > 1:
            delta += code - 1
    # NOTE: a nonzero accumulated delta overrides the zero result above,
    # matching the original control flow exactly
    if delta > 0:
        combined = 1 + delta
    elif delta < 0:
        combined = -1 + delta
    return combined
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    fh = None
    cleanup = None
    try:
        if filename:
            if vfs:
                fh = vfs.open(filename, "wb")
            else:
                # Increase default buffer size because default is usually
                # small (4k is common on Linux).
                fh = open(filename, "wb", 131072)
        else:
            fd, filename = tempfile.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fh = os.fdopen(fd, "wb")
        # remember the path so a failure mid-write removes the partial file
        cleanup = filename
        for c in chunks:
            fh.write(c)
        # all chunks written successfully: disarm the cleanup
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            # an exception escaped before the write completed; unlink the
            # partial file through the same layer (vfs or os) that made it
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        # alg is the two-letter bundle compression marker ('UN', 'BZ', 'GZ')
        if alg == 'UN':
            alg = None # get more modern without breaking too much
        if not alg in util.decompressors:
            raise error.Abort(_('unknown stream compression type: %s')
                              % alg)
        if alg == 'BZ':
            # NOTE(review): presumably the 'BZ' magic was already consumed
            # upstream, hence the truncated-header-aware decompressor
            alg = '_truncatedBZ'
        self._stream = util.decompressors[alg](fh)
        self._type = alg
        self.extras = extras or {}
        # optional per-chunk progress callback, installed by apply()
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def _chunklength(self):
        """Read a chunk-length header; return the payload length.

        Returns 0 for the end-of-group marker and fires the progress
        callback for every real chunk.
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        # the on-wire length includes the 4-byte header itself
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            # empty header: end of the filelog groups
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        """Split an unpacked delta header into its parts.

        cg1 has no explicit delta base: deltas chain against the
        previous revision in the stream (or p1 for the first one).
        """
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read one delta chunk; return {} at end of group."""
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = struct.unpack(self.deltaheader, headerdata)
        # the delta payload is whatever remains after the fixed header
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return {'node': node, 'p1': p1, 'p2': p2, 'cs': cs,
                'deltabase': deltabase, 'delta': delta, 'flags': flags}

    def getchunks(self):
        """returns all the chunks contains in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parse the changegroup data, otherwise it will
        block in case of sshrepo because it don't know the end of the stream.
        """
        # an empty chunkgroup is the end of the changegroup
        # a changegroup has at least 2 chunkgroups (changelog and manifest).
        # after that, changegroup versions 1 and 2 have a series of groups
        # with one group per file. changegroup 3 has a series of directory
        # manifests before the files.
        count = 0
        emptycount = 0
        while emptycount < self._grouplistcount:
            empty = True
            count += 1
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # only groups after changelog+manifest may legitimately
                    # be empty, hence the count > 2 guard
                    if empty and count > 2:
                        emptycount += 1
                    break
                empty = False
                yield chunkheader(len(chunk))
                pos = 0
                # re-emit the payload in 1MB slices to bound memory use
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
        yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog, numchanges):
        """Consume the manifest group and add it to the repo."""
        # We know that we'll never have more manifests than we had
        # changesets.
        self.callback = prog(_('manifests'), numchanges)
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        repo.manifest.addgroup(self, revmap, trp)
        repo.ui.progress(_('manifests'), None)
        self.callback = None

    def apply(self, repo, srctype, url, emptyok=False,
              targetphase=phases.draft, expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            with repo.transaction("\n".join([srctype,
                                             util.hidepassword(url)])) as tr:
                # The transaction could have been created before and already
                # carries source information. In this case we use the top
                # level data. We overwrite the argument because we need to use
                # the top level value (if they exist) in this function.
                srctype = tr.hookargs.setdefault('source', srctype)
                url = tr.hookargs.setdefault('url', url)
                repo.hook('prechangegroup', throw=True, **tr.hookargs)

                # write changelog data to temp files so concurrent readers
                # will not see an inconsistent view
                cl = repo.changelog
                cl.delayupdate(tr)
                oldheads = cl.heads()

                trp = weakref.proxy(tr)
                # pull off the changeset group
                repo.ui.status(_("adding changesets\n"))
                clstart = len(cl)
                # small helper driving ui.progress from the per-chunk callback
                class prog(object):
                    def __init__(self, step, total):
                        self._step = step
                        self._total = total
                        self._count = 1
                    def __call__(self):
                        repo.ui.progress(self._step, self._count,
                                         unit=_('chunks'), total=self._total)
                        self._count += 1
                self.callback = prog(_('changesets'), expectedtotal)

                efiles = set()
                def onchangelog(cl, node):
                    efiles.update(cl.readfiles(node))

                self.changelogheader()
                srccontent = cl.addgroup(self, csmap, trp,
                                         addrevisioncb=onchangelog)
                efiles = len(efiles)

                if not (srccontent or emptyok):
                    raise error.Abort(_("received changelog group is empty"))
                clend = len(cl)
                changesets = clend - clstart
                repo.ui.progress(_('changesets'), None)
                self.callback = None

                # pull off the manifest group
                repo.ui.status(_("adding manifests\n"))
                self._unpackmanifests(repo, revmap, trp, prog, changesets)

                needfiles = {}
                if repo.ui.configbool('server', 'validate', default=False):
                    cl = repo.changelog
                    ml = repo.manifestlog
                    # validate incoming csets have their manifests
                    for cset in xrange(clstart, clend):
                        mfnode = cl.changelogrevision(cset).manifest
                        mfest = ml[mfnode].readdelta()
                        # store file nodes we must see
                        for f, n in mfest.iteritems():
                            needfiles.setdefault(f, set()).add(n)

                # process the files
                repo.ui.status(_("adding file changes\n"))
                newrevs, newfiles = _addchangegroupfiles(
                    repo, self, revmap, trp, efiles, needfiles)
                revisions += newrevs
                files += newfiles

                # compute the head-count delta, ignoring closed branch heads
                dh = 0
                if oldheads:
                    heads = cl.heads()
                    dh = len(heads) - len(oldheads)
                    for h in heads:
                        if h not in oldheads and repo[h].closesbranch():
                            dh -= 1
                htext = ""
                if dh:
                    htext = _(" (%+d heads)") % dh

                repo.ui.status(_("added %d changesets"
                                 " with %d changes to %d files%s\n")
                               % (changesets, revisions, files, htext))
                repo.invalidatevolatilesets()

                if changesets > 0:
                    if 'node' not in tr.hookargs:
                        tr.hookargs['node'] = hex(cl.node(clstart))
                        tr.hookargs['node_last'] = hex(cl.node(clend - 1))
                        hookargs = dict(tr.hookargs)
                    else:
                        # a 'node' was already set by an earlier changegroup
                        # in this transaction; only override our local copy
                        hookargs = dict(tr.hookargs)
                        hookargs['node'] = hex(cl.node(clstart))
                        hookargs['node_last'] = hex(cl.node(clend - 1))
                    repo.hook('pretxnchangegroup', throw=True, **hookargs)

                added = [cl.node(r) for r in xrange(clstart, clend)]
                publishing = repo.publishing()
                if srctype in ('push', 'serve'):
                    # Old servers can not push the boundary themselves.
                    # New servers won't push the boundary if changeset already
                    # exists locally as secret
                    #
                    # We should not use added here but the list of all change in
                    # the bundle
                    if publishing:
                        phases.advanceboundary(repo, tr, phases.public,
                                               srccontent)
                    else:
                        # Those changesets have been pushed from the
                        # outside, their phases are going to be pushed
                        # alongside. Therefor `targetphase` is
                        # ignored.
                        phases.advanceboundary(repo, tr, phases.draft,
                                               srccontent)
                        phases.retractboundary(repo, tr, phases.draft, added)
                elif srctype != 'strip':
                    # publishing only alter behavior during push
                    #
                    # strip should not touch boundary at all
                    phases.retractboundary(repo, tr, targetphase, added)

                if changesets > 0:
                    if srctype != 'strip':
                        # During strip, branchcache is invalid but
                        # coming call to `destroyed` will repair it.
                        # In other case we can safely update cache on
                        # disk.
                        repo.ui.debug('updating the branch cache\n')
                        branchmap.updatecache(repo.filtered('served'))

                    def runhooks():
                        # These hooks run when the lock releases, not when the
                        # transaction closes. So it's possible for the changelog
                        # to have changed since we last saw it.
                        if clstart >= len(repo):
                            return

                        repo.hook("changegroup", **hookargs)

                        for n in added:
                            args = hookargs.copy()
                            args['node'] = hex(n)
                            del args['node_last']
                            repo.hook("incoming", **args)

                        newheads = [h for h in repo.heads()
                                    if h not in oldheads]
                        repo.ui.log("incoming",
                                    "%s incoming changes - new heads: %s\n",
                                    len(added),
                                    ', '.join([hex(c[:6]) for c in newheads]))

                    tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                    lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        # NOTE(review): dh is only bound if the transaction body ran to this
        # point without raising; an earlier exception propagates instead
        if dh < 0:
            return dh - 1
        else:
            return dh + 1
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # cg2 carries the delta base explicitly in the header, so
        # prevnode is not consulted (unlike cg1)
        node, p1, p2, deltabase, cs = headertuple
        flags = 0
        return node, p1, p2, deltabase, cs, flags
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # cg3 header already contains everything, including revlog flags
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog, numchanges):
        """Consume the root manifest group, then any directory manifests."""
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog,
                                                  numchanges)
        # keep reading filelog-style headers until the empty {} terminator
        for chunkdata in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            d = chunkdata["filename"]
            repo.ui.debug("adding %s revisions\n" % d)
            dirlog = repo.manifest.dirlog(d)
            if not dirlog.addgroup(self, revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
class headerlessfixup(object):
    """File-like wrapper that replays already-consumed bytes before a stream.

    read(n) serves bytes from the buffered prefix ``h`` first, then falls
    through to exact reads on the underlying stream ``fh``.
    """
    def __init__(self, fh, h):
        self._h = h    # buffered prefix bytes still to be replayed
        self._fh = fh  # the underlying stream
    def read(self, n):
        if self._h:
            # serve from (and shrink) the buffered prefix first
            d, self._h = self._h[:n], self._h[n:]
            if len(d) < n:
                # prefix exhausted mid-request: top up from the stream
                d += readexactly(self._fh, n - len(d))
            return d
        return readexactly(self._fh, n)
class cg1packer(object):
    """Packer producing version '01' changegroup streams from a repo."""
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    version = '01'
    def __init__(self, repo, bundlecaps=None):
        """Given a source repo, construct a bundler.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle.
        """
        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        # experimental config: bundle.reorder
        reorder = repo.ui.config('bundle', 'reorder', 'auto')
        if reorder == 'auto':
            reorder = None
        else:
            reorder = util.parsebool(reorder)
        self._repo = repo
        self._reorder = reorder
        self._progress = repo.ui.progress
        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            # silently drop size notes unless --verbose (and not --debug)
            self._verbosenote = lambda s: None

    def close(self):
        """Return the zero-length chunk that terminates a group."""
        return closechunk()

    def fileheader(self, fname):
        """Return the chunk announcing a filelog group for fname."""
        return chunkheader(len(fname)) + fname

    # Extracted both for clarity and for overriding in extensions.
    def _sortgroup(self, revlog, nodelist, lookup):
        """Sort nodes for change group and turn them into revnums."""
        # for generaldelta revlogs, we linearize the revs; this will both be
        # much quicker and generate a much smaller bundle
        if (revlog._generaldelta and self._reorder is None) or self._reorder:
            dag = dagutil.revlogdag(revlog)
            return dag.linearize(set(revlog.rev(n) for n in nodelist))
        else:
            return sorted([revlog.rev(n) for n in nodelist])

    def group(self, nodelist, revlog, lookup, units=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0], the receiver is
        guaranteed to have this parent as it has all history before
        these changesets. In the case firstparent is nullrev the
        changegroup starts with a full revision.

        If units is not None, progress detail will be generated, units specifies
        the type of revlog that is touched (changelog, manifest, etc.).
        """
        # if we don't have any revisions touched by these changesets, bail
        if len(nodelist) == 0:
            yield self.close()
            return

        revs = self._sortgroup(revlog, nodelist, lookup)

        # add the parent of the first rev
        p = revlog.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        total = len(revs) - 1
        msgbundling = _('bundling')
        for r in xrange(len(revs) - 1):
            if units is not None:
                self._progress(msgbundling, r + 1, unit=units, total=total)
            prev, curr = revs[r], revs[r + 1]
            linknode = lookup(revlog.node(curr))
            for c in self.revchunk(revlog, curr, prev, linknode):
                yield c

        if units is not None:
            self._progress(msgbundling, None)
        yield self.close()

    # filter any nodes that claim to be part of the known set
    def prune(self, revlog, missing, commonrevs):
        rr, rl = revlog.rev, revlog.linkrev
        return [n for n in missing if rl(rr(n)) not in commonrevs]

    def _packmanifests(self, dir, mfnodes, lookuplinknode):
        """Pack flat manifests into a changegroup stream."""
        # cg1 has no treemanifest support, so dir must be the root ('')
        assert not dir
        for chunk in self.group(mfnodes, self._repo.manifest,
                                lookuplinknode, units=_('manifests')):
            yield chunk

    def _manifestsdone(self):
        # cg1/cg2 have no terminator between manifests and files
        return ''

    def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
        '''yield a sequence of changegroup chunks (strings)'''
        repo = self._repo
        cl = repo.changelog

        clrevorder = {}
        mfs = {} # needed manifests
        fnodes = {} # needed file nodes
        changedfiles = set()

        # Callback for the changelog, used to collect changed files and manifest
        # nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            clrevorder[x] = len(clrevorder)
            n = c[0]
            # record the first changeset introducing this manifest version
            mfs.setdefault(n, x)
            # Record a complete list of potentially-changed files in
            # this manifest.
            changedfiles.update(c[3])
            return x

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0
        for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
            size += len(chunk)
            yield chunk
        self._verbosenote(_('%8.i (changelog)\n') % size)

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath with reorder=None and the manifest revlog
        # uses generaldelta, the manifest may be walked in the "wrong" order.
        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
        # cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta, so
        # it is only reordered when reorder=True. To handle this case, we
        # simply take the slowpath, which already has the 'clrevorder' logic.
        # This was also fixed in cc0ff93d0c0c.
        fastpathlinkrev = fastpathlinkrev and not self._reorder
        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            'treemanifest' not in repo.requirements)

        for chunk in self.generatemanifests(commonrevs, clrevorder,
                                            fastpathlinkrev, mfs, fnodes):
            yield chunk
        # free the manifest map as soon as it is no longer needed
        mfs.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        if not fastpathlinkrev:
            def linknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            cln = cl.node
            def linknodes(filerevlog, fname):
                llr = filerevlog.linkrev
                fln = filerevlog.node
                revs = ((r, llr(r)) for r in filerevlog)
                return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)

        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                        source):
            yield chunk

        yield self.close()

        if clnodes:
            repo.hook('outgoing', node=hex(clnodes[0]), source=source)

    def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
                          fnodes):
        """Yield chunks for the manifest groups (root first, then subdirs)."""
        repo = self._repo
        mfl = repo.manifestlog
        dirlog = mfl._revlog.dirlog
        # map of directory -> {manifest node -> linkrev node}; starts with
        # the root manifests and grows as treemanifest entries are found
        tmfnodes = {'': mfs}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(dir):
            if fastpathlinkrev:
                assert not dir
                return mfs.__getitem__

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                   file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                   to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = tmfnodes[dir][x]
                mdata = mfl.get(dir, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == 't': # subdirectory manifest
                        subdir = dir + p + '/'
                        tmfclnodes = tmfnodes.setdefault(subdir, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = dir + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode
            return lookupmflinknode

        size = 0
        while tmfnodes:
            # process directories in sorted order; packing one directory
            # may add deeper subdirectories to tmfnodes
            dir = min(tmfnodes)
            nodes = tmfnodes[dir]
            prunednodes = self.prune(dirlog(dir), nodes, commonrevs)
            if not dir or prunednodes:
                for x in self._packmanifests(dir, prunednodes,
                                             makelookupmflinknode(dir)):
                    size += len(x)
                    yield x
            del tmfnodes[dir]
        self._verbosenote(_('%8.i (manifests)\n') % size)
        yield self._manifestsdone()

    # The 'source' parameter is useful for extensions
    def generatefiles(self, changedfiles, linknodes, commonrevs, source):
        """Yield chunks for each changed file's filelog group."""
        repo = self._repo
        progress = self._progress
        msgbundling = _('bundling')

        total = len(changedfiles)
        # for progress output
        msgfiles = _('files')
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(_("empty or missing revlog for %s") % fname)

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
            if filenodes:
                progress(msgbundling, i + 1, item=fname, unit=msgfiles,
                         total=total)
                h = self.fileheader(fname)
                size = len(h)
                yield h
                for chunk in self.group(filenodes, filerevlog, lookupfilelog):
                    size += len(chunk)
                    yield chunk
                self._verbosenote(_('%8.i %s\n') % (size, fname))
        progress(msgbundling, None)

    def deltaparent(self, revlog, rev, p1, p2, prev):
        # cg1 deltas are always against the previous revision in the stream
        return prev

    def revchunk(self, revlog, rev, prev, linknode):
        """Yield the chunks (header, meta, delta) for one revision."""
        node = revlog.node(rev)
        p1, p2 = revlog.parentrevs(rev)
        base = self.deltaparent(revlog, rev, p1, p2, prev)

        prefix = ''
        if revlog.iscensored(base) or revlog.iscensored(rev):
            # censored revisions: ship the tombstone as a full replacement
            try:
                delta = revlog.revision(node)
            except error.CensoredNodeError as e:
                delta = e.tombstone
            if base == nullrev:
                prefix = mdiff.trivialdiffheader(len(delta))
            else:
                baselen = revlog.rawsize(base)
                prefix = mdiff.replacediffheader(baselen, len(delta))
        elif base == nullrev:
            # no usable base: send the full text wrapped as a trivial diff
            delta = revlog.revision(node)
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            delta = revlog.revdiff(base, rev)
        p1n, p2n = revlog.parents(node)
        basenode = revlog.node(base)
        flags = revlog.flags(rev)
        meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags)
        meta += prefix
        l = len(meta) + len(delta)
        yield chunkheader(l)
        yield meta
        yield delta
    def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
        # do nothing with basenode, it is implicitly the previous one in HG10
        # do nothing with flags, it is implicitly 0 for cg1 and cg2
        return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
class cg2packer(cg1packer):
    """Packer for version '02' streams (explicit delta bases)."""
    version = '02'
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER

    def __init__(self, repo, bundlecaps=None):
        super(cg2packer, self).__init__(repo, bundlecaps)
        if self._reorder is None:
            # Since generaldelta is directly supported by cg2, reordering
            # generally doesn't help, so we disable it by default (treating
            # bundle.reorder=auto just like bundle.reorder=False).
            self._reorder = False

    def deltaparent(self, revlog, rev, p1, p2, prev):
        """Pick the base revision against which to send rev's delta."""
        dp = revlog.deltaparent(rev)
        if dp == nullrev and revlog.storedeltachains:
            # Avoid sending full revisions when delta parent is null. Pick prev
            # in that case. It's tempting to pick p1 in this case, as p1 will
            # be smaller in the common case. However, computing a delta against
            # p1 may require resolving the raw text of p1, which could be
            # expensive. The revlog caches should have prev cached, meaning
            # less CPU for changegroup generation. There is likely room to add
            # a flag and/or config option to control this behavior.
            return prev
        elif dp == nullrev:
            # revlog is configured to use full snapshot for a reason,
            # stick to full snapshot.
            return nullrev
        elif dp not in (p1, p2, prev):
            # Pick prev when we can't be sure remote has the base revision.
            return prev
        else:
            return dp

    def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
        # Do nothing with flags, it is implicitly 0 in cg1 and cg2
        return struct.pack(self.deltaheader, node, p1n, p2n, basenode, linknode)
class cg3packer(cg2packer):
    """Packer for version '03' streams (treemanifests and revlog flags)."""
    version = '03'
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER

    def _packmanifests(self, dir, mfnodes, lookuplinknode):
        # a non-root directory is announced with a file-style header
        # carrying the directory path
        if dir:
            yield self.fileheader(dir)
        for chunk in self.group(mfnodes, self._repo.manifest.dirlog(dir),
                                lookuplinknode, units=_('manifests')):
            yield chunk

    def _manifestsdone(self):
        # cg3 terminates the manifest section with an explicit empty chunk
        return self.close()

    def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
        # cg3 is the first version to carry revlog flags on the wire
        return struct.pack(
            self.deltaheader, node, p1n, p2n, basenode, linknode, flags)
# Maps changegroup version string -> (packer class, unpacker class).
_packermap = {'01': (cg1packer, cg1unpacker),
             # cg2 adds support for exchanging generaldelta
             '02': (cg2packer, cg2unpacker),
             # cg3 adds support for exchanging revlog flags and treemanifests
             '03': (cg3packer, cg3unpacker),
}
871 872
def allsupportedversions(ui):
    """Return the set of changegroup versions this build understands."""
    versions = set(_packermap)
    # '03' is experimental; only advertise it when explicitly enabled.
    versions.discard('03')
    wantv3 = (ui.configbool('experimental', 'changegroup3')
              or ui.configbool('experimental', 'treemanifest'))
    if wantv3:
        versions.add('03')
    return versions
879 880
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    """Return the changegroup versions this repo can receive."""
    accepted = allsupportedversions(repo.ui)
    if 'treemanifest' in repo.requirements:
        # A treemanifest repo can always ingest cg3.
        accepted.add('03')
    return accepted
886 887
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return the changegroup versions this repo can produce."""
    producible = allsupportedversions(repo.ui)
    if 'treemanifest' in repo.requirements:
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest on
        # the fly. Since tree manifests are hashed differently, all of history
        # would have to be converted. Instead, we simply don't even pretend to
        # support versions 01 and 02.
        producible -= {'01', '02'}
        producible.add('03')
    return producible
900 901
def safeversion(repo):
    """Return the smallest version it's safe to assume clients support.

    For example, all hg versions that support generaldelta also support
    changegroup 02, so a generaldelta repo may drop 01.
    """
    candidates = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        candidates.discard('01')
    assert candidates
    return min(candidates)
910 911
def getbundler(version, repo, bundlecaps=None):
    # Instantiate the packer class for ``version``; the repo must be able
    # to produce that version (see supportedoutgoingversions).
    assert version in supportedoutgoingversions(repo)
    return _packermap[version][0](repo, bundlecaps)
914 915
def getunbundler(version, fh, alg, extras=None):
    # Instantiate the unpacker class for ``version`` over stream ``fh``.
    return _packermap[version][1](fh, alg, extras=extras)
917 918
def _changegroupinfo(repo, nodes, source):
    """Report the changesets being bundled (verbose/debug output only)."""
    ui = repo.ui
    if ui.verbose or source == 'bundle':
        ui.status(_("%d changesets found\n") % len(nodes))
    if ui.debugflag:
        ui.debug("list of changesets:\n")
        for node in nodes:
            ui.debug("%s\n" % hex(node))
925 926
def getsubsetraw(repo, outgoing, bundler, source, fastpath=False):
    # Generate the raw changegroup stream for ``outgoing`` with ``bundler``,
    # firing the 'preoutgoing' hook first.
    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    # We go through the fast path if we get told to, or if all (unfiltered)
    # heads have been requested (since we then know all linkrevs will
    # be pulled by the client).
    heads.sort()
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
941 942
def getsubset(repo, outgoing, bundler, source, fastpath=False):
    # Like getsubsetraw, but wrap the stream in an unbundler so callers
    # can apply it directly.
    gengroup = getsubsetraw(repo, outgoing, bundler, source, fastpath)
    return getunbundler(bundler.version, util.chunkbuffer(gengroup), None,
                        {'clcount': len(outgoing.missing)})
946 947
def changegroupsubset(repo, roots, heads, source, version='01'):
    """Compute a changegroup consisting of all the nodes that are
    descendants of any of the roots and ancestors of any of the heads.
    Return a chunkbuffer object whose read() method will return
    successive changegroup chunks.

    It is fairly complex as determining which filenodes and which
    manifest nodes need to be included for the changeset to be complete
    is non-trivial.

    Another wrinkle is doing the reverse, figuring out which changeset in
    the changegroup a particular filenode or manifestnode belongs to.
    """
    # Build an outgoing set from the roots/heads range, then reuse the
    # generic subset machinery.
    outgoing = discovery.outgoing(repo, missingroots=roots, missingheads=heads)
    bundler = getbundler(version, repo)
    return getsubset(repo, outgoing, bundler, source)
963 964
def getlocalchangegroupraw(repo, source, outgoing, bundlecaps=None,
                           version='01'):
    """Like getbundle, but taking a discovery.outgoing as an argument.

    This is only implemented for local repos and reuses potentially
    precomputed sets in outgoing. Returns a raw changegroup generator."""
    if not outgoing.missing:
        # Nothing to transfer.
        return None
    packer = getbundler(version, repo, bundlecaps)
    return getsubsetraw(repo, outgoing, packer, source)
974 975
def getlocalchangegroup(repo, source, outgoing, bundlecaps=None,
                        version='01'):
    """Like getbundle, but taking a discovery.outgoing as an argument.

    This is only implemented for local repos and reuses potentially
    precomputed sets in outgoing."""
    if not outgoing.missing:
        # Nothing to transfer.
        return None
    packer = getbundler(version, repo, bundlecaps)
    return getsubset(repo, outgoing, packer, source)
985 986
def getchangegroup(repo, source, outgoing, bundlecaps=None,
                   version='01'):
    """Like changegroupsubset, but returns the set difference between the
    ancestors of heads and the ancestors common.

    If heads is None, use the local heads. If common is None, use [nullid].

    The nodes in common might not all be known locally due to the way the
    current discovery protocol works.
    """
    # Thin wrapper kept for API compatibility; all the work happens in
    # getlocalchangegroup.
    return getlocalchangegroup(repo, source, outgoing, bundlecaps=bundlecaps,
                               version=version)
998 999
def changegroup(repo, basenodes, source):
    # to avoid a race we use changegroupsubset() (issue1320)
    return changegroupsubset(repo, basenodes, repo.heads(), source)
1002 1003
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the filelog portion of a changegroup stream.

    Reads filelog groups from ``source`` and adds them to the matching
    filelogs, checking received nodes off ``needfiles`` (a dict of
    filename -> set of expected nodes). Returns (revisions, files) added.
    Raises Abort on an empty group, a censored delta base, a spurious
    entry, or nodes still missing at the end.
    """
    revisions = 0
    files = 0
    # source.filelogheader returns {} at the end of the file groups.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        repo.ui.progress(_('files'), files, unit=_('files'),
                         total=expectedfiles)
        fl = repo.file(f)
        o = len(fl)  # revision count before the group is applied
        try:
            if not fl.addgroup(source, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # every newly added node must have been expected
            for new in xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    repo.ui.progress(_('files'), None)

    # anything left in needfiles must already exist locally, or the
    # changegroup was incomplete
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
@@ -1,1579 +1,1576 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import array
11 11 import heapq
12 12 import os
13 13 import struct
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 error,
18 18 mdiff,
19 19 parsers,
20 20 revlog,
21 21 util,
22 22 )
23 23
24 24 propertycache = util.propertycache
25 25
def _parsev1(data):
    """Yield (path, binary node, flags) from a v1 manifest text."""
    # This function does a little bit of excessive-looking precondition
    # checking so that the behavior of this pure version exactly matches
    # its C counterpart.
    if data and not data.endswith('\n'):
        raise ValueError('Manifest did not end in a newline.')
    prev = None
    for line in data.splitlines():
        if prev is not None and prev > line:
            raise ValueError('Manifest lines not in sorted order.')
        prev = line
        path, hexnode = line.split('\0')
        if len(hexnode) > 40:
            # anything past the 40 hex digits is the flags string
            yield path, revlog.bin(hexnode[:40]), hexnode[40:]
        else:
            yield path, revlog.bin(hexnode), ''
44 44
45 45 def _parsev2(data):
46 46 metadataend = data.find('\n')
47 47 # Just ignore metadata for now
48 48 pos = metadataend + 1
49 49 prevf = ''
50 50 while pos < len(data):
51 51 end = data.find('\n', pos + 1) # +1 to skip stem length byte
52 52 if end == -1:
53 53 raise ValueError('Manifest ended with incomplete file entry.')
54 54 stemlen = ord(data[pos])
55 55 items = data[pos + 1:end].split('\0')
56 56 f = prevf[:stemlen] + items[0]
57 57 if prevf > f:
58 58 raise ValueError('Manifest entries not in sorted order.')
59 59 fl = items[1]
60 60 # Just ignore metadata (items[2:] for now)
61 61 n = data[end + 1:end + 21]
62 62 yield f, n, fl
63 63 pos = end + 22
64 64 prevf = f
65 65
def _parse(data):
    """Generates (path, node, flags) tuples from a manifest text"""
    # A leading NUL byte marks the v2 format.
    parser = _parsev2 if data.startswith('\0') else _parsev1
    return iter(parser(data))
72 72
def _text(it, usemanifestv2):
    """Given an iterator over (path, node, flags) tuples, returns a manifest
    text"""
    writer = _textv2 if usemanifestv2 else _textv1
    return writer(it)
80 80
def _textv1(it):
    """Render (path, node, flags) entries as v1 manifest text."""
    seen = []
    entries = []
    tohex = revlog.hex
    for path, node, flags in it:
        seen.append(path)
        # if this is changed to support newlines in filenames,
        # be sure to check the templates/ dir again (especially *-raw.tmpl)
        entries.append("%s\0%s%s\n" % (path, tohex(node), flags))

    _checkforbidden(seen)
    return ''.join(entries)
93 93
def _textv2(it):
    """Render (path, node, flags) entries as v2 manifest text."""
    seen = []
    # leading NUL line marks v2 and carries (currently empty) metadata
    entries = ['\0\n']
    prior = ''
    for path, node, flags in it:
        seen.append(path)
        shared = os.path.commonprefix([prior, path])
        stemlen = min(len(shared), 255)
        entries.append("%c%s\0%s\n%s\n" % (stemlen, path[stemlen:],
                                           flags, node))
        prior = path
    _checkforbidden(seen)
    return ''.join(entries)
106 106
class lazymanifestiter(object):
    """Iterator over the file names stored in a _lazymanifest."""

    def __init__(self, lm):
        self.pos = 0
        self.lm = lm

    def __iter__(self):
        return self

    def next(self):
        try:
            data, pos = self.lm._get(self.pos)
        except IndexError:
            raise StopIteration
        self.pos += 1
        if pos == -1:
            # entry lives in extradata as a (path, node, flags) tuple
            return data[0]
        # entry lives in the raw text; the name ends at the NUL byte
        return data[pos:data.find('\x00', pos)]
126 126
class lazymanifestiterentries(object):
    """Iterator over (path, node, flags) entries of a _lazymanifest."""

    def __init__(self, lm):
        self.lm = lm
        self.pos = 0

    def __iter__(self):
        return self

    def next(self):
        try:
            data, pos = self.lm._get(self.pos)
        except IndexError:
            raise StopIteration
        index = self.pos
        self.pos = index + 1
        if pos == -1:
            # already a (path, node, flags) tuple in extradata
            return data
        nameend = data.find('\x00', pos)
        node = unhexlify(data, self.lm.extrainfo[index], nameend + 1, 40)
        flags = self.lm._getflags(data, index, nameend)
        return (data[pos:nameend], node, flags)
149 149
def unhexlify(data, extra, pos, length):
    # Decode ``length`` hex digits at ``pos`` into raw bytes (Python 2
    # str.decode('hex')); ``extra`` optionally carries a 21st raw node
    # byte (see _lazymanifest.extrainfo).
    s = data[pos:pos + length].decode('hex')
    if extra:
        s += chr(extra & 0xff)
    return s
155 155
156 156 def _cmp(a, b):
157 157 return (a > b) - (a < b)
158 158
159 159 class _lazymanifest(object):
160 160 def __init__(self, data, positions=None, extrainfo=None, extradata=None):
161 161 if positions is None:
162 162 self.positions = self.findlines(data)
163 163 self.extrainfo = [0] * len(self.positions)
164 164 self.data = data
165 165 self.extradata = []
166 166 else:
167 167 self.positions = positions[:]
168 168 self.extrainfo = extrainfo[:]
169 169 self.extradata = extradata[:]
170 170 self.data = data
171 171
172 172 def findlines(self, data):
173 173 if not data:
174 174 return []
175 175 pos = data.find("\n")
176 176 if pos == -1 or data[-1] != '\n':
177 177 raise ValueError("Manifest did not end in a newline.")
178 178 positions = [0]
179 179 prev = data[:data.find('\x00')]
180 180 while pos < len(data) - 1 and pos != -1:
181 181 positions.append(pos + 1)
182 182 nexts = data[pos + 1:data.find('\x00', pos + 1)]
183 183 if nexts < prev:
184 184 raise ValueError("Manifest lines not in sorted order.")
185 185 prev = nexts
186 186 pos = data.find("\n", pos + 1)
187 187 return positions
188 188
189 189 def _get(self, index):
190 190 # get the position encoded in pos:
191 191 # positive number is an index in 'data'
192 192 # negative number is in extrapieces
193 193 pos = self.positions[index]
194 194 if pos >= 0:
195 195 return self.data, pos
196 196 return self.extradata[-pos - 1], -1
197 197
198 198 def _getkey(self, pos):
199 199 if pos >= 0:
200 200 return self.data[pos:self.data.find('\x00', pos + 1)]
201 201 return self.extradata[-pos - 1][0]
202 202
203 203 def bsearch(self, key):
204 204 first = 0
205 205 last = len(self.positions) - 1
206 206
207 207 while first <= last:
208 208 midpoint = (first + last)//2
209 209 nextpos = self.positions[midpoint]
210 210 candidate = self._getkey(nextpos)
211 211 r = _cmp(key, candidate)
212 212 if r == 0:
213 213 return midpoint
214 214 else:
215 215 if r < 0:
216 216 last = midpoint - 1
217 217 else:
218 218 first = midpoint + 1
219 219 return -1
220 220
221 221 def bsearch2(self, key):
222 222 # same as the above, but will always return the position
223 223 # done for performance reasons
224 224 first = 0
225 225 last = len(self.positions) - 1
226 226
227 227 while first <= last:
228 228 midpoint = (first + last)//2
229 229 nextpos = self.positions[midpoint]
230 230 candidate = self._getkey(nextpos)
231 231 r = _cmp(key, candidate)
232 232 if r == 0:
233 233 return (midpoint, True)
234 234 else:
235 235 if r < 0:
236 236 last = midpoint - 1
237 237 else:
238 238 first = midpoint + 1
239 239 return (first, False)
240 240
241 241 def __contains__(self, key):
242 242 return self.bsearch(key) != -1
243 243
244 244 def _getflags(self, data, needle, pos):
245 245 start = pos + 41
246 246 end = data.find("\n", start)
247 247 if end == -1:
248 248 end = len(data) - 1
249 249 if start == end:
250 250 return ''
251 251 return self.data[start:end]
252 252
253 253 def __getitem__(self, key):
254 254 if not isinstance(key, str):
255 255 raise TypeError("getitem: manifest keys must be a string.")
256 256 needle = self.bsearch(key)
257 257 if needle == -1:
258 258 raise KeyError
259 259 data, pos = self._get(needle)
260 260 if pos == -1:
261 261 return (data[1], data[2])
262 262 zeropos = data.find('\x00', pos)
263 263 assert 0 <= needle <= len(self.positions)
264 264 assert len(self.extrainfo) == len(self.positions)
265 265 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
266 266 flags = self._getflags(data, needle, zeropos)
267 267 return (hashval, flags)
268 268
269 269 def __delitem__(self, key):
270 270 needle, found = self.bsearch2(key)
271 271 if not found:
272 272 raise KeyError
273 273 cur = self.positions[needle]
274 274 self.positions = self.positions[:needle] + self.positions[needle + 1:]
275 275 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:]
276 276 if cur >= 0:
277 277 self.data = self.data[:cur] + '\x00' + self.data[cur + 1:]
278 278
279 279 def __setitem__(self, key, value):
280 280 if not isinstance(key, str):
281 281 raise TypeError("setitem: manifest keys must be a string.")
282 282 if not isinstance(value, tuple) or len(value) != 2:
283 283 raise TypeError("Manifest values must be a tuple of (node, flags).")
284 284 hashval = value[0]
285 285 if not isinstance(hashval, str) or not 20 <= len(hashval) <= 22:
286 286 raise TypeError("node must be a 20-byte string")
287 287 flags = value[1]
288 288 if len(hashval) == 22:
289 289 hashval = hashval[:-1]
290 290 if not isinstance(flags, str) or len(flags) > 1:
291 291 raise TypeError("flags must a 0 or 1 byte string, got %r", flags)
292 292 needle, found = self.bsearch2(key)
293 293 if found:
294 294 # put the item
295 295 pos = self.positions[needle]
296 296 if pos < 0:
297 297 self.extradata[-pos - 1] = (key, hashval, value[1])
298 298 else:
299 299 # just don't bother
300 300 self.extradata.append((key, hashval, value[1]))
301 301 self.positions[needle] = -len(self.extradata)
302 302 else:
303 303 # not found, put it in with extra positions
304 304 self.extradata.append((key, hashval, value[1]))
305 305 self.positions = (self.positions[:needle] + [-len(self.extradata)]
306 306 + self.positions[needle:])
307 307 self.extrainfo = (self.extrainfo[:needle] + [0] +
308 308 self.extrainfo[needle:])
309 309
310 310 def copy(self):
311 311 # XXX call _compact like in C?
312 312 return _lazymanifest(self.data, self.positions, self.extrainfo,
313 313 self.extradata)
314 314
315 315 def _compact(self):
316 316 # hopefully not called TOO often
317 317 if len(self.extradata) == 0:
318 318 return
319 319 l = []
320 320 last_cut = 0
321 321 i = 0
322 322 offset = 0
323 323 self.extrainfo = [0] * len(self.positions)
324 324 while i < len(self.positions):
325 325 if self.positions[i] >= 0:
326 326 cur = self.positions[i]
327 327 last_cut = cur
328 328 while True:
329 329 self.positions[i] = offset
330 330 i += 1
331 331 if i == len(self.positions) or self.positions[i] < 0:
332 332 break
333 333 offset += self.positions[i] - cur
334 334 cur = self.positions[i]
335 335 end_cut = self.data.find('\n', cur)
336 336 if end_cut != -1:
337 337 end_cut += 1
338 338 offset += end_cut - cur
339 339 l.append(self.data[last_cut:end_cut])
340 340 else:
341 341 while i < len(self.positions) and self.positions[i] < 0:
342 342 cur = self.positions[i]
343 343 t = self.extradata[-cur - 1]
344 344 l.append(self._pack(t))
345 345 self.positions[i] = offset
346 346 if len(t[1]) > 20:
347 347 self.extrainfo[i] = ord(t[1][21])
348 348 offset += len(l[-1])
349 349 i += 1
350 350 self.data = ''.join(l)
351 351 self.extradata = []
352 352
353 353 def _pack(self, d):
354 354 return d[0] + '\x00' + d[1][:20].encode('hex') + d[2] + '\n'
355 355
356 356 def text(self):
357 357 self._compact()
358 358 return self.data
359 359
360 360 def diff(self, m2, clean=False):
361 361 '''Finds changes between the current manifest and m2.'''
362 362 # XXX think whether efficiency matters here
363 363 diff = {}
364 364
365 365 for fn, e1, flags in self.iterentries():
366 366 if fn not in m2:
367 367 diff[fn] = (e1, flags), (None, '')
368 368 else:
369 369 e2 = m2[fn]
370 370 if (e1, flags) != e2:
371 371 diff[fn] = (e1, flags), e2
372 372 elif clean:
373 373 diff[fn] = None
374 374
375 375 for fn, e2, flags in m2.iterentries():
376 376 if fn not in self:
377 377 diff[fn] = (None, ''), (e2, flags)
378 378
379 379 return diff
380 380
381 381 def iterentries(self):
382 382 return lazymanifestiterentries(self)
383 383
384 384 def iterkeys(self):
385 385 return lazymanifestiter(self)
386 386
387 387 def __iter__(self):
388 388 return lazymanifestiter(self)
389 389
390 390 def __len__(self):
391 391 return len(self.positions)
392 392
393 393 def filtercopy(self, filterfn):
394 394 # XXX should be optimized
395 395 c = _lazymanifest('')
396 396 for f, n, fl in self.iterentries():
397 397 if filterfn(f):
398 398 c[f] = n, fl
399 399 return c
400 400
try:
    # Prefer the C implementation when the extension module provides it;
    # the pure-Python class above remains the fallback.
    _lazymanifest = parsers.lazymanifest
except AttributeError:
    pass
405 405
class manifestdict(object):
    """A flat manifest mapping file path -> (20-byte node, flags string).

    Thin wrapper around _lazymanifest that adds dict-style access,
    matcher-based walking/filtering and revlog delta generation.
    """

    def __init__(self, data=''):
        if data.startswith('\0'):
            #_lazymanifest can not parse v2
            self._lm = _lazymanifest('')
            for f, n, fl in _parsev2(data):
                self._lm[f] = n, fl
        else:
            self._lm = _lazymanifest(data)

    def __getitem__(self, key):
        # Return only the node; use find() for (node, flags).
        return self._lm[key][0]

    def find(self, key):
        # Return (node, flags) for key or raise KeyError.
        return self._lm[key]

    def __len__(self):
        return len(self._lm)

    def __setitem__(self, key, node):
        # Preserve any flags already recorded for this path.
        self._lm[key] = node, self.flags(key, '')

    def __contains__(self, key):
        return key in self._lm

    def __delitem__(self, key):
        del self._lm[key]

    def __iter__(self):
        return self._lm.__iter__()

    def iterkeys(self):
        return self._lm.iterkeys()

    def keys(self):
        return list(self.iterkeys())

    def filesnotin(self, m2):
        '''Set of files in this manifest that are not in the other'''
        diff = self.diff(m2)
        files = set(filepath
                    for filepath, hashflags in diff.iteritems()
                    if hashflags[1][0] is None)
        return files

    @propertycache
    def _dirs(self):
        # Lazily computed multiset of the directories of all files.
        return util.dirs(self)

    def dirs(self):
        return self._dirs

    def hasdir(self, dir):
        return dir in self._dirs

    def _filesfastpath(self, match):
        '''Checks whether we can correctly and quickly iterate over matcher
        files instead of over manifest files.'''
        files = match.files()
        return (len(files) < 100 and (match.isexact() or
            (match.prefix() and all(fn in self for fn in files))))

    def walk(self, match):
        '''Generates matching file names.

        Equivalent to manifest.matches(match).iterkeys(), but without creating
        an entirely new manifest.

        It also reports nonexistent files by marking them bad with match.bad().
        '''
        if match.always():
            for f in iter(self):
                yield f
            return

        fset = set(match.files())

        # avoid the entire walk if we're only looking for specific files
        if self._filesfastpath(match):
            for fn in sorted(fset):
                yield fn
            return

        for fn in self:
            if fn in fset:
                # specified pattern is the exact name
                fset.remove(fn)
            if match(fn):
                yield fn

        # for dirstate.walk, files=['.'] means "walk the whole tree".
        # follow that here, too
        fset.discard('.')

        for fn in sorted(fset):
            if not self.hasdir(fn):
                match.bad(fn, None)

    def matches(self, match):
        '''generate a new manifest filtered by the match argument'''
        if match.always():
            return self.copy()

        if self._filesfastpath(match):
            m = manifestdict()
            lm = self._lm
            for fn in match.files():
                if fn in lm:
                    m._lm[fn] = lm[fn]
            return m

        m = manifestdict()
        m._lm = self._lm.filtercopy(match)
        return m

    def diff(self, m2, clean=False):
        '''Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        '''
        return self._lm.diff(m2._lm, clean)

    def setflag(self, key, flag):
        self._lm[key] = self[key], flag

    def get(self, key, default=None):
        try:
            return self._lm[key][0]
        except KeyError:
            return default

    def flags(self, key, default=''):
        try:
            return self._lm[key][1]
        except KeyError:
            return default

    def copy(self):
        c = manifestdict()
        c._lm = self._lm.copy()
        return c

    def iteritems(self):
        # (path, node) pairs, flags omitted.
        return (x[:2] for x in self._lm.iterentries())

    def iterentries(self):
        return self._lm.iterentries()

    def text(self, usemanifestv2=False):
        if usemanifestv2:
            return _textv2(self._lm.iterentries())
        else:
            # use (probably) native version for v1
            return self._lm.text()

    def fastdelta(self, base, changes):
        """Given a base manifest text as an array.array and a list of changes
        relative to that text, compute a delta that can be used by revlog.
        """
        delta = []
        dstart = None
        dend = None
        dline = [""]
        start = 0
        # zero copy representation of base as a buffer
        addbuf = util.buffer(base)

        changes = list(changes)
        if len(changes) < 1000:
            # start with a readonly loop that finds the offset of
            # each line and creates the deltas
            for f, todelete in changes:
                # bs will either be the index of the item or the insert point
                start, end = _msearch(addbuf, f, start)
                if not todelete:
                    h, fl = self._lm[f]
                    l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
                else:
                    if start == end:
                        # item we want to delete was not found, error out
                        raise AssertionError(
                                _("failed to remove %s from manifest") % f)
                    l = ""
                if dstart is not None and dstart <= start and dend >= start:
                    if dend < end:
                        dend = end
                    if l:
                        dline.append(l)
                else:
                    if dstart is not None:
                        delta.append([dstart, dend, "".join(dline)])
                    dstart = start
                    dend = end
                    dline = [l]

            if dstart is not None:
                delta.append([dstart, dend, "".join(dline)])
            # apply the delta to the base, and get a delta for addrevision
            deltatext, arraytext = _addlistdelta(base, delta)
        else:
            # For large changes, it's much cheaper to just build the text and
            # diff it.
            arraytext = array.array('c', self.text())
            deltatext = mdiff.textdiff(base, arraytext)

        return arraytext, deltatext
622 622
def _msearch(m, s, lo=0, hi=None):
    '''return a tuple (start, end) that says where to find s within m.

    If the string is found m[start:end] are the line containing
    that string. If start == end the string was not found and
    they indicate the proper sorted insertion point.

    m should be a buffer or a string
    s is a string'''
    def advance(i, c):
        # scan forward from i to the next occurrence of character c
        while i < lenm and m[i] != c:
            i += 1
        return i
    if not s:
        return (lo, lo)
    lenm = len(m)
    if not hi:
        hi = lenm
    # bisect over lines: back each probe up to its line start first
    while lo < hi:
        mid = (lo + hi) // 2
        start = mid
        while start > 0 and m[start - 1] != '\n':
            start -= 1
        end = advance(start, '\0')
        if m[start:end] < s:
            # we know that after the null there are 40 bytes of sha1
            # this translates to the bisect lo = mid + 1
            lo = advance(end + 40, '\n') + 1
        else:
            # this translates to the bisect hi = mid
            hi = start
    end = advance(lo, '\0')
    found = m[lo:end]
    if s == found:
        # we know that after the null there are 40 bytes of sha1
        end = advance(end + 40, '\n')
        return (lo, end + 1)
    else:
        return (lo, lo)
662 662
def _checkforbidden(l):
    """Raise RevlogError if any filename contains a newline or CR."""
    for fname in l:
        if '\n' in fname or '\r' in fname:
            raise error.RevlogError(
                _("'\\n' and '\\r' disallowed in filenames: %r") % fname)
669 669
670 670
# apply the changes collected during the bisect loop to our addlist
# return a delta suitable for addrevision
def _addlistdelta(addlist, x):
    # for large addlist arrays, building a new array is cheaper
    # than repeatedly modifying the existing one
    currentposition = 0
    newaddlist = array.array('c')

    for start, end, content in x:
        # copy the untouched span, then the replacement content
        newaddlist += addlist[currentposition:start]
        if content:
            newaddlist += array.array('c', content)

        currentposition = end

    newaddlist += addlist[currentposition:]

    # binary delta: per-hunk (start, end, length) headers followed by content
    deltatext = "".join(struct.pack(">lll", start, end, len(content))
                        + content for start, end, content in x)
    return deltatext, newaddlist
691 691
692 692 def _splittopdir(f):
693 693 if '/' in f:
694 694 dir, subpath = f.split('/', 1)
695 695 return dir + '/', subpath
696 696 else:
697 697 return '', f
698 698
699 699 _noop = lambda s: None
700 700
701 701 class treemanifest(object):
702 702 def __init__(self, dir='', text=''):
703 703 self._dir = dir
704 704 self._node = revlog.nullid
705 705 self._loadfunc = _noop
706 706 self._copyfunc = _noop
707 707 self._dirty = False
708 708 self._dirs = {}
709 709 # Using _lazymanifest here is a little slower than plain old dicts
710 710 self._files = {}
711 711 self._flags = {}
712 712 if text:
713 713 def readsubtree(subdir, subm):
714 714 raise AssertionError('treemanifest constructor only accepts '
715 715 'flat manifests')
716 716 self.parse(text, readsubtree)
717 717 self._dirty = True # Mark flat manifest dirty after parsing
718 718
719 719 def _subpath(self, path):
720 720 return self._dir + path
721 721
722 722 def __len__(self):
723 723 self._load()
724 724 size = len(self._files)
725 725 for m in self._dirs.values():
726 726 size += m.__len__()
727 727 return size
728 728
729 729 def _isempty(self):
730 730 self._load() # for consistency; already loaded by all callers
731 731 return (not self._files and (not self._dirs or
732 732 all(m._isempty() for m in self._dirs.values())))
733 733
734 734 def __repr__(self):
735 735 return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s at 0x%x>' %
736 736 (self._dir, revlog.hex(self._node),
737 737 bool(self._loadfunc is _noop),
738 738 self._dirty, id(self)))
739 739
740 740 def dir(self):
741 741 '''The directory that this tree manifest represents, including a
742 742 trailing '/'. Empty string for the repo root directory.'''
743 743 return self._dir
744 744
745 745 def node(self):
746 746 '''This node of this instance. nullid for unsaved instances. Should
747 747 be updated when the instance is read or written from a revlog.
748 748 '''
749 749 assert not self._dirty
750 750 return self._node
751 751
752 752 def setnode(self, node):
753 753 self._node = node
754 754 self._dirty = False
755 755
756 756 def iterentries(self):
757 757 self._load()
758 758 for p, n in sorted(self._dirs.items() + self._files.items()):
759 759 if p in self._files:
760 760 yield self._subpath(p), n, self._flags.get(p, '')
761 761 else:
762 762 for x in n.iterentries():
763 763 yield x
764 764
def iteritems(self):
    """Yield (path, node) for every file, sorted, depth-first."""
    self._load()
    for name, value in sorted(self._dirs.items() + self._files.items()):
        if name in self._files:
            yield self._subpath(name), value
        else:
            for f, sn in value.iteritems():
                yield f, sn
def iterkeys(self):
    """Yield every file name in this tree, sorted, depth-first."""
    self._load()
    for name in sorted(self._dirs.keys() + self._files.keys()):
        if name in self._files:
            yield self._subpath(name)
        else:
            for f in self._dirs[name].iterkeys():
                yield f
def keys(self):
    """Return a sorted list of all file names in this tree."""
    return list(self.iterkeys())
def __iter__(self):
    """Iterate over all file names (same order as iterkeys())."""
    return self.iterkeys()
def __contains__(self, f):
    """True when file *f* exists in this tree; None is never contained."""
    if f is None:
        return False
    self._load()
    # avoid shadowing the builtin ``dir`` and use the ``in`` protocol
    # instead of calling __contains__ directly
    topdir, subpath = _splittopdir(f)
    if topdir:
        if topdir not in self._dirs:
            return False
        return subpath in self._dirs[topdir]
    return f in self._files
def get(self, f, default=None):
    """Return the node for file *f*, or *default* when absent."""
    self._load()
    topdir, subpath = _splittopdir(f)
    if not topdir:
        return self._files.get(f, default)
    if topdir not in self._dirs:
        return default
    return self._dirs[topdir].get(subpath, default)
def __getitem__(self, f):
    """Return the node for *f*; raise KeyError when it does not exist."""
    self._load()
    topdir, subpath = _splittopdir(f)
    # use plain indexing instead of calling __getitem__ directly
    if topdir:
        return self._dirs[topdir][subpath]
    return self._files[f]
def flags(self, f):
    """Return the flags for file *f* ('' when none or when *f* names a
    subdirectory)."""
    self._load()
    topdir, subpath = _splittopdir(f)
    if topdir:
        if topdir in self._dirs:
            return self._dirs[topdir].flags(subpath)
        return ''
    # a directory entry carries no file flags
    if f in self._dirs:
        return ''
    return self._flags.get(f, '')
def find(self, f):
    """Return (node, flags) for *f*; raise KeyError when missing."""
    self._load()
    topdir, subpath = _splittopdir(f)
    if not topdir:
        return self._files[f], self._flags.get(f, '')
    return self._dirs[topdir].find(subpath)
def __delitem__(self, f):
    """Remove file *f*, pruning any subdirectory left empty."""
    self._load()
    topdir, subpath = _splittopdir(f)
    if topdir:
        # use ``del d[k]`` instead of calling __delitem__ directly
        del self._dirs[topdir][subpath]
        # If the directory is now empty, remove it
        if self._dirs[topdir]._isempty():
            del self._dirs[topdir]
    else:
        del self._files[f]
        # pop() replaces the membership-test-then-delete dance
        self._flags.pop(f, None)
    self._dirty = True
def __setitem__(self, f, n):
    """Set the node for file *f*, creating subtrees as needed."""
    assert n is not None
    self._load()
    topdir, subpath = _splittopdir(f)
    if topdir:
        if topdir not in self._dirs:
            self._dirs[topdir] = treemanifest(self._subpath(topdir))
        self._dirs[topdir][subpath] = n
    else:
        self._files[f] = n[:21] # to match manifestdict's behavior
    self._dirty = True
def _load(self):
    """Run the pending lazy load (or lazy copy) function exactly once."""
    if self._loadfunc is not _noop:
        # clear before calling so re-entrant loads are no-ops
        lf, self._loadfunc = self._loadfunc, _noop
        lf(self)
    elif self._copyfunc is not _noop:
        cf, self._copyfunc = self._copyfunc, _noop
        cf(self)
def setflag(self, f, flags):
    """Set the flags (symlink, executable) for path f."""
    self._load()
    topdir, subpath = _splittopdir(f)
    if not topdir:
        self._flags[f] = flags
    else:
        if topdir not in self._dirs:
            self._dirs[topdir] = treemanifest(self._subpath(topdir))
        self._dirs[topdir].setflag(subpath, flags)
    self._dirty = True
def copy(self):
    """Return a copy of this tree; contents are copied lazily whenever
    this tree itself has not been loaded yet."""
    result = treemanifest(self._dir)
    result._node = self._node
    result._dirty = self._dirty
    if self._copyfunc is not _noop:
        # this tree is itself an unrealized copy; just chain the function
        result._copyfunc = self._copyfunc
        return result

    def _copyfunc(s):
        self._load()
        for d in self._dirs:
            s._dirs[d] = self._dirs[d].copy()
        s._files = dict.copy(self._files)
        s._flags = dict.copy(self._flags)
    if self._loadfunc is _noop:
        # already loaded: copy eagerly
        _copyfunc(result)
    else:
        # not loaded yet: defer the copy until first use
        result._copyfunc = _copyfunc
    return result
def filesnotin(self, m2):
    '''Set of files in this manifest that are not in the other'''
    files = set()
    def _filesnotin(t1, t2):
        # two clean trees with the same node cannot differ
        if t1._node == t2._node and not t1._dirty and not t2._dirty:
            return
        t1._load()
        t2._load()
        for d, m1 in t1._dirs.iteritems():
            if d in t2._dirs:
                _filesnotin(m1, t2._dirs[d])
            else:
                # whole subtree is missing from m2
                files.update(m1.iterkeys())
        for fn in t1._files.iterkeys():
            if fn not in t2._files:
                files.add(t1._subpath(fn))
    _filesnotin(self, m2)
    return files
@propertycache
def _alldirs(self):
    # computed once and memoized on the instance by propertycache
    return util.dirs(self)
def dirs(self):
    """Return the cached set-like object of all directories."""
    return self._alldirs
def hasdir(self, dir):
    """True when directory *dir* exists anywhere in this tree."""
    self._load()
    topdir, subdir = _splittopdir(dir)
    if not topdir:
        # top-level directories are keyed with a trailing '/'
        return (dir + '/') in self._dirs
    if topdir in self._dirs:
        return self._dirs[topdir].hasdir(subdir)
    return False
def walk(self, match):
    '''Generates matching file names.

    Equivalent to manifest.matches(match).iterkeys(), but without creating
    an entirely new manifest.

    It also reports nonexistent files by marking them bad with match.bad().
    '''
    if match.always():
        # iterate the manifest directly; wrapping in iter() was redundant
        for f in self:
            yield f
        return

    fset = set(match.files())

    for fn in self._walk(match):
        if fn in fset:
            # specified pattern is the exact name
            fset.remove(fn)
        yield fn

    # for dirstate.walk, files=['.'] means "walk the whole tree".
    # follow that here, too
    fset.discard('.')

    for fn in sorted(fset):
        if not self.hasdir(fn):
            match.bad(fn, None)
def _walk(self, match):
    '''Recursively generates matching file names for walk().'''
    if not match.visitdir(self._dir[:-1] or '.'):
        return

    # yield this dir's files and walk its submanifests
    self._load()
    for name in sorted(self._dirs.keys() + self._files.keys()):
        if name in self._files:
            fullp = self._subpath(name)
            if match(fullp):
                yield fullp
        else:
            for f in self._dirs[name]._walk(match):
                yield f
def matches(self, match):
    '''generate a new manifest filtered by the match argument'''
    if match.always():
        # everything matches: a plain copy suffices
        return self.copy()
    return self._matches(match)
def _matches(self, match):
    '''recursively generate a new manifest filtered by the match argument.
    '''
    visit = match.visitdir(self._dir[:-1] or '.')
    if visit == 'all':
        return self.copy()
    result = treemanifest(self._dir)
    if not visit:
        return result

    self._load()
    for fn in self._files:
        fullp = self._subpath(fn)
        if not match(fullp):
            continue
        result._files[fn] = self._files[fn]
        if fn in self._flags:
            result._flags[fn] = self._flags[fn]

    for d, subm in self._dirs.iteritems():
        filtered = subm._matches(match)
        if not filtered._isempty():
            result._dirs[d] = filtered

    # a non-empty filtered tree has a yet-unknown node id
    if not result._isempty():
        result._dirty = True
    return result
def diff(self, m2, clean=False):
    '''Finds changes between the current manifest and m2.

    Args:
      m2: the manifest to which this manifest should be compared.
      clean: if true, include files unchanged between these manifests
             with a None value in the returned dictionary.

    The result is returned as a dict with filename as key and
    values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
    nodeid in the current/other manifest and fl1/fl2 is the flag
    in the current/other manifest. Where the file does not exist,
    the nodeid will be None and the flags will be the empty
    string.
    '''
    result = {}
    emptytree = treemanifest()

    def _diff(t1, t2):
        # two clean trees with the same node cannot differ
        if t1._node == t2._node and not t1._dirty and not t2._dirty:
            return
        t1._load()
        t2._load()
        # recurse into subdirectories present on either side
        for d, m1 in t1._dirs.iteritems():
            _diff(m1, t2._dirs.get(d, emptytree))
        for d, sub2 in t2._dirs.iteritems():
            if d not in t1._dirs:
                _diff(emptytree, sub2)

        # compare the files at this level
        for fn, n1 in t1._files.iteritems():
            fl1 = t1._flags.get(fn, '')
            n2 = t2._files.get(fn, None)
            fl2 = t2._flags.get(fn, '')
            if n1 != n2 or fl1 != fl2:
                result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
            elif clean:
                result[t1._subpath(fn)] = None

        for fn, n2 in t2._files.iteritems():
            if fn not in t1._files:
                fl2 = t2._flags.get(fn, '')
                result[t2._subpath(fn)] = ((None, ''), (n2, fl2))

    _diff(self, m2)
    return result
def unmodifiedsince(self, m2):
    """True when neither tree is dirty and both carry the same node."""
    if self._dirty or m2._dirty:
        return False
    return self._node == m2._node
def parse(self, text, readsubtree):
    """Populate this tree from *text*; entries flagged 't' are
    subdirectories loaded via *readsubtree*."""
    for f, n, fl in _parse(text):
        if fl == 't':
            f = f + '/'
            self._dirs[f] = readsubtree(self._subpath(f), n)
        elif '/' in f:
            # This is a flat manifest, so use __setitem__ and setflag rather
            # than assigning directly to _files and _flags, so we can
            # assign a path in a subdirectory, and to mark dirty (compared
            # to nullid).
            self[f] = n
            if fl:
                self.setflag(f, fl)
        else:
            # Assigning to _files and _flags avoids marking as dirty,
            # and should be a little faster.
            self._files[f] = n
            if fl:
                self._flags[f] = fl
def text(self, usemanifestv2=False):
    """Get the full data of this manifest as a bytestring."""
    self._load()
    # serialize every entry, depth-first, in the requested format
    return _text(self.iterentries(), usemanifestv2)
def dirtext(self, usemanifestv2=False):
    """Get the full data of this directory as a bytestring. Make sure that
    any submanifests have been written first, so their nodeids are correct.
    """
    self._load()
    getflags = self.flags
    direntries = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs]
    fileentries = [(f, self._files[f], getflags(f)) for f in self._files]
    return _text(sorted(direntries + fileentries), usemanifestv2)
def read(self, gettext, readsubtree):
    """Arrange for this tree to be lazily parsed from gettext()."""
    def _load_for_read(s):
        s.parse(gettext(), readsubtree)
        # freshly read content matches the stored node
        s._dirty = False
    self._loadfunc = _load_for_read
def writesubtrees(self, m1, m2, writesubtree):
    """Invoke *writesubtree* for each subdirectory, with parent nodes
    taken from m1/m2 (a non-null parent is preferred as p1)."""
    self._load() # for consistency; should never have any effect here
    m1._load()
    m2._load()
    emptytree = treemanifest()
    for d, subm in self._dirs.iteritems():
        subp1 = m1._dirs.get(d, emptytree)._node
        subp2 = m2._dirs.get(d, emptytree)._node
        if subp1 == revlog.nullid:
            # prefer a non-null first parent
            subp1, subp2 = subp2, subp1
        writesubtree(subm, subp1, subp2)
class manifestrevlog(revlog.revlog):
    '''A revlog that stores manifest texts. This is responsible for caching the
    full-text manifest contents.
    '''
    def __init__(self, opener, dir='', dirlogcache=None):
        # During normal operations, we expect to deal with not more than four
        # revs at a time (such as during commit --amend). When rebasing large
        # stacks of commits, the number can go up, hence the config knob below.
        cachesize = 4
        usetreemanifest = False
        usemanifestv2 = False
        opts = getattr(opener, 'options', None)
        if opts is not None:
            cachesize = opts.get('manifestcachesize', cachesize)
            usetreemanifest = opts.get('treemanifest', usetreemanifest)
            usemanifestv2 = opts.get('manifestv2', usemanifestv2)

        self._treeondisk = usetreemanifest
        self._usemanifestv2 = usemanifestv2

        self._fulltextcache = util.lrucachedict(cachesize)

        indexfile = "00manifest.i"
        if dir:
            # subdirectory revlogs only exist with on-disk tree manifests
            assert self._treeondisk, 'opts is %r' % opts
            if not dir.endswith('/'):
                dir = dir + '/'
            indexfile = "meta/" + dir + "00manifest.i"
        self._dir = dir
        # The dirlogcache is kept on the root manifest log
        if dir:
            self._dirlogcache = dirlogcache
        else:
            self._dirlogcache = {'': self}

        super(manifestrevlog, self).__init__(opener, indexfile,
                                             checkambig=bool(dir))

    @property
    def fulltextcache(self):
        return self._fulltextcache

    def clearcaches(self):
        super(manifestrevlog, self).clearcaches()
        self._fulltextcache.clear()
        self._dirlogcache = {'': self}

    def dirlog(self, dir):
        """Return the revlog for subdirectory *dir* (cached on the root)."""
        if dir:
            assert self._treeondisk
        if dir not in self._dirlogcache:
            self._dirlogcache[dir] = manifestrevlog(self.opener, dir,
                                                    self._dirlogcache)
        return self._dirlogcache[dir]

    def add(self, m, transaction, link, p1, p2, added, removed):
        """Store manifest *m* and return its node, using a cheap delta
        against p1's cached fulltext when possible."""
        if (p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta')
            and not self._usemanifestv2):
            # If our first parent is in the manifest cache, we can
            # compute a delta here using properties we know about the
            # manifest up-front, which may save time later for the
            # revlog layer.

            _checkforbidden(added)
            # combine the changed lists into one sorted iterator
            work = heapq.merge([(x, False) for x in added],
                               [(x, True) for x in removed])

            arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
            cachedelta = self.rev(p1), deltatext
            text = util.buffer(arraytext)
            n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
        else:
            # The first parent manifest isn't already loaded, so we'll
            # just encode a fulltext of the manifest and pass that
            # through to the revlog layer, and let it handle the delta
            # process.
            if self._treeondisk:
                m1 = self.read(p1)
                m2 = self.read(p2)
                n = self._addtree(m, transaction, link, m1, m2)
                arraytext = None
            else:
                text = m.text(self._usemanifestv2)
                n = self.addrevision(text, transaction, link, p1, p2)
                arraytext = array.array('c', text)

        if arraytext is not None:
            self.fulltextcache[n] = arraytext

        return n

    def _addtree(self, m, transaction, link, m1, m2):
        # If the manifest is unchanged compared to one parent,
        # don't write a new revision
        if m.unmodifiedsince(m1) or m.unmodifiedsince(m2):
            return m.node()
        def writesubtree(subm, subp1, subp2):
            sublog = self.dirlog(subm.dir())
            sublog.add(subm, transaction, link, subp1, subp2, None, None)
        m.writesubtrees(m1, m2, writesubtree)
        text = m.dirtext(self._usemanifestv2)
        # Double-check whether contents are unchanged to one parent
        if text == m1.dirtext(self._usemanifestv2):
            n = m1.node()
        elif text == m2.dirtext(self._usemanifestv2):
            n = m2.node()
        else:
            n = self.addrevision(text, transaction, link, m1.node(), m2.node())
        # Save nodeid so parent manifest can calculate its nodeid
        m.setnode(n)
        return n
class manifestlog(object):
    """A collection class representing the collection of manifest snapshots
    referenced by commits in the repository.

    In this situation, 'manifest' refers to the abstract concept of a snapshot
    of the list of files in the given commit. Consumers of the output of this
    class do not care about the implementation details of the actual manifests
    they receive (i.e. tree or flat or lazily loaded, etc)."""
    def __init__(self, opener, repo):
        self._repo = repo

        usetreemanifest = False
        opts = getattr(opener, 'options', None)
        if opts is not None:
            usetreemanifest = opts.get('treemanifest', usetreemanifest)
        self._treeinmem = usetreemanifest

        self._oldmanifest = repo._constructmanifest()
        self._revlog = self._oldmanifest

        # A cache of the manifestctx or treemanifestctx for each directory
        self._dirmancache = {}

        # We'll separate this into its own cache once oldmanifest is no
        # longer used
        self._mancache = self._oldmanifest._mancache
        self._dirmancache[''] = self._mancache

        # A future patch makes this use the same config value as the existing
        # mancache
        self.cachesize = 4

    def __getitem__(self, node):
        """Retrieves the manifest instance for the given node. Throws a
        LookupError if not found.
        """
        return self.get('', node)

    def get(self, dir, node):
        """Retrieves the manifest instance for the given node. Throws a
        LookupError if not found.
        """
        if node in self._dirmancache.get(dir, ()):
            cachemf = self._dirmancache[dir][node]
            # The old manifest may put non-ctx manifests in the cache, so
            # skip those since they don't implement the full api.
            if isinstance(cachemf, (manifestctx, treemanifestctx)):
                return cachemf

        if dir:
            if not self._revlog._treeondisk:
                raise error.Abort(
                    _("cannot ask for manifest directory '%s' in a flat "
                      "manifest") % dir)
            dirlog = self._revlog.dirlog(dir)
            if node not in dirlog.nodemap:
                raise LookupError(node, dirlog.indexfile,
                                  _('no node'))
            m = treemanifestctx(self._repo, dir, node)
        else:
            if node not in self._revlog.nodemap:
                raise LookupError(node, self._revlog.indexfile,
                                  _('no node'))
            if self._treeinmem:
                m = treemanifestctx(self._repo, '', node)
            else:
                m = manifestctx(self._repo, node)

        # never cache the null revision's context
        if node != revlog.nullid:
            mancache = self._dirmancache.get(dir)
            if not mancache:
                mancache = util.lrucachedict(self.cachesize)
                self._dirmancache[dir] = mancache
            mancache[node] = m
        return m

    def add(self, m, transaction, link, p1, p2, added, removed):
        return self._revlog.add(m, transaction, link, p1, p2, added, removed)
class manifestctx(object):
    """A class representing a single revision of a manifest, including its
    contents, its parent revs, and its linkrev.
    """
    def __init__(self, repo, node):
        self._repo = repo
        self._data = None

        self._node = node

        # TODO: We eventually want p1, p2, and linkrev exposed on this class,
        # but let's add it later when something needs it and we can load it
        # lazily.
        #self.p1, self.p2 = revlog.parents(node)
        #rev = revlog.rev(node)
        #self.linkrev = revlog.linkrev(rev)

    def node(self):
        return self._node

    def read(self):
        """Return (and cache) this revision's manifestdict."""
        if not self._data:
            if self._node == revlog.nullid:
                self._data = manifestdict()
            else:
                rl = self._repo.manifestlog._revlog
                text = rl.revision(self._node)
                # cache the fulltext for the revlog layer
                arraytext = array.array('c', text)
                rl._fulltextcache[self._node] = arraytext
                self._data = manifestdict(text)
        return self._data

    def readfast(self, shallow=False):
        '''Calls either readdelta or read, based on which would be less work.
        readdelta is called if the delta is against the p1, and therefore can
        be read quickly.

        If `shallow` is True, nothing changes since this is a flat manifest.
        '''
        rl = self._repo.manifestlog._revlog
        r = rl.rev(self._node)
        deltaparent = rl.deltaparent(r)
        if deltaparent != revlog.nullrev and deltaparent in rl.parentrevs(r):
            return self.readdelta()
        return self.read()

    def readdelta(self, shallow=False):
        """Return a manifestdict holding only the entries that changed
        relative to the delta parent."""
        revlog = self._repo.manifestlog._revlog
        if revlog._usemanifestv2:
            # Need to perform a slow delta
            r0 = revlog.deltaparent(revlog.rev(self._node))
            m0 = manifestctx(self._repo, revlog.node(r0)).read()
            m1 = self.read()
            md = manifestdict()
            for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
                if n1:
                    md[f] = n1
                    if fl1:
                        md.setflag(f, fl1)
            return md

        # fast path: patch the stored delta directly
        r = revlog.rev(self._node)
        d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
        return manifestdict(d)
class treemanifestctx(object):
    """A single revision of a tree manifest rooted at directory ``dir``."""

    def __init__(self, repo, dir, node):
        self._repo = repo
        self._dir = dir
        self._data = None

        self._node = node

        # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so
        # that we can instantiate treemanifestctx objects for directories we
        # don't have on disk.
        #self.p1, self.p2 = revlog.parents(node)
        #rev = revlog.rev(node)
        #self.linkrev = revlog.linkrev(rev)

    def _revlog(self):
        return self._repo.manifestlog._revlog.dirlog(self._dir)

    def read(self):
        """Return (and cache) this revision's treemanifest."""
        if not self._data:
            rl = self._revlog()
            if self._node == revlog.nullid:
                self._data = treemanifest()
            elif rl._treeondisk:
                m = treemanifest(dir=self._dir)
                def gettext():
                    return rl.revision(self._node)
                def readsubtree(dir, subm):
                    return treemanifestctx(self._repo, dir, subm).read()
                m.read(gettext, readsubtree)
                m.setnode(self._node)
                self._data = m
            else:
                # Bug fix: this previously called revision() on the imported
                # ``revlog`` *module* (which has no such function) instead of
                # on the dirlog instance.
                text = rl.revision(self._node)
                arraytext = array.array('c', text)
                rl.fulltextcache[self._node] = arraytext
                self._data = treemanifest(dir=self._dir, text=text)

        return self._data

    def node(self):
        return self._node

    def readdelta(self, shallow=False):
        """Return the entries that changed relative to the delta parent.

        With ``shallow`` (and manifest v1), subdirectory entries are
        reported as-is, distinguished only by their 't' flag; otherwise a
        full recursive diff against the delta parent is computed.
        """
        revlog = self._revlog()
        if shallow and not revlog._usemanifestv2:
            r = revlog.rev(self._node)
            d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
            return manifestdict(d)
        else:
            # Need to perform a slow delta
            r0 = revlog.deltaparent(revlog.rev(self._node))
            m0 = treemanifestctx(self._repo, self._dir, revlog.node(r0)).read()
            m1 = self.read()
            md = treemanifest(dir=self._dir)
            for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
                if n1:
                    md[f] = n1
                    if fl1:
                        md.setflag(f, fl1)
            return md

    def readfast(self, shallow=False):
        '''Calls either readdelta or read, based on which would be less work.
        readdelta is called if the delta is against the p1, and therefore can
        be read quickly.

        If `shallow` is True, it only returns the entries from this manifest,
        and not any submanifests.
        '''
        rl = self._revlog()
        r = rl.rev(self._node)
        deltaparent = rl.deltaparent(r)
        if (deltaparent != revlog.nullrev and
            deltaparent in rl.parentrevs(r)):
            return self.readdelta(shallow=shallow)

        if shallow:
            return manifestdict(rl.revision(self._node))
        else:
            return self.read()
class manifest(manifestrevlog):
    def __init__(self, opener, dir='', dirlogcache=None):
        '''The 'dir' and 'dirlogcache' arguments are for internal use by
        manifest.manifest only. External users should create a root manifest
        log with manifest.manifest(opener) and call dirlog() on it.
        '''
        # During normal operations, we expect to deal with not more than four
        # revs at a time (such as during commit --amend). When rebasing large
        # stacks of commits, the number can go up, hence the config knob below.
        cachesize = 4
        usetreemanifest = False
        opts = getattr(opener, 'options', None)
        if opts is not None:
            cachesize = opts.get('manifestcachesize', cachesize)
            usetreemanifest = opts.get('treemanifest', usetreemanifest)
        self._mancache = util.lrucachedict(cachesize)
        self._treeinmem = usetreemanifest
        super(manifest, self).__init__(opener, dir=dir, dirlogcache=dirlogcache)

    def _newmanifest(self, data=''):
        """Build an empty in-memory manifest of the configured flavor."""
        if self._treeinmem:
            return treemanifest(self._dir, data)
        return manifestdict(data)

    def dirlog(self, dir):
        """This overrides the base revlog implementation to allow construction
        'manifest' types instead of manifestrevlog types. This is only needed
        until we migrate off the 'manifest' type."""
        if dir:
            assert self._treeondisk
        if dir not in self._dirlogcache:
            self._dirlogcache[dir] = manifest(self.opener, dir,
                                              self._dirlogcache)
        return self._dirlogcache[dir]

    def _slowreaddelta(self, node):
        # compute the delta by diffing two full reads
        r0 = self.deltaparent(self.rev(node))
        m0 = self.read(self.node(r0))
        m1 = self.read(node)
        md = self._newmanifest()
        for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
            if n1:
                md[f] = n1
                if fl1:
                    md.setflag(f, fl1)
        return md

    def readdelta(self, node):
        """Return a manifest holding only the entries changed relative to
        the delta parent of *node*."""
        if self._usemanifestv2 or self._treeondisk:
            return self._slowreaddelta(node)
        r = self.rev(node)
        d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
        return self._newmanifest(d)

    def readshallowdelta(self, node):
        '''For flat manifests, this is the same as readdelta(). For
        treemanifests, this will read the delta for this revlog's directory,
        without recursively reading subdirectory manifests. Instead, any
        subdirectory entry will be reported as it appears in the manifests,
        i.e. the subdirectory will be reported among files and distinguished
        only by its 't' flag.'''
        if not self._treeondisk:
            return self.readdelta(node)
        if self._usemanifestv2:
            raise error.Abort(
                _("readshallowdelta() not implemented for manifestv2"))
        r = self.rev(node)
        d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
        return manifestdict(d)

    def read(self, node):
        """Return (and cache) the manifest stored at *node*."""
        if node == revlog.nullid:
            return self._newmanifest() # don't upset local cache
        if node in self._mancache:
            cached = self._mancache[node]
            if isinstance(cached, (manifestctx, treemanifestctx)):
                cached = cached.read()
            return cached
        if self._treeondisk:
            def gettext():
                return self.revision(node)
            def readsubtree(dir, subm):
                return self.dirlog(dir).read(subm)
            m = self._newmanifest()
            m.read(gettext, readsubtree)
            m.setnode(node)
            arraytext = None
        else:
            text = self.revision(node)
            m = self._newmanifest(text)
            arraytext = array.array('c', text)
        self._mancache[node] = m
        if arraytext is not None:
            self.fulltextcache[node] = arraytext
        return m

    def find(self, node, f):
        '''look up entry for a single file efficiently.
        return (node, flags) pair if found, (None, None) if not.'''
        m = self.read(node)
        try:
            return m.find(f)
        except KeyError:
            return None, None

    def clearcaches(self):
        super(manifest, self).clearcaches()
        self._mancache.clear()
General Comments 0
You need to be logged in to leave comments. Login now