##// END OF EJS Templates
treemanifest: speed up diff by keeping track of dirty nodes...
Martin von Zweigbergk -
r25220:f0fbd88b default
parent child Browse files
Show More
@@ -1,947 +1,969 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import mdiff, parsers, error, revlog, util
10 10 import array, struct
11 11 import os
12 12
13 13 propertycache = util.propertycache
14 14
15 15 def _parsev1(data):
16 16 # This method does a little bit of excessive-looking
17 17 # precondition checking. This is so that the behavior of this
18 18 # class exactly matches its C counterpart to try and help
19 19 # prevent surprise breakage for anyone that develops against
20 20 # the pure version.
21 21 if data and data[-1] != '\n':
22 22 raise ValueError('Manifest did not end in a newline.')
23 23 prev = None
24 24 for l in data.splitlines():
25 25 if prev is not None and prev > l:
26 26 raise ValueError('Manifest lines not in sorted order.')
27 27 prev = l
28 28 f, n = l.split('\0')
29 29 if len(n) > 40:
30 30 yield f, revlog.bin(n[:40]), n[40:]
31 31 else:
32 32 yield f, revlog.bin(n), ''
33 33
34 34 def _parsev2(data):
35 35 metadataend = data.find('\n')
36 36 # Just ignore metadata for now
37 37 pos = metadataend + 1
38 38 prevf = ''
39 39 while pos < len(data):
40 40 end = data.find('\n', pos + 1) # +1 to skip stem length byte
41 41 if end == -1:
42 42 raise ValueError('Manifest ended with incomplete file entry.')
43 43 stemlen = ord(data[pos])
44 44 items = data[pos + 1:end].split('\0')
45 45 f = prevf[:stemlen] + items[0]
46 46 if prevf > f:
47 47 raise ValueError('Manifest entries not in sorted order.')
48 48 fl = items[1]
49 49 # Just ignore metadata (items[2:] for now)
50 50 n = data[end + 1:end + 21]
51 51 yield f, n, fl
52 52 pos = end + 22
53 53 prevf = f
54 54
55 55 def _parse(data):
56 56 """Generates (path, node, flags) tuples from a manifest text"""
57 57 if data.startswith('\0'):
58 58 return iter(_parsev2(data))
59 59 else:
60 60 return iter(_parsev1(data))
61 61
62 62 def _text(it, usemanifestv2):
63 63 """Given an iterator over (path, node, flags) tuples, returns a manifest
64 64 text"""
65 65 if usemanifestv2:
66 66 return _textv2(it)
67 67 else:
68 68 return _textv1(it)
69 69
70 70 def _textv1(it):
71 71 files = []
72 72 lines = []
73 73 _hex = revlog.hex
74 74 for f, n, fl in it:
75 75 files.append(f)
76 76 # if this is changed to support newlines in filenames,
77 77 # be sure to check the templates/ dir again (especially *-raw.tmpl)
78 78 lines.append("%s\0%s%s\n" % (f, _hex(n), fl))
79 79
80 80 _checkforbidden(files)
81 81 return ''.join(lines)
82 82
83 83 def _textv2(it):
84 84 files = []
85 85 lines = ['\0\n']
86 86 prevf = ''
87 87 for f, n, fl in it:
88 88 files.append(f)
89 89 stem = os.path.commonprefix([prevf, f])
90 90 stemlen = min(len(stem), 255)
91 91 lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n))
92 92 prevf = f
93 93 _checkforbidden(files)
94 94 return ''.join(lines)
95 95
96 96 class _lazymanifest(dict):
97 97 """This is the pure implementation of lazymanifest.
98 98
99 99 It has not been optimized *at all* and is not lazy.
100 100 """
101 101
102 102 def __init__(self, data):
103 103 dict.__init__(self)
104 104 for f, n, fl in _parse(data):
105 105 self[f] = n, fl
106 106
107 107 def __setitem__(self, k, v):
108 108 node, flag = v
109 109 assert node is not None
110 110 if len(node) > 21:
111 111 node = node[:21] # match c implementation behavior
112 112 dict.__setitem__(self, k, (node, flag))
113 113
114 114 def __iter__(self):
115 115 return iter(sorted(dict.keys(self)))
116 116
117 117 def iterkeys(self):
118 118 return iter(sorted(dict.keys(self)))
119 119
120 120 def iterentries(self):
121 121 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
122 122
123 123 def copy(self):
124 124 c = _lazymanifest('')
125 125 c.update(self)
126 126 return c
127 127
128 128 def diff(self, m2, clean=False):
129 129 '''Finds changes between the current manifest and m2.'''
130 130 diff = {}
131 131
132 132 for fn, e1 in self.iteritems():
133 133 if fn not in m2:
134 134 diff[fn] = e1, (None, '')
135 135 else:
136 136 e2 = m2[fn]
137 137 if e1 != e2:
138 138 diff[fn] = e1, e2
139 139 elif clean:
140 140 diff[fn] = None
141 141
142 142 for fn, e2 in m2.iteritems():
143 143 if fn not in self:
144 144 diff[fn] = (None, ''), e2
145 145
146 146 return diff
147 147
148 148 def filtercopy(self, filterfn):
149 149 c = _lazymanifest('')
150 150 for f, n, fl in self.iterentries():
151 151 if filterfn(f):
152 152 c[f] = n, fl
153 153 return c
154 154
155 155 def text(self):
156 156 """Get the full data of this manifest as a bytestring."""
157 157 return _textv1(self.iterentries())
158 158
159 159 try:
160 160 _lazymanifest = parsers.lazymanifest
161 161 except AttributeError:
162 162 pass
163 163
164 164 class manifestdict(object):
165 165 def __init__(self, data=''):
166 166 if data.startswith('\0'):
167 167 #_lazymanifest can not parse v2
168 168 self._lm = _lazymanifest('')
169 169 for f, n, fl in _parsev2(data):
170 170 self._lm[f] = n, fl
171 171 else:
172 172 self._lm = _lazymanifest(data)
173 173
174 174 def __getitem__(self, key):
175 175 return self._lm[key][0]
176 176
177 177 def find(self, key):
178 178 return self._lm[key]
179 179
180 180 def __len__(self):
181 181 return len(self._lm)
182 182
183 183 def __setitem__(self, key, node):
184 184 self._lm[key] = node, self.flags(key, '')
185 185
186 186 def __contains__(self, key):
187 187 return key in self._lm
188 188
189 189 def __delitem__(self, key):
190 190 del self._lm[key]
191 191
192 192 def __iter__(self):
193 193 return self._lm.__iter__()
194 194
195 195 def iterkeys(self):
196 196 return self._lm.iterkeys()
197 197
198 198 def keys(self):
199 199 return list(self.iterkeys())
200 200
201 201 def filesnotin(self, m2):
202 202 '''Set of files in this manifest that are not in the other'''
203 203 files = set(self)
204 204 files.difference_update(m2)
205 205 return files
206 206
207 207 @propertycache
208 208 def _dirs(self):
209 209 return util.dirs(self)
210 210
211 211 def dirs(self):
212 212 return self._dirs
213 213
214 214 def hasdir(self, dir):
215 215 return dir in self._dirs
216 216
217 217 def _filesfastpath(self, match):
218 218 '''Checks whether we can correctly and quickly iterate over matcher
219 219 files instead of over manifest files.'''
220 220 files = match.files()
221 221 return (len(files) < 100 and (match.isexact() or
222 222 (not match.anypats() and all(fn in self for fn in files))))
223 223
224 224 def walk(self, match):
225 225 '''Generates matching file names.
226 226
227 227 Equivalent to manifest.matches(match).iterkeys(), but without creating
228 228 an entirely new manifest.
229 229
230 230 It also reports nonexistent files by marking them bad with match.bad().
231 231 '''
232 232 if match.always():
233 233 for f in iter(self):
234 234 yield f
235 235 return
236 236
237 237 fset = set(match.files())
238 238
239 239 # avoid the entire walk if we're only looking for specific files
240 240 if self._filesfastpath(match):
241 241 for fn in sorted(fset):
242 242 yield fn
243 243 return
244 244
245 245 for fn in self:
246 246 if fn in fset:
247 247 # specified pattern is the exact name
248 248 fset.remove(fn)
249 249 if match(fn):
250 250 yield fn
251 251
252 252 # for dirstate.walk, files=['.'] means "walk the whole tree".
253 253 # follow that here, too
254 254 fset.discard('.')
255 255
256 256 for fn in sorted(fset):
257 257 if not self.hasdir(fn):
258 258 match.bad(fn, None)
259 259
260 260 def matches(self, match):
261 261 '''generate a new manifest filtered by the match argument'''
262 262 if match.always():
263 263 return self.copy()
264 264
265 265 if self._filesfastpath(match):
266 266 m = manifestdict()
267 267 lm = self._lm
268 268 for fn in match.files():
269 269 if fn in lm:
270 270 m._lm[fn] = lm[fn]
271 271 return m
272 272
273 273 m = manifestdict()
274 274 m._lm = self._lm.filtercopy(match)
275 275 return m
276 276
277 277 def diff(self, m2, clean=False):
278 278 '''Finds changes between the current manifest and m2.
279 279
280 280 Args:
281 281 m2: the manifest to which this manifest should be compared.
282 282 clean: if true, include files unchanged between these manifests
283 283 with a None value in the returned dictionary.
284 284
285 285 The result is returned as a dict with filename as key and
286 286 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
287 287 nodeid in the current/other manifest and fl1/fl2 is the flag
288 288 in the current/other manifest. Where the file does not exist,
289 289 the nodeid will be None and the flags will be the empty
290 290 string.
291 291 '''
292 292 return self._lm.diff(m2._lm, clean)
293 293
294 294 def setflag(self, key, flag):
295 295 self._lm[key] = self[key], flag
296 296
297 297 def get(self, key, default=None):
298 298 try:
299 299 return self._lm[key][0]
300 300 except KeyError:
301 301 return default
302 302
303 303 def flags(self, key, default=''):
304 304 try:
305 305 return self._lm[key][1]
306 306 except KeyError:
307 307 return default
308 308
309 309 def copy(self):
310 310 c = manifestdict()
311 311 c._lm = self._lm.copy()
312 312 return c
313 313
314 314 def iteritems(self):
315 315 return (x[:2] for x in self._lm.iterentries())
316 316
317 317 def text(self, usemanifestv2=False):
318 318 if usemanifestv2:
319 319 return _textv2(self._lm.iterentries())
320 320 else:
321 321 # use (probably) native version for v1
322 322 return self._lm.text()
323 323
324 324 def fastdelta(self, base, changes):
325 325 """Given a base manifest text as an array.array and a list of changes
326 326 relative to that text, compute a delta that can be used by revlog.
327 327 """
328 328 delta = []
329 329 dstart = None
330 330 dend = None
331 331 dline = [""]
332 332 start = 0
333 333 # zero copy representation of base as a buffer
334 334 addbuf = util.buffer(base)
335 335
336 336 # start with a readonly loop that finds the offset of
337 337 # each line and creates the deltas
338 338 for f, todelete in changes:
339 339 # bs will either be the index of the item or the insert point
340 340 start, end = _msearch(addbuf, f, start)
341 341 if not todelete:
342 342 h, fl = self._lm[f]
343 343 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
344 344 else:
345 345 if start == end:
346 346 # item we want to delete was not found, error out
347 347 raise AssertionError(
348 348 _("failed to remove %s from manifest") % f)
349 349 l = ""
350 350 if dstart is not None and dstart <= start and dend >= start:
351 351 if dend < end:
352 352 dend = end
353 353 if l:
354 354 dline.append(l)
355 355 else:
356 356 if dstart is not None:
357 357 delta.append([dstart, dend, "".join(dline)])
358 358 dstart = start
359 359 dend = end
360 360 dline = [l]
361 361
362 362 if dstart is not None:
363 363 delta.append([dstart, dend, "".join(dline)])
364 364 # apply the delta to the base, and get a delta for addrevision
365 365 deltatext, arraytext = _addlistdelta(base, delta)
366 366 return arraytext, deltatext
367 367
368 368 def _msearch(m, s, lo=0, hi=None):
369 369 '''return a tuple (start, end) that says where to find s within m.
370 370
371 371 If the string is found m[start:end] are the line containing
372 372 that string. If start == end the string was not found and
373 373 they indicate the proper sorted insertion point.
374 374
375 375 m should be a buffer or a string
376 376 s is a string'''
377 377 def advance(i, c):
378 378 while i < lenm and m[i] != c:
379 379 i += 1
380 380 return i
381 381 if not s:
382 382 return (lo, lo)
383 383 lenm = len(m)
384 384 if not hi:
385 385 hi = lenm
386 386 while lo < hi:
387 387 mid = (lo + hi) // 2
388 388 start = mid
389 389 while start > 0 and m[start - 1] != '\n':
390 390 start -= 1
391 391 end = advance(start, '\0')
392 392 if m[start:end] < s:
393 393 # we know that after the null there are 40 bytes of sha1
394 394 # this translates to the bisect lo = mid + 1
395 395 lo = advance(end + 40, '\n') + 1
396 396 else:
397 397 # this translates to the bisect hi = mid
398 398 hi = start
399 399 end = advance(lo, '\0')
400 400 found = m[lo:end]
401 401 if s == found:
402 402 # we know that after the null there are 40 bytes of sha1
403 403 end = advance(end + 40, '\n')
404 404 return (lo, end + 1)
405 405 else:
406 406 return (lo, lo)
407 407
408 408 def _checkforbidden(l):
409 409 """Check filenames for illegal characters."""
410 410 for f in l:
411 411 if '\n' in f or '\r' in f:
412 412 raise error.RevlogError(
413 413 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
414 414
415 415
416 416 # apply the changes collected during the bisect loop to our addlist
417 417 # return a delta suitable for addrevision
418 418 def _addlistdelta(addlist, x):
419 419 # for large addlist arrays, building a new array is cheaper
420 420 # than repeatedly modifying the existing one
421 421 currentposition = 0
422 422 newaddlist = array.array('c')
423 423
424 424 for start, end, content in x:
425 425 newaddlist += addlist[currentposition:start]
426 426 if content:
427 427 newaddlist += array.array('c', content)
428 428
429 429 currentposition = end
430 430
431 431 newaddlist += addlist[currentposition:]
432 432
433 433 deltatext = "".join(struct.pack(">lll", start, end, len(content))
434 434 + content for start, end, content in x)
435 435 return deltatext, newaddlist
436 436
437 437 def _splittopdir(f):
438 438 if '/' in f:
439 439 dir, subpath = f.split('/', 1)
440 440 return dir + '/', subpath
441 441 else:
442 442 return '', f
443 443
444 444 class treemanifest(object):
445 445 def __init__(self, dir='', text=''):
446 446 self._dir = dir
447 447 self._node = revlog.nullid
448 self._dirty = False
448 449 self._dirs = {}
449 450 # Using _lazymanifest here is a little slower than plain old dicts
450 451 self._files = {}
451 452 self._flags = {}
452 def readsubtree(subdir, subm):
453 raise AssertionError('treemanifest constructor only accepts '
454 'flat manifests')
455 self.parse(text, readsubtree)
453 if text:
454 def readsubtree(subdir, subm):
455 raise AssertionError('treemanifest constructor only accepts '
456 'flat manifests')
457 self.parse(text, readsubtree)
458 self._dirty = True # Mark flat manifest dirty after parsing
456 459
457 460 def _subpath(self, path):
458 461 return self._dir + path
459 462
460 463 def __len__(self):
461 464 size = len(self._files)
462 465 for m in self._dirs.values():
463 466 size += m.__len__()
464 467 return size
465 468
466 469 def _isempty(self):
467 470 return (not self._files and (not self._dirs or
468 471 all(m._isempty() for m in self._dirs.values())))
469 472
470 473 def __str__(self):
471 return ('<treemanifest dir=%s, node=%s>' %
472 (self._dir, revlog.hex(self._node)))
474 return ('<treemanifest dir=%s, node=%s, dirty=%s>' %
475 (self._dir, revlog.hex(self._node), self._dirty))
473 476
474 477 def dir(self):
475 478 '''The directory that this tree manifest represents, including a
476 479 trailing '/'. Empty string for the repo root directory.'''
477 480 return self._dir
478 481
479 482 def node(self):
480 483 '''This node of this instance. nullid for unsaved instances. Should
481 484 be updated when the instance is read or written from a revlog.
482 485 '''
486 assert not self._dirty
483 487 return self._node
484 488
485 489 def setnode(self, node):
486 490 self._node = node
491 self._dirty = False
487 492
488 493 def iteritems(self):
489 494 for p, n in sorted(self._dirs.items() + self._files.items()):
490 495 if p in self._files:
491 496 yield self._subpath(p), n
492 497 else:
493 498 for f, sn in n.iteritems():
494 499 yield f, sn
495 500
496 501 def iterkeys(self):
497 502 for p in sorted(self._dirs.keys() + self._files.keys()):
498 503 if p in self._files:
499 504 yield self._subpath(p)
500 505 else:
501 506 for f in self._dirs[p].iterkeys():
502 507 yield f
503 508
504 509 def keys(self):
505 510 return list(self.iterkeys())
506 511
507 512 def __iter__(self):
508 513 return self.iterkeys()
509 514
510 515 def __contains__(self, f):
511 516 if f is None:
512 517 return False
513 518 dir, subpath = _splittopdir(f)
514 519 if dir:
515 520 if dir not in self._dirs:
516 521 return False
517 522 return self._dirs[dir].__contains__(subpath)
518 523 else:
519 524 return f in self._files
520 525
521 526 def get(self, f, default=None):
522 527 dir, subpath = _splittopdir(f)
523 528 if dir:
524 529 if dir not in self._dirs:
525 530 return default
526 531 return self._dirs[dir].get(subpath, default)
527 532 else:
528 533 return self._files.get(f, default)
529 534
530 535 def __getitem__(self, f):
531 536 dir, subpath = _splittopdir(f)
532 537 if dir:
533 538 return self._dirs[dir].__getitem__(subpath)
534 539 else:
535 540 return self._files[f]
536 541
537 542 def flags(self, f):
538 543 dir, subpath = _splittopdir(f)
539 544 if dir:
540 545 if dir not in self._dirs:
541 546 return ''
542 547 return self._dirs[dir].flags(subpath)
543 548 else:
544 549 if f in self._dirs:
545 550 return ''
546 551 return self._flags.get(f, '')
547 552
548 553 def find(self, f):
549 554 dir, subpath = _splittopdir(f)
550 555 if dir:
551 556 return self._dirs[dir].find(subpath)
552 557 else:
553 558 return self._files[f], self._flags.get(f, '')
554 559
555 560 def __delitem__(self, f):
556 561 dir, subpath = _splittopdir(f)
557 562 if dir:
558 563 self._dirs[dir].__delitem__(subpath)
559 564 # If the directory is now empty, remove it
560 565 if self._dirs[dir]._isempty():
561 566 del self._dirs[dir]
562 567 else:
563 568 del self._files[f]
564 569 if f in self._flags:
565 570 del self._flags[f]
571 self._dirty = True
566 572
567 573 def __setitem__(self, f, n):
568 574 assert n is not None
569 575 dir, subpath = _splittopdir(f)
570 576 if dir:
571 577 if dir not in self._dirs:
572 578 self._dirs[dir] = treemanifest(self._subpath(dir))
573 579 self._dirs[dir].__setitem__(subpath, n)
574 580 else:
575 581 self._files[f] = n[:21] # to match manifestdict's behavior
582 self._dirty = True
576 583
577 584 def setflag(self, f, flags):
578 585 """Set the flags (symlink, executable) for path f."""
579 586 assert 'd' not in flags
580 587 dir, subpath = _splittopdir(f)
581 588 if dir:
582 589 if dir not in self._dirs:
583 590 self._dirs[dir] = treemanifest(self._subpath(dir))
584 591 self._dirs[dir].setflag(subpath, flags)
585 592 else:
586 593 self._flags[f] = flags
594 self._dirty = True
587 595
588 596 def copy(self):
589 597 copy = treemanifest(self._dir)
590 598 copy._node = self._node
599 copy._dirty = self._dirty
591 600 for d in self._dirs:
592 601 copy._dirs[d] = self._dirs[d].copy()
593 602 copy._files = dict.copy(self._files)
594 603 copy._flags = dict.copy(self._flags)
595 604 return copy
596 605
597 606 def filesnotin(self, m2):
598 607 '''Set of files in this manifest that are not in the other'''
599 608 files = set()
600 609 def _filesnotin(t1, t2):
610 if t1._node == t2._node and not t1._dirty and not t2._dirty:
611 return
601 612 for d, m1 in t1._dirs.iteritems():
602 613 if d in t2._dirs:
603 614 m2 = t2._dirs[d]
604 615 _filesnotin(m1, m2)
605 616 else:
606 617 files.update(m1.iterkeys())
607 618
608 619 for fn in t1._files.iterkeys():
609 620 if fn not in t2._files:
610 621 files.add(t1._subpath(fn))
611 622
612 623 _filesnotin(self, m2)
613 624 return files
614 625
615 626 @propertycache
616 627 def _alldirs(self):
617 628 return util.dirs(self)
618 629
619 630 def dirs(self):
620 631 return self._alldirs
621 632
622 633 def hasdir(self, dir):
623 634 topdir, subdir = _splittopdir(dir)
624 635 if topdir:
625 636 if topdir in self._dirs:
626 637 return self._dirs[topdir].hasdir(subdir)
627 638 return False
628 639 return (dir + '/') in self._dirs
629 640
630 641 def walk(self, match):
631 642 '''Generates matching file names.
632 643
633 644 Equivalent to manifest.matches(match).iterkeys(), but without creating
634 645 an entirely new manifest.
635 646
636 647 It also reports nonexistent files by marking them bad with match.bad().
637 648 '''
638 649 if match.always():
639 650 for f in iter(self):
640 651 yield f
641 652 return
642 653
643 654 fset = set(match.files())
644 655
645 656 for fn in self._walk(match):
646 657 if fn in fset:
647 658 # specified pattern is the exact name
648 659 fset.remove(fn)
649 660 yield fn
650 661
651 662 # for dirstate.walk, files=['.'] means "walk the whole tree".
652 663 # follow that here, too
653 664 fset.discard('.')
654 665
655 666 for fn in sorted(fset):
656 667 if not self.hasdir(fn):
657 668 match.bad(fn, None)
658 669
659 670 def _walk(self, match):
660 671 '''Recursively generates matching file names for walk().'''
661 672 if not match.visitdir(self._dir[:-1] or '.'):
662 673 return
663 674
664 675 # yield this dir's files and walk its submanifests
665 676 for p in sorted(self._dirs.keys() + self._files.keys()):
666 677 if p in self._files:
667 678 fullp = self._subpath(p)
668 679 if match(fullp):
669 680 yield fullp
670 681 else:
671 682 for f in self._dirs[p]._walk(match):
672 683 yield f
673 684
674 685 def matches(self, match):
675 686 '''generate a new manifest filtered by the match argument'''
676 687 if match.always():
677 688 return self.copy()
678 689
679 690 return self._matches(match)
680 691
681 692 def _matches(self, match):
682 693 '''recursively generate a new manifest filtered by the match argument.
683 694 '''
684 695 ret = treemanifest(self._dir)
685 696
686 697 if not match.visitdir(self._dir[:-1] or '.'):
687 698 return ret
688 699
689 700 for fn in self._files:
690 701 fullp = self._subpath(fn)
691 702 if not match(fullp):
692 703 continue
693 704 ret._files[fn] = self._files[fn]
694 705 if fn in self._flags:
695 706 ret._flags[fn] = self._flags[fn]
696 707
697 708 for dir, subm in self._dirs.iteritems():
698 709 m = subm._matches(match)
699 710 if not m._isempty():
700 711 ret._dirs[dir] = m
701 712
713 if not ret._isempty():
714 ret._dirty = True
702 715 return ret
703 716
704 717 def diff(self, m2, clean=False):
705 718 '''Finds changes between the current manifest and m2.
706 719
707 720 Args:
708 721 m2: the manifest to which this manifest should be compared.
709 722 clean: if true, include files unchanged between these manifests
710 723 with a None value in the returned dictionary.
711 724
712 725 The result is returned as a dict with filename as key and
713 726 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
714 727 nodeid in the current/other manifest and fl1/fl2 is the flag
715 728 in the current/other manifest. Where the file does not exist,
716 729 the nodeid will be None and the flags will be the empty
717 730 string.
718 731 '''
719 732 result = {}
720 733 emptytree = treemanifest()
721 734 def _diff(t1, t2):
735 if t1._node == t2._node and not t1._dirty and not t2._dirty:
736 return
722 737 for d, m1 in t1._dirs.iteritems():
723 738 m2 = t2._dirs.get(d, emptytree)
724 739 _diff(m1, m2)
725 740
726 741 for d, m2 in t2._dirs.iteritems():
727 742 if d not in t1._dirs:
728 743 _diff(emptytree, m2)
729 744
730 745 for fn, n1 in t1._files.iteritems():
731 746 fl1 = t1._flags.get(fn, '')
732 747 n2 = t2._files.get(fn, None)
733 748 fl2 = t2._flags.get(fn, '')
734 749 if n1 != n2 or fl1 != fl2:
735 750 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
736 751 elif clean:
737 752 result[t1._subpath(fn)] = None
738 753
739 754 for fn, n2 in t2._files.iteritems():
740 755 if fn not in t1._files:
741 756 fl2 = t2._flags.get(fn, '')
742 757 result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
743 758
744 759 _diff(self, m2)
745 760 return result
746 761
747 762 def parse(self, text, readsubtree):
748 763 for f, n, fl in _parse(text):
749 764 if fl == 'd':
750 765 f = f + '/'
751 766 self._dirs[f] = readsubtree(self._subpath(f), n)
752 else:
753 # Use __setitem__ and setflag rather than assigning directly
754 # to _files and _flags, thereby letting us parse flat manifests
755 # as well as tree manifests.
767 elif '/' in f:
768 # This is a flat manifest, so use __setitem__ and setflag rather
769 # than assigning directly to _files and _flags, so we can
770 # assign a path in a subdirectory, and to mark dirty (compared
771 # to nullid).
756 772 self[f] = n
757 773 if fl:
758 774 self.setflag(f, fl)
775 else:
776 # Assigning to _files and _flags avoids marking as dirty,
777 # and should be a little faster.
778 self._files[f] = n
779 if fl:
780 self._flags[f] = fl
759 781
760 782 def text(self, usemanifestv2=False):
761 783 """Get the full data of this manifest as a bytestring."""
762 784 flags = self.flags
763 785 return _text(((f, self[f], flags(f)) for f in self.keys()),
764 786 usemanifestv2)
765 787
766 788 def dirtext(self, usemanifestv2=False):
767 789 """Get the full data of this directory as a bytestring. Make sure that
768 790 any submanifests have been written first, so their nodeids are correct.
769 791 """
770 792 flags = self.flags
771 793 dirs = [(d[:-1], self._dirs[d]._node, 'd') for d in self._dirs]
772 794 files = [(f, self._files[f], flags(f)) for f in self._files]
773 795 return _text(sorted(dirs + files), usemanifestv2)
774 796
775 797 def writesubtrees(self, m1, m2, writesubtree):
776 798 emptytree = treemanifest()
777 799 for d, subm in self._dirs.iteritems():
778 800 subp1 = m1._dirs.get(d, emptytree)._node
779 801 subp2 = m2._dirs.get(d, emptytree)._node
780 802 if subp1 == revlog.nullid:
781 803 subp1, subp2 = subp2, subp1
782 804 writesubtree(subm, subp1, subp2)
783 805
784 806 class manifest(revlog.revlog):
785 807 def __init__(self, opener, dir='', dirlogcache=None):
786 808 '''The 'dir' and 'dirlogcache' arguments are for internal use by
787 809 manifest.manifest only. External users should create a root manifest
788 810 log with manifest.manifest(opener) and call dirlog() on it.
789 811 '''
790 812 # During normal operations, we expect to deal with not more than four
791 813 # revs at a time (such as during commit --amend). When rebasing large
792 814 # stacks of commits, the number can go up, hence the config knob below.
793 815 cachesize = 4
794 816 usetreemanifest = False
795 817 usemanifestv2 = False
796 818 opts = getattr(opener, 'options', None)
797 819 if opts is not None:
798 820 cachesize = opts.get('manifestcachesize', cachesize)
799 821 usetreemanifest = opts.get('treemanifest', usetreemanifest)
800 822 usemanifestv2 = opts.get('manifestv2', usemanifestv2)
801 823 self._mancache = util.lrucachedict(cachesize)
802 824 self._treeinmem = usetreemanifest
803 825 self._treeondisk = usetreemanifest
804 826 self._usemanifestv2 = usemanifestv2
805 827 indexfile = "00manifest.i"
806 828 if dir:
807 829 assert self._treeondisk
808 830 if not dir.endswith('/'):
809 831 dir = dir + '/'
810 832 indexfile = "meta/" + dir + "00manifest.i"
811 833 revlog.revlog.__init__(self, opener, indexfile)
812 834 self._dir = dir
813 835 # The dirlogcache is kept on the root manifest log
814 836 if dir:
815 837 self._dirlogcache = dirlogcache
816 838 else:
817 839 self._dirlogcache = {'': self}
818 840
819 841 def _newmanifest(self, data=''):
820 842 if self._treeinmem:
821 843 return treemanifest(self._dir, data)
822 844 return manifestdict(data)
823 845
824 846 def dirlog(self, dir):
825 847 assert self._treeondisk
826 848 if dir not in self._dirlogcache:
827 849 self._dirlogcache[dir] = manifest(self.opener, dir,
828 850 self._dirlogcache)
829 851 return self._dirlogcache[dir]
830 852
831 853 def _slowreaddelta(self, node):
832 854 r0 = self.deltaparent(self.rev(node))
833 855 m0 = self.read(self.node(r0))
834 856 m1 = self.read(node)
835 857 md = self._newmanifest()
836 858 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
837 859 if n1:
838 860 md[f] = n1
839 861 if fl1:
840 862 md.setflag(f, fl1)
841 863 return md
842 864
843 865 def readdelta(self, node):
844 866 if self._usemanifestv2 or self._treeondisk:
845 867 return self._slowreaddelta(node)
846 868 r = self.rev(node)
847 869 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
848 870 return self._newmanifest(d)
849 871
850 872 def readfast(self, node):
851 873 '''use the faster of readdelta or read
852 874
853 875 This will return a manifest which is either only the files
854 876 added/modified relative to p1, or all files in the
855 877 manifest. Which one is returned depends on the codepath used
856 878 to retrieve the data.
857 879 '''
858 880 r = self.rev(node)
859 881 deltaparent = self.deltaparent(r)
860 882 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
861 883 return self.readdelta(node)
862 884 return self.read(node)
863 885
864 886 def read(self, node):
865 887 if node == revlog.nullid:
866 888 return self._newmanifest() # don't upset local cache
867 889 if node in self._mancache:
868 890 return self._mancache[node][0]
869 891 text = self.revision(node)
870 892 if self._treeondisk:
871 893 def readsubtree(dir, subm):
872 894 return self.dirlog(dir).read(subm)
873 895 m = self._newmanifest()
874 896 m.parse(text, readsubtree)
875 897 m.setnode(node)
876 898 arraytext = None
877 899 else:
878 900 m = self._newmanifest(text)
879 901 arraytext = array.array('c', text)
880 902 self._mancache[node] = (m, arraytext)
881 903 return m
882 904
883 905 def find(self, node, f):
884 906 '''look up entry for a single file efficiently.
885 907 return (node, flags) pair if found, (None, None) if not.'''
886 908 m = self.read(node)
887 909 try:
888 910 return m.find(f)
889 911 except KeyError:
890 912 return None, None
891 913
892 914 def add(self, m, transaction, link, p1, p2, added, removed):
893 915 if (p1 in self._mancache and not self._treeinmem
894 916 and not self._usemanifestv2):
895 917 # If our first parent is in the manifest cache, we can
896 918 # compute a delta here using properties we know about the
897 919 # manifest up-front, which may save time later for the
898 920 # revlog layer.
899 921
900 922 _checkforbidden(added)
901 923 # combine the changed lists into one list for sorting
902 924 work = [(x, False) for x in added]
903 925 work.extend((x, True) for x in removed)
904 926 # this could use heapq.merge() (from Python 2.6+) or equivalent
905 927 # since the lists are already sorted
906 928 work.sort()
907 929
908 930 arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
909 931 cachedelta = self.rev(p1), deltatext
910 932 text = util.buffer(arraytext)
911 933 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
912 934 else:
913 935 # The first parent manifest isn't already loaded, so we'll
914 936 # just encode a fulltext of the manifest and pass that
915 937 # through to the revlog layer, and let it handle the delta
916 938 # process.
917 939 if self._treeondisk:
918 940 m1 = self.read(p1)
919 941 m2 = self.read(p2)
920 942 n = self._addtree(m, transaction, link, m1, m2)
921 943 arraytext = None
922 944 else:
923 945 text = m.text(self._usemanifestv2)
924 946 n = self.addrevision(text, transaction, link, p1, p2)
925 947 arraytext = array.array('c', text)
926 948
927 949 self._mancache[n] = (m, arraytext)
928 950
929 951 return n
930 952
931 953 def _addtree(self, m, transaction, link, m1, m2):
932 954 def writesubtree(subm, subp1, subp2):
933 955 sublog = self.dirlog(subm.dir())
934 956 sublog.add(subm, transaction, link, subp1, subp2, None, None)
935 957 m.writesubtrees(m1, m2, writesubtree)
936 958 text = m.dirtext(self._usemanifestv2)
937 959 # If the manifest is unchanged compared to one parent,
938 960 # don't write a new revision
939 961 if text == m1.dirtext(self._usemanifestv2):
940 962 n = m1.node()
941 963 elif text == m2.dirtext(self._usemanifestv2):
942 964 n = m2.node()
943 965 else:
944 966 n = self.addrevision(text, transaction, link, m1.node(), m2.node())
945 967 # Save nodeid so parent manifest can calculate its nodeid
946 968 m.setnode(n)
947 969 return n
General Comments 0
You need to be logged in to leave comments. Login now