##// END OF EJS Templates
treemanifest: speed up commit using dirty flag...
Martin von Zweigbergk -
r25221:eafa06e9 default
parent child Browse files
Show More
@@ -1,969 +1,975 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import mdiff, parsers, error, revlog, util
10 10 import array, struct
11 11 import os
12 12
13 13 propertycache = util.propertycache
14 14
15 15 def _parsev1(data):
16 16 # This method does a little bit of excessive-looking
17 17 # precondition checking. This is so that the behavior of this
18 18 # class exactly matches its C counterpart to try and help
19 19 # prevent surprise breakage for anyone that develops against
20 20 # the pure version.
21 21 if data and data[-1] != '\n':
22 22 raise ValueError('Manifest did not end in a newline.')
23 23 prev = None
24 24 for l in data.splitlines():
25 25 if prev is not None and prev > l:
26 26 raise ValueError('Manifest lines not in sorted order.')
27 27 prev = l
28 28 f, n = l.split('\0')
29 29 if len(n) > 40:
30 30 yield f, revlog.bin(n[:40]), n[40:]
31 31 else:
32 32 yield f, revlog.bin(n), ''
33 33
34 34 def _parsev2(data):
35 35 metadataend = data.find('\n')
36 36 # Just ignore metadata for now
37 37 pos = metadataend + 1
38 38 prevf = ''
39 39 while pos < len(data):
40 40 end = data.find('\n', pos + 1) # +1 to skip stem length byte
41 41 if end == -1:
42 42 raise ValueError('Manifest ended with incomplete file entry.')
43 43 stemlen = ord(data[pos])
44 44 items = data[pos + 1:end].split('\0')
45 45 f = prevf[:stemlen] + items[0]
46 46 if prevf > f:
47 47 raise ValueError('Manifest entries not in sorted order.')
48 48 fl = items[1]
49 49 # Just ignore metadata (items[2:] for now)
50 50 n = data[end + 1:end + 21]
51 51 yield f, n, fl
52 52 pos = end + 22
53 53 prevf = f
54 54
55 55 def _parse(data):
56 56 """Generates (path, node, flags) tuples from a manifest text"""
57 57 if data.startswith('\0'):
58 58 return iter(_parsev2(data))
59 59 else:
60 60 return iter(_parsev1(data))
61 61
62 62 def _text(it, usemanifestv2):
63 63 """Given an iterator over (path, node, flags) tuples, returns a manifest
64 64 text"""
65 65 if usemanifestv2:
66 66 return _textv2(it)
67 67 else:
68 68 return _textv1(it)
69 69
70 70 def _textv1(it):
71 71 files = []
72 72 lines = []
73 73 _hex = revlog.hex
74 74 for f, n, fl in it:
75 75 files.append(f)
76 76 # if this is changed to support newlines in filenames,
77 77 # be sure to check the templates/ dir again (especially *-raw.tmpl)
78 78 lines.append("%s\0%s%s\n" % (f, _hex(n), fl))
79 79
80 80 _checkforbidden(files)
81 81 return ''.join(lines)
82 82
83 83 def _textv2(it):
84 84 files = []
85 85 lines = ['\0\n']
86 86 prevf = ''
87 87 for f, n, fl in it:
88 88 files.append(f)
89 89 stem = os.path.commonprefix([prevf, f])
90 90 stemlen = min(len(stem), 255)
91 91 lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n))
92 92 prevf = f
93 93 _checkforbidden(files)
94 94 return ''.join(lines)
95 95
96 96 class _lazymanifest(dict):
97 97 """This is the pure implementation of lazymanifest.
98 98
99 99 It has not been optimized *at all* and is not lazy.
100 100 """
101 101
102 102 def __init__(self, data):
103 103 dict.__init__(self)
104 104 for f, n, fl in _parse(data):
105 105 self[f] = n, fl
106 106
107 107 def __setitem__(self, k, v):
108 108 node, flag = v
109 109 assert node is not None
110 110 if len(node) > 21:
111 111 node = node[:21] # match c implementation behavior
112 112 dict.__setitem__(self, k, (node, flag))
113 113
114 114 def __iter__(self):
115 115 return iter(sorted(dict.keys(self)))
116 116
117 117 def iterkeys(self):
118 118 return iter(sorted(dict.keys(self)))
119 119
120 120 def iterentries(self):
121 121 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
122 122
123 123 def copy(self):
124 124 c = _lazymanifest('')
125 125 c.update(self)
126 126 return c
127 127
128 128 def diff(self, m2, clean=False):
129 129 '''Finds changes between the current manifest and m2.'''
130 130 diff = {}
131 131
132 132 for fn, e1 in self.iteritems():
133 133 if fn not in m2:
134 134 diff[fn] = e1, (None, '')
135 135 else:
136 136 e2 = m2[fn]
137 137 if e1 != e2:
138 138 diff[fn] = e1, e2
139 139 elif clean:
140 140 diff[fn] = None
141 141
142 142 for fn, e2 in m2.iteritems():
143 143 if fn not in self:
144 144 diff[fn] = (None, ''), e2
145 145
146 146 return diff
147 147
148 148 def filtercopy(self, filterfn):
149 149 c = _lazymanifest('')
150 150 for f, n, fl in self.iterentries():
151 151 if filterfn(f):
152 152 c[f] = n, fl
153 153 return c
154 154
155 155 def text(self):
156 156 """Get the full data of this manifest as a bytestring."""
157 157 return _textv1(self.iterentries())
158 158
159 159 try:
160 160 _lazymanifest = parsers.lazymanifest
161 161 except AttributeError:
162 162 pass
163 163
164 164 class manifestdict(object):
165 165 def __init__(self, data=''):
166 166 if data.startswith('\0'):
167 167 #_lazymanifest can not parse v2
168 168 self._lm = _lazymanifest('')
169 169 for f, n, fl in _parsev2(data):
170 170 self._lm[f] = n, fl
171 171 else:
172 172 self._lm = _lazymanifest(data)
173 173
174 174 def __getitem__(self, key):
175 175 return self._lm[key][0]
176 176
177 177 def find(self, key):
178 178 return self._lm[key]
179 179
180 180 def __len__(self):
181 181 return len(self._lm)
182 182
183 183 def __setitem__(self, key, node):
184 184 self._lm[key] = node, self.flags(key, '')
185 185
186 186 def __contains__(self, key):
187 187 return key in self._lm
188 188
189 189 def __delitem__(self, key):
190 190 del self._lm[key]
191 191
192 192 def __iter__(self):
193 193 return self._lm.__iter__()
194 194
195 195 def iterkeys(self):
196 196 return self._lm.iterkeys()
197 197
198 198 def keys(self):
199 199 return list(self.iterkeys())
200 200
201 201 def filesnotin(self, m2):
202 202 '''Set of files in this manifest that are not in the other'''
203 203 files = set(self)
204 204 files.difference_update(m2)
205 205 return files
206 206
207 207 @propertycache
208 208 def _dirs(self):
209 209 return util.dirs(self)
210 210
211 211 def dirs(self):
212 212 return self._dirs
213 213
214 214 def hasdir(self, dir):
215 215 return dir in self._dirs
216 216
217 217 def _filesfastpath(self, match):
218 218 '''Checks whether we can correctly and quickly iterate over matcher
219 219 files instead of over manifest files.'''
220 220 files = match.files()
221 221 return (len(files) < 100 and (match.isexact() or
222 222 (not match.anypats() and all(fn in self for fn in files))))
223 223
224 224 def walk(self, match):
225 225 '''Generates matching file names.
226 226
227 227 Equivalent to manifest.matches(match).iterkeys(), but without creating
228 228 an entirely new manifest.
229 229
230 230 It also reports nonexistent files by marking them bad with match.bad().
231 231 '''
232 232 if match.always():
233 233 for f in iter(self):
234 234 yield f
235 235 return
236 236
237 237 fset = set(match.files())
238 238
239 239 # avoid the entire walk if we're only looking for specific files
240 240 if self._filesfastpath(match):
241 241 for fn in sorted(fset):
242 242 yield fn
243 243 return
244 244
245 245 for fn in self:
246 246 if fn in fset:
247 247 # specified pattern is the exact name
248 248 fset.remove(fn)
249 249 if match(fn):
250 250 yield fn
251 251
252 252 # for dirstate.walk, files=['.'] means "walk the whole tree".
253 253 # follow that here, too
254 254 fset.discard('.')
255 255
256 256 for fn in sorted(fset):
257 257 if not self.hasdir(fn):
258 258 match.bad(fn, None)
259 259
260 260 def matches(self, match):
261 261 '''generate a new manifest filtered by the match argument'''
262 262 if match.always():
263 263 return self.copy()
264 264
265 265 if self._filesfastpath(match):
266 266 m = manifestdict()
267 267 lm = self._lm
268 268 for fn in match.files():
269 269 if fn in lm:
270 270 m._lm[fn] = lm[fn]
271 271 return m
272 272
273 273 m = manifestdict()
274 274 m._lm = self._lm.filtercopy(match)
275 275 return m
276 276
277 277 def diff(self, m2, clean=False):
278 278 '''Finds changes between the current manifest and m2.
279 279
280 280 Args:
281 281 m2: the manifest to which this manifest should be compared.
282 282 clean: if true, include files unchanged between these manifests
283 283 with a None value in the returned dictionary.
284 284
285 285 The result is returned as a dict with filename as key and
286 286 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
287 287 nodeid in the current/other manifest and fl1/fl2 is the flag
288 288 in the current/other manifest. Where the file does not exist,
289 289 the nodeid will be None and the flags will be the empty
290 290 string.
291 291 '''
292 292 return self._lm.diff(m2._lm, clean)
293 293
294 294 def setflag(self, key, flag):
295 295 self._lm[key] = self[key], flag
296 296
297 297 def get(self, key, default=None):
298 298 try:
299 299 return self._lm[key][0]
300 300 except KeyError:
301 301 return default
302 302
303 303 def flags(self, key, default=''):
304 304 try:
305 305 return self._lm[key][1]
306 306 except KeyError:
307 307 return default
308 308
309 309 def copy(self):
310 310 c = manifestdict()
311 311 c._lm = self._lm.copy()
312 312 return c
313 313
314 314 def iteritems(self):
315 315 return (x[:2] for x in self._lm.iterentries())
316 316
317 317 def text(self, usemanifestv2=False):
318 318 if usemanifestv2:
319 319 return _textv2(self._lm.iterentries())
320 320 else:
321 321 # use (probably) native version for v1
322 322 return self._lm.text()
323 323
324 324 def fastdelta(self, base, changes):
325 325 """Given a base manifest text as an array.array and a list of changes
326 326 relative to that text, compute a delta that can be used by revlog.
327 327 """
328 328 delta = []
329 329 dstart = None
330 330 dend = None
331 331 dline = [""]
332 332 start = 0
333 333 # zero copy representation of base as a buffer
334 334 addbuf = util.buffer(base)
335 335
336 336 # start with a readonly loop that finds the offset of
337 337 # each line and creates the deltas
338 338 for f, todelete in changes:
339 339 # bs will either be the index of the item or the insert point
340 340 start, end = _msearch(addbuf, f, start)
341 341 if not todelete:
342 342 h, fl = self._lm[f]
343 343 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
344 344 else:
345 345 if start == end:
346 346 # item we want to delete was not found, error out
347 347 raise AssertionError(
348 348 _("failed to remove %s from manifest") % f)
349 349 l = ""
350 350 if dstart is not None and dstart <= start and dend >= start:
351 351 if dend < end:
352 352 dend = end
353 353 if l:
354 354 dline.append(l)
355 355 else:
356 356 if dstart is not None:
357 357 delta.append([dstart, dend, "".join(dline)])
358 358 dstart = start
359 359 dend = end
360 360 dline = [l]
361 361
362 362 if dstart is not None:
363 363 delta.append([dstart, dend, "".join(dline)])
364 364 # apply the delta to the base, and get a delta for addrevision
365 365 deltatext, arraytext = _addlistdelta(base, delta)
366 366 return arraytext, deltatext
367 367
368 368 def _msearch(m, s, lo=0, hi=None):
369 369 '''return a tuple (start, end) that says where to find s within m.
370 370
371 371 If the string is found m[start:end] are the line containing
372 372 that string. If start == end the string was not found and
373 373 they indicate the proper sorted insertion point.
374 374
375 375 m should be a buffer or a string
376 376 s is a string'''
377 377 def advance(i, c):
378 378 while i < lenm and m[i] != c:
379 379 i += 1
380 380 return i
381 381 if not s:
382 382 return (lo, lo)
383 383 lenm = len(m)
384 384 if not hi:
385 385 hi = lenm
386 386 while lo < hi:
387 387 mid = (lo + hi) // 2
388 388 start = mid
389 389 while start > 0 and m[start - 1] != '\n':
390 390 start -= 1
391 391 end = advance(start, '\0')
392 392 if m[start:end] < s:
393 393 # we know that after the null there are 40 bytes of sha1
394 394 # this translates to the bisect lo = mid + 1
395 395 lo = advance(end + 40, '\n') + 1
396 396 else:
397 397 # this translates to the bisect hi = mid
398 398 hi = start
399 399 end = advance(lo, '\0')
400 400 found = m[lo:end]
401 401 if s == found:
402 402 # we know that after the null there are 40 bytes of sha1
403 403 end = advance(end + 40, '\n')
404 404 return (lo, end + 1)
405 405 else:
406 406 return (lo, lo)
407 407
408 408 def _checkforbidden(l):
409 409 """Check filenames for illegal characters."""
410 410 for f in l:
411 411 if '\n' in f or '\r' in f:
412 412 raise error.RevlogError(
413 413 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
414 414
415 415
416 416 # apply the changes collected during the bisect loop to our addlist
417 417 # return a delta suitable for addrevision
418 418 def _addlistdelta(addlist, x):
419 419 # for large addlist arrays, building a new array is cheaper
420 420 # than repeatedly modifying the existing one
421 421 currentposition = 0
422 422 newaddlist = array.array('c')
423 423
424 424 for start, end, content in x:
425 425 newaddlist += addlist[currentposition:start]
426 426 if content:
427 427 newaddlist += array.array('c', content)
428 428
429 429 currentposition = end
430 430
431 431 newaddlist += addlist[currentposition:]
432 432
433 433 deltatext = "".join(struct.pack(">lll", start, end, len(content))
434 434 + content for start, end, content in x)
435 435 return deltatext, newaddlist
436 436
437 437 def _splittopdir(f):
438 438 if '/' in f:
439 439 dir, subpath = f.split('/', 1)
440 440 return dir + '/', subpath
441 441 else:
442 442 return '', f
443 443
444 444 class treemanifest(object):
445 445 def __init__(self, dir='', text=''):
446 446 self._dir = dir
447 447 self._node = revlog.nullid
448 448 self._dirty = False
449 449 self._dirs = {}
450 450 # Using _lazymanifest here is a little slower than plain old dicts
451 451 self._files = {}
452 452 self._flags = {}
453 453 if text:
454 454 def readsubtree(subdir, subm):
455 455 raise AssertionError('treemanifest constructor only accepts '
456 456 'flat manifests')
457 457 self.parse(text, readsubtree)
458 458 self._dirty = True # Mark flat manifest dirty after parsing
459 459
460 460 def _subpath(self, path):
461 461 return self._dir + path
462 462
463 463 def __len__(self):
464 464 size = len(self._files)
465 465 for m in self._dirs.values():
466 466 size += m.__len__()
467 467 return size
468 468
469 469 def _isempty(self):
470 470 return (not self._files and (not self._dirs or
471 471 all(m._isempty() for m in self._dirs.values())))
472 472
473 473 def __str__(self):
474 474 return ('<treemanifest dir=%s, node=%s, dirty=%s>' %
475 475 (self._dir, revlog.hex(self._node), self._dirty))
476 476
477 477 def dir(self):
478 478 '''The directory that this tree manifest represents, including a
479 479 trailing '/'. Empty string for the repo root directory.'''
480 480 return self._dir
481 481
482 482 def node(self):
483 483 '''This node of this instance. nullid for unsaved instances. Should
484 484 be updated when the instance is read or written from a revlog.
485 485 '''
486 486 assert not self._dirty
487 487 return self._node
488 488
489 489 def setnode(self, node):
490 490 self._node = node
491 491 self._dirty = False
492 492
493 493 def iteritems(self):
494 494 for p, n in sorted(self._dirs.items() + self._files.items()):
495 495 if p in self._files:
496 496 yield self._subpath(p), n
497 497 else:
498 498 for f, sn in n.iteritems():
499 499 yield f, sn
500 500
501 501 def iterkeys(self):
502 502 for p in sorted(self._dirs.keys() + self._files.keys()):
503 503 if p in self._files:
504 504 yield self._subpath(p)
505 505 else:
506 506 for f in self._dirs[p].iterkeys():
507 507 yield f
508 508
509 509 def keys(self):
510 510 return list(self.iterkeys())
511 511
512 512 def __iter__(self):
513 513 return self.iterkeys()
514 514
515 515 def __contains__(self, f):
516 516 if f is None:
517 517 return False
518 518 dir, subpath = _splittopdir(f)
519 519 if dir:
520 520 if dir not in self._dirs:
521 521 return False
522 522 return self._dirs[dir].__contains__(subpath)
523 523 else:
524 524 return f in self._files
525 525
526 526 def get(self, f, default=None):
527 527 dir, subpath = _splittopdir(f)
528 528 if dir:
529 529 if dir not in self._dirs:
530 530 return default
531 531 return self._dirs[dir].get(subpath, default)
532 532 else:
533 533 return self._files.get(f, default)
534 534
535 535 def __getitem__(self, f):
536 536 dir, subpath = _splittopdir(f)
537 537 if dir:
538 538 return self._dirs[dir].__getitem__(subpath)
539 539 else:
540 540 return self._files[f]
541 541
542 542 def flags(self, f):
543 543 dir, subpath = _splittopdir(f)
544 544 if dir:
545 545 if dir not in self._dirs:
546 546 return ''
547 547 return self._dirs[dir].flags(subpath)
548 548 else:
549 549 if f in self._dirs:
550 550 return ''
551 551 return self._flags.get(f, '')
552 552
553 553 def find(self, f):
554 554 dir, subpath = _splittopdir(f)
555 555 if dir:
556 556 return self._dirs[dir].find(subpath)
557 557 else:
558 558 return self._files[f], self._flags.get(f, '')
559 559
560 560 def __delitem__(self, f):
561 561 dir, subpath = _splittopdir(f)
562 562 if dir:
563 563 self._dirs[dir].__delitem__(subpath)
564 564 # If the directory is now empty, remove it
565 565 if self._dirs[dir]._isempty():
566 566 del self._dirs[dir]
567 567 else:
568 568 del self._files[f]
569 569 if f in self._flags:
570 570 del self._flags[f]
571 571 self._dirty = True
572 572
573 573 def __setitem__(self, f, n):
574 574 assert n is not None
575 575 dir, subpath = _splittopdir(f)
576 576 if dir:
577 577 if dir not in self._dirs:
578 578 self._dirs[dir] = treemanifest(self._subpath(dir))
579 579 self._dirs[dir].__setitem__(subpath, n)
580 580 else:
581 581 self._files[f] = n[:21] # to match manifestdict's behavior
582 582 self._dirty = True
583 583
584 584 def setflag(self, f, flags):
585 585 """Set the flags (symlink, executable) for path f."""
586 586 assert 'd' not in flags
587 587 dir, subpath = _splittopdir(f)
588 588 if dir:
589 589 if dir not in self._dirs:
590 590 self._dirs[dir] = treemanifest(self._subpath(dir))
591 591 self._dirs[dir].setflag(subpath, flags)
592 592 else:
593 593 self._flags[f] = flags
594 594 self._dirty = True
595 595
596 596 def copy(self):
597 597 copy = treemanifest(self._dir)
598 598 copy._node = self._node
599 599 copy._dirty = self._dirty
600 600 for d in self._dirs:
601 601 copy._dirs[d] = self._dirs[d].copy()
602 602 copy._files = dict.copy(self._files)
603 603 copy._flags = dict.copy(self._flags)
604 604 return copy
605 605
606 606 def filesnotin(self, m2):
607 607 '''Set of files in this manifest that are not in the other'''
608 608 files = set()
609 609 def _filesnotin(t1, t2):
610 610 if t1._node == t2._node and not t1._dirty and not t2._dirty:
611 611 return
612 612 for d, m1 in t1._dirs.iteritems():
613 613 if d in t2._dirs:
614 614 m2 = t2._dirs[d]
615 615 _filesnotin(m1, m2)
616 616 else:
617 617 files.update(m1.iterkeys())
618 618
619 619 for fn in t1._files.iterkeys():
620 620 if fn not in t2._files:
621 621 files.add(t1._subpath(fn))
622 622
623 623 _filesnotin(self, m2)
624 624 return files
625 625
626 626 @propertycache
627 627 def _alldirs(self):
628 628 return util.dirs(self)
629 629
630 630 def dirs(self):
631 631 return self._alldirs
632 632
633 633 def hasdir(self, dir):
634 634 topdir, subdir = _splittopdir(dir)
635 635 if topdir:
636 636 if topdir in self._dirs:
637 637 return self._dirs[topdir].hasdir(subdir)
638 638 return False
639 639 return (dir + '/') in self._dirs
640 640
641 641 def walk(self, match):
642 642 '''Generates matching file names.
643 643
644 644 Equivalent to manifest.matches(match).iterkeys(), but without creating
645 645 an entirely new manifest.
646 646
647 647 It also reports nonexistent files by marking them bad with match.bad().
648 648 '''
649 649 if match.always():
650 650 for f in iter(self):
651 651 yield f
652 652 return
653 653
654 654 fset = set(match.files())
655 655
656 656 for fn in self._walk(match):
657 657 if fn in fset:
658 658 # specified pattern is the exact name
659 659 fset.remove(fn)
660 660 yield fn
661 661
662 662 # for dirstate.walk, files=['.'] means "walk the whole tree".
663 663 # follow that here, too
664 664 fset.discard('.')
665 665
666 666 for fn in sorted(fset):
667 667 if not self.hasdir(fn):
668 668 match.bad(fn, None)
669 669
670 670 def _walk(self, match):
671 671 '''Recursively generates matching file names for walk().'''
672 672 if not match.visitdir(self._dir[:-1] or '.'):
673 673 return
674 674
675 675 # yield this dir's files and walk its submanifests
676 676 for p in sorted(self._dirs.keys() + self._files.keys()):
677 677 if p in self._files:
678 678 fullp = self._subpath(p)
679 679 if match(fullp):
680 680 yield fullp
681 681 else:
682 682 for f in self._dirs[p]._walk(match):
683 683 yield f
684 684
685 685 def matches(self, match):
686 686 '''generate a new manifest filtered by the match argument'''
687 687 if match.always():
688 688 return self.copy()
689 689
690 690 return self._matches(match)
691 691
692 692 def _matches(self, match):
693 693 '''recursively generate a new manifest filtered by the match argument.
694 694 '''
695 695 ret = treemanifest(self._dir)
696 696
697 697 if not match.visitdir(self._dir[:-1] or '.'):
698 698 return ret
699 699
700 700 for fn in self._files:
701 701 fullp = self._subpath(fn)
702 702 if not match(fullp):
703 703 continue
704 704 ret._files[fn] = self._files[fn]
705 705 if fn in self._flags:
706 706 ret._flags[fn] = self._flags[fn]
707 707
708 708 for dir, subm in self._dirs.iteritems():
709 709 m = subm._matches(match)
710 710 if not m._isempty():
711 711 ret._dirs[dir] = m
712 712
713 713 if not ret._isempty():
714 714 ret._dirty = True
715 715 return ret
716 716
717 717 def diff(self, m2, clean=False):
718 718 '''Finds changes between the current manifest and m2.
719 719
720 720 Args:
721 721 m2: the manifest to which this manifest should be compared.
722 722 clean: if true, include files unchanged between these manifests
723 723 with a None value in the returned dictionary.
724 724
725 725 The result is returned as a dict with filename as key and
726 726 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
727 727 nodeid in the current/other manifest and fl1/fl2 is the flag
728 728 in the current/other manifest. Where the file does not exist,
729 729 the nodeid will be None and the flags will be the empty
730 730 string.
731 731 '''
732 732 result = {}
733 733 emptytree = treemanifest()
734 734 def _diff(t1, t2):
735 735 if t1._node == t2._node and not t1._dirty and not t2._dirty:
736 736 return
737 737 for d, m1 in t1._dirs.iteritems():
738 738 m2 = t2._dirs.get(d, emptytree)
739 739 _diff(m1, m2)
740 740
741 741 for d, m2 in t2._dirs.iteritems():
742 742 if d not in t1._dirs:
743 743 _diff(emptytree, m2)
744 744
745 745 for fn, n1 in t1._files.iteritems():
746 746 fl1 = t1._flags.get(fn, '')
747 747 n2 = t2._files.get(fn, None)
748 748 fl2 = t2._flags.get(fn, '')
749 749 if n1 != n2 or fl1 != fl2:
750 750 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
751 751 elif clean:
752 752 result[t1._subpath(fn)] = None
753 753
754 754 for fn, n2 in t2._files.iteritems():
755 755 if fn not in t1._files:
756 756 fl2 = t2._flags.get(fn, '')
757 757 result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
758 758
759 759 _diff(self, m2)
760 760 return result
761 761
762 def unmodifiedsince(self, m2):
763 return not self._dirty and not m2._dirty and self._node == m2._node
764
762 765 def parse(self, text, readsubtree):
763 766 for f, n, fl in _parse(text):
764 767 if fl == 'd':
765 768 f = f + '/'
766 769 self._dirs[f] = readsubtree(self._subpath(f), n)
767 770 elif '/' in f:
768 771 # This is a flat manifest, so use __setitem__ and setflag rather
769 772 # than assigning directly to _files and _flags, so we can
770 773 # assign a path in a subdirectory, and to mark dirty (compared
771 774 # to nullid).
772 775 self[f] = n
773 776 if fl:
774 777 self.setflag(f, fl)
775 778 else:
776 779 # Assigning to _files and _flags avoids marking as dirty,
777 780 # and should be a little faster.
778 781 self._files[f] = n
779 782 if fl:
780 783 self._flags[f] = fl
781 784
782 785 def text(self, usemanifestv2=False):
783 786 """Get the full data of this manifest as a bytestring."""
784 787 flags = self.flags
785 788 return _text(((f, self[f], flags(f)) for f in self.keys()),
786 789 usemanifestv2)
787 790
788 791 def dirtext(self, usemanifestv2=False):
789 792 """Get the full data of this directory as a bytestring. Make sure that
790 793 any submanifests have been written first, so their nodeids are correct.
791 794 """
792 795 flags = self.flags
793 796 dirs = [(d[:-1], self._dirs[d]._node, 'd') for d in self._dirs]
794 797 files = [(f, self._files[f], flags(f)) for f in self._files]
795 798 return _text(sorted(dirs + files), usemanifestv2)
796 799
797 800 def writesubtrees(self, m1, m2, writesubtree):
798 801 emptytree = treemanifest()
799 802 for d, subm in self._dirs.iteritems():
800 803 subp1 = m1._dirs.get(d, emptytree)._node
801 804 subp2 = m2._dirs.get(d, emptytree)._node
802 805 if subp1 == revlog.nullid:
803 806 subp1, subp2 = subp2, subp1
804 807 writesubtree(subm, subp1, subp2)
805 808
806 809 class manifest(revlog.revlog):
807 810 def __init__(self, opener, dir='', dirlogcache=None):
808 811 '''The 'dir' and 'dirlogcache' arguments are for internal use by
809 812 manifest.manifest only. External users should create a root manifest
810 813 log with manifest.manifest(opener) and call dirlog() on it.
811 814 '''
812 815 # During normal operations, we expect to deal with not more than four
813 816 # revs at a time (such as during commit --amend). When rebasing large
814 817 # stacks of commits, the number can go up, hence the config knob below.
815 818 cachesize = 4
816 819 usetreemanifest = False
817 820 usemanifestv2 = False
818 821 opts = getattr(opener, 'options', None)
819 822 if opts is not None:
820 823 cachesize = opts.get('manifestcachesize', cachesize)
821 824 usetreemanifest = opts.get('treemanifest', usetreemanifest)
822 825 usemanifestv2 = opts.get('manifestv2', usemanifestv2)
823 826 self._mancache = util.lrucachedict(cachesize)
824 827 self._treeinmem = usetreemanifest
825 828 self._treeondisk = usetreemanifest
826 829 self._usemanifestv2 = usemanifestv2
827 830 indexfile = "00manifest.i"
828 831 if dir:
829 832 assert self._treeondisk
830 833 if not dir.endswith('/'):
831 834 dir = dir + '/'
832 835 indexfile = "meta/" + dir + "00manifest.i"
833 836 revlog.revlog.__init__(self, opener, indexfile)
834 837 self._dir = dir
835 838 # The dirlogcache is kept on the root manifest log
836 839 if dir:
837 840 self._dirlogcache = dirlogcache
838 841 else:
839 842 self._dirlogcache = {'': self}
840 843
841 844 def _newmanifest(self, data=''):
842 845 if self._treeinmem:
843 846 return treemanifest(self._dir, data)
844 847 return manifestdict(data)
845 848
846 849 def dirlog(self, dir):
847 850 assert self._treeondisk
848 851 if dir not in self._dirlogcache:
849 852 self._dirlogcache[dir] = manifest(self.opener, dir,
850 853 self._dirlogcache)
851 854 return self._dirlogcache[dir]
852 855
853 856 def _slowreaddelta(self, node):
854 857 r0 = self.deltaparent(self.rev(node))
855 858 m0 = self.read(self.node(r0))
856 859 m1 = self.read(node)
857 860 md = self._newmanifest()
858 861 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
859 862 if n1:
860 863 md[f] = n1
861 864 if fl1:
862 865 md.setflag(f, fl1)
863 866 return md
864 867
865 868 def readdelta(self, node):
866 869 if self._usemanifestv2 or self._treeondisk:
867 870 return self._slowreaddelta(node)
868 871 r = self.rev(node)
869 872 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
870 873 return self._newmanifest(d)
871 874
872 875 def readfast(self, node):
873 876 '''use the faster of readdelta or read
874 877
875 878 This will return a manifest which is either only the files
876 879 added/modified relative to p1, or all files in the
877 880 manifest. Which one is returned depends on the codepath used
878 881 to retrieve the data.
879 882 '''
880 883 r = self.rev(node)
881 884 deltaparent = self.deltaparent(r)
882 885 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
883 886 return self.readdelta(node)
884 887 return self.read(node)
885 888
886 889 def read(self, node):
887 890 if node == revlog.nullid:
888 891 return self._newmanifest() # don't upset local cache
889 892 if node in self._mancache:
890 893 return self._mancache[node][0]
891 894 text = self.revision(node)
892 895 if self._treeondisk:
893 896 def readsubtree(dir, subm):
894 897 return self.dirlog(dir).read(subm)
895 898 m = self._newmanifest()
896 899 m.parse(text, readsubtree)
897 900 m.setnode(node)
898 901 arraytext = None
899 902 else:
900 903 m = self._newmanifest(text)
901 904 arraytext = array.array('c', text)
902 905 self._mancache[node] = (m, arraytext)
903 906 return m
904 907
905 908 def find(self, node, f):
906 909 '''look up entry for a single file efficiently.
907 910 return (node, flags) pair if found, (None, None) if not.'''
908 911 m = self.read(node)
909 912 try:
910 913 return m.find(f)
911 914 except KeyError:
912 915 return None, None
913 916
914 917 def add(self, m, transaction, link, p1, p2, added, removed):
915 918 if (p1 in self._mancache and not self._treeinmem
916 919 and not self._usemanifestv2):
917 920 # If our first parent is in the manifest cache, we can
918 921 # compute a delta here using properties we know about the
919 922 # manifest up-front, which may save time later for the
920 923 # revlog layer.
921 924
922 925 _checkforbidden(added)
923 926 # combine the changed lists into one list for sorting
924 927 work = [(x, False) for x in added]
925 928 work.extend((x, True) for x in removed)
926 929 # this could use heapq.merge() (from Python 2.6+) or equivalent
927 930 # since the lists are already sorted
928 931 work.sort()
929 932
930 933 arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
931 934 cachedelta = self.rev(p1), deltatext
932 935 text = util.buffer(arraytext)
933 936 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
934 937 else:
935 938 # The first parent manifest isn't already loaded, so we'll
936 939 # just encode a fulltext of the manifest and pass that
937 940 # through to the revlog layer, and let it handle the delta
938 941 # process.
939 942 if self._treeondisk:
940 943 m1 = self.read(p1)
941 944 m2 = self.read(p2)
942 945 n = self._addtree(m, transaction, link, m1, m2)
943 946 arraytext = None
944 947 else:
945 948 text = m.text(self._usemanifestv2)
946 949 n = self.addrevision(text, transaction, link, p1, p2)
947 950 arraytext = array.array('c', text)
948 951
949 952 self._mancache[n] = (m, arraytext)
950 953
951 954 return n
952 955
953 956 def _addtree(self, m, transaction, link, m1, m2):
957 # If the manifest is unchanged compared to one parent,
958 # don't write a new revision
959 if m.unmodifiedsince(m1) or m.unmodifiedsince(m2):
960 return m.node()
954 961 def writesubtree(subm, subp1, subp2):
955 962 sublog = self.dirlog(subm.dir())
956 963 sublog.add(subm, transaction, link, subp1, subp2, None, None)
957 964 m.writesubtrees(m1, m2, writesubtree)
958 965 text = m.dirtext(self._usemanifestv2)
959 # If the manifest is unchanged compared to one parent,
960 # don't write a new revision
966 # Double-check whether contents are unchanged to one parent
961 967 if text == m1.dirtext(self._usemanifestv2):
962 968 n = m1.node()
963 969 elif text == m2.dirtext(self._usemanifestv2):
964 970 n = m2.node()
965 971 else:
966 972 n = self.addrevision(text, transaction, link, m1.node(), m2.node())
967 973 # Save nodeid so parent manifest can calculate its nodeid
968 974 m.setnode(n)
969 975 return n
General Comments 0
You need to be logged in to leave comments. Login now