##// END OF EJS Templates
treemanifest: rework lazy-copying code (issue4840)...
Augie Fackler -
r26402:05871262 default
parent child Browse files
Show More
@@ -1,1020 +1,1026 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import mdiff, parsers, error, revlog, util
10 10 import array, struct
11 11 import os
12 12 import heapq
13 13
14 14 propertycache = util.propertycache
15 15
16 16 def _parsev1(data):
17 17 # This method does a little bit of excessive-looking
18 18 # precondition checking. This is so that the behavior of this
19 19 # class exactly matches its C counterpart to try and help
20 20 # prevent surprise breakage for anyone that develops against
21 21 # the pure version.
22 22 if data and data[-1] != '\n':
23 23 raise ValueError('Manifest did not end in a newline.')
24 24 prev = None
25 25 for l in data.splitlines():
26 26 if prev is not None and prev > l:
27 27 raise ValueError('Manifest lines not in sorted order.')
28 28 prev = l
29 29 f, n = l.split('\0')
30 30 if len(n) > 40:
31 31 yield f, revlog.bin(n[:40]), n[40:]
32 32 else:
33 33 yield f, revlog.bin(n), ''
34 34
35 35 def _parsev2(data):
36 36 metadataend = data.find('\n')
37 37 # Just ignore metadata for now
38 38 pos = metadataend + 1
39 39 prevf = ''
40 40 while pos < len(data):
41 41 end = data.find('\n', pos + 1) # +1 to skip stem length byte
42 42 if end == -1:
43 43 raise ValueError('Manifest ended with incomplete file entry.')
44 44 stemlen = ord(data[pos])
45 45 items = data[pos + 1:end].split('\0')
46 46 f = prevf[:stemlen] + items[0]
47 47 if prevf > f:
48 48 raise ValueError('Manifest entries not in sorted order.')
49 49 fl = items[1]
50 50 # Just ignore metadata (items[2:] for now)
51 51 n = data[end + 1:end + 21]
52 52 yield f, n, fl
53 53 pos = end + 22
54 54 prevf = f
55 55
56 56 def _parse(data):
57 57 """Generates (path, node, flags) tuples from a manifest text"""
58 58 if data.startswith('\0'):
59 59 return iter(_parsev2(data))
60 60 else:
61 61 return iter(_parsev1(data))
62 62
63 63 def _text(it, usemanifestv2):
64 64 """Given an iterator over (path, node, flags) tuples, returns a manifest
65 65 text"""
66 66 if usemanifestv2:
67 67 return _textv2(it)
68 68 else:
69 69 return _textv1(it)
70 70
71 71 def _textv1(it):
72 72 files = []
73 73 lines = []
74 74 _hex = revlog.hex
75 75 for f, n, fl in it:
76 76 files.append(f)
77 77 # if this is changed to support newlines in filenames,
78 78 # be sure to check the templates/ dir again (especially *-raw.tmpl)
79 79 lines.append("%s\0%s%s\n" % (f, _hex(n), fl))
80 80
81 81 _checkforbidden(files)
82 82 return ''.join(lines)
83 83
84 84 def _textv2(it):
85 85 files = []
86 86 lines = ['\0\n']
87 87 prevf = ''
88 88 for f, n, fl in it:
89 89 files.append(f)
90 90 stem = os.path.commonprefix([prevf, f])
91 91 stemlen = min(len(stem), 255)
92 92 lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n))
93 93 prevf = f
94 94 _checkforbidden(files)
95 95 return ''.join(lines)
96 96
97 97 class _lazymanifest(dict):
98 98 """This is the pure implementation of lazymanifest.
99 99
100 100 It has not been optimized *at all* and is not lazy.
101 101 """
102 102
103 103 def __init__(self, data):
104 104 dict.__init__(self)
105 105 for f, n, fl in _parse(data):
106 106 self[f] = n, fl
107 107
108 108 def __setitem__(self, k, v):
109 109 node, flag = v
110 110 assert node is not None
111 111 if len(node) > 21:
112 112 node = node[:21] # match c implementation behavior
113 113 dict.__setitem__(self, k, (node, flag))
114 114
115 115 def __iter__(self):
116 116 return iter(sorted(dict.keys(self)))
117 117
118 118 def iterkeys(self):
119 119 return iter(sorted(dict.keys(self)))
120 120
121 121 def iterentries(self):
122 122 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
123 123
124 124 def copy(self):
125 125 c = _lazymanifest('')
126 126 c.update(self)
127 127 return c
128 128
129 129 def diff(self, m2, clean=False):
130 130 '''Finds changes between the current manifest and m2.'''
131 131 diff = {}
132 132
133 133 for fn, e1 in self.iteritems():
134 134 if fn not in m2:
135 135 diff[fn] = e1, (None, '')
136 136 else:
137 137 e2 = m2[fn]
138 138 if e1 != e2:
139 139 diff[fn] = e1, e2
140 140 elif clean:
141 141 diff[fn] = None
142 142
143 143 for fn, e2 in m2.iteritems():
144 144 if fn not in self:
145 145 diff[fn] = (None, ''), e2
146 146
147 147 return diff
148 148
149 149 def filtercopy(self, filterfn):
150 150 c = _lazymanifest('')
151 151 for f, n, fl in self.iterentries():
152 152 if filterfn(f):
153 153 c[f] = n, fl
154 154 return c
155 155
156 156 def text(self):
157 157 """Get the full data of this manifest as a bytestring."""
158 158 return _textv1(self.iterentries())
159 159
160 160 try:
161 161 _lazymanifest = parsers.lazymanifest
162 162 except AttributeError:
163 163 pass
164 164
165 165 class manifestdict(object):
166 166 def __init__(self, data=''):
167 167 if data.startswith('\0'):
168 168 #_lazymanifest can not parse v2
169 169 self._lm = _lazymanifest('')
170 170 for f, n, fl in _parsev2(data):
171 171 self._lm[f] = n, fl
172 172 else:
173 173 self._lm = _lazymanifest(data)
174 174
175 175 def __getitem__(self, key):
176 176 return self._lm[key][0]
177 177
178 178 def find(self, key):
179 179 return self._lm[key]
180 180
181 181 def __len__(self):
182 182 return len(self._lm)
183 183
184 184 def __setitem__(self, key, node):
185 185 self._lm[key] = node, self.flags(key, '')
186 186
187 187 def __contains__(self, key):
188 188 return key in self._lm
189 189
190 190 def __delitem__(self, key):
191 191 del self._lm[key]
192 192
193 193 def __iter__(self):
194 194 return self._lm.__iter__()
195 195
196 196 def iterkeys(self):
197 197 return self._lm.iterkeys()
198 198
199 199 def keys(self):
200 200 return list(self.iterkeys())
201 201
202 202 def filesnotin(self, m2):
203 203 '''Set of files in this manifest that are not in the other'''
204 204 files = set(self)
205 205 files.difference_update(m2)
206 206 return files
207 207
208 208 @propertycache
209 209 def _dirs(self):
210 210 return util.dirs(self)
211 211
212 212 def dirs(self):
213 213 return self._dirs
214 214
215 215 def hasdir(self, dir):
216 216 return dir in self._dirs
217 217
218 218 def _filesfastpath(self, match):
219 219 '''Checks whether we can correctly and quickly iterate over matcher
220 220 files instead of over manifest files.'''
221 221 files = match.files()
222 222 return (len(files) < 100 and (match.isexact() or
223 223 (match.prefix() and all(fn in self for fn in files))))
224 224
225 225 def walk(self, match):
226 226 '''Generates matching file names.
227 227
228 228 Equivalent to manifest.matches(match).iterkeys(), but without creating
229 229 an entirely new manifest.
230 230
231 231 It also reports nonexistent files by marking them bad with match.bad().
232 232 '''
233 233 if match.always():
234 234 for f in iter(self):
235 235 yield f
236 236 return
237 237
238 238 fset = set(match.files())
239 239
240 240 # avoid the entire walk if we're only looking for specific files
241 241 if self._filesfastpath(match):
242 242 for fn in sorted(fset):
243 243 yield fn
244 244 return
245 245
246 246 for fn in self:
247 247 if fn in fset:
248 248 # specified pattern is the exact name
249 249 fset.remove(fn)
250 250 if match(fn):
251 251 yield fn
252 252
253 253 # for dirstate.walk, files=['.'] means "walk the whole tree".
254 254 # follow that here, too
255 255 fset.discard('.')
256 256
257 257 for fn in sorted(fset):
258 258 if not self.hasdir(fn):
259 259 match.bad(fn, None)
260 260
261 261 def matches(self, match):
262 262 '''generate a new manifest filtered by the match argument'''
263 263 if match.always():
264 264 return self.copy()
265 265
266 266 if self._filesfastpath(match):
267 267 m = manifestdict()
268 268 lm = self._lm
269 269 for fn in match.files():
270 270 if fn in lm:
271 271 m._lm[fn] = lm[fn]
272 272 return m
273 273
274 274 m = manifestdict()
275 275 m._lm = self._lm.filtercopy(match)
276 276 return m
277 277
278 278 def diff(self, m2, clean=False):
279 279 '''Finds changes between the current manifest and m2.
280 280
281 281 Args:
282 282 m2: the manifest to which this manifest should be compared.
283 283 clean: if true, include files unchanged between these manifests
284 284 with a None value in the returned dictionary.
285 285
286 286 The result is returned as a dict with filename as key and
287 287 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
288 288 nodeid in the current/other manifest and fl1/fl2 is the flag
289 289 in the current/other manifest. Where the file does not exist,
290 290 the nodeid will be None and the flags will be the empty
291 291 string.
292 292 '''
293 293 return self._lm.diff(m2._lm, clean)
294 294
295 295 def setflag(self, key, flag):
296 296 self._lm[key] = self[key], flag
297 297
298 298 def get(self, key, default=None):
299 299 try:
300 300 return self._lm[key][0]
301 301 except KeyError:
302 302 return default
303 303
304 304 def flags(self, key, default=''):
305 305 try:
306 306 return self._lm[key][1]
307 307 except KeyError:
308 308 return default
309 309
310 310 def copy(self):
311 311 c = manifestdict()
312 312 c._lm = self._lm.copy()
313 313 return c
314 314
315 315 def iteritems(self):
316 316 return (x[:2] for x in self._lm.iterentries())
317 317
318 318 def text(self, usemanifestv2=False):
319 319 if usemanifestv2:
320 320 return _textv2(self._lm.iterentries())
321 321 else:
322 322 # use (probably) native version for v1
323 323 return self._lm.text()
324 324
325 325 def fastdelta(self, base, changes):
326 326 """Given a base manifest text as an array.array and a list of changes
327 327 relative to that text, compute a delta that can be used by revlog.
328 328 """
329 329 delta = []
330 330 dstart = None
331 331 dend = None
332 332 dline = [""]
333 333 start = 0
334 334 # zero copy representation of base as a buffer
335 335 addbuf = util.buffer(base)
336 336
337 337 # start with a readonly loop that finds the offset of
338 338 # each line and creates the deltas
339 339 for f, todelete in changes:
340 340 # bs will either be the index of the item or the insert point
341 341 start, end = _msearch(addbuf, f, start)
342 342 if not todelete:
343 343 h, fl = self._lm[f]
344 344 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
345 345 else:
346 346 if start == end:
347 347 # item we want to delete was not found, error out
348 348 raise AssertionError(
349 349 _("failed to remove %s from manifest") % f)
350 350 l = ""
351 351 if dstart is not None and dstart <= start and dend >= start:
352 352 if dend < end:
353 353 dend = end
354 354 if l:
355 355 dline.append(l)
356 356 else:
357 357 if dstart is not None:
358 358 delta.append([dstart, dend, "".join(dline)])
359 359 dstart = start
360 360 dend = end
361 361 dline = [l]
362 362
363 363 if dstart is not None:
364 364 delta.append([dstart, dend, "".join(dline)])
365 365 # apply the delta to the base, and get a delta for addrevision
366 366 deltatext, arraytext = _addlistdelta(base, delta)
367 367 return arraytext, deltatext
368 368
369 369 def _msearch(m, s, lo=0, hi=None):
370 370 '''return a tuple (start, end) that says where to find s within m.
371 371
372 372 If the string is found m[start:end] are the line containing
373 373 that string. If start == end the string was not found and
374 374 they indicate the proper sorted insertion point.
375 375
376 376 m should be a buffer or a string
377 377 s is a string'''
378 378 def advance(i, c):
379 379 while i < lenm and m[i] != c:
380 380 i += 1
381 381 return i
382 382 if not s:
383 383 return (lo, lo)
384 384 lenm = len(m)
385 385 if not hi:
386 386 hi = lenm
387 387 while lo < hi:
388 388 mid = (lo + hi) // 2
389 389 start = mid
390 390 while start > 0 and m[start - 1] != '\n':
391 391 start -= 1
392 392 end = advance(start, '\0')
393 393 if m[start:end] < s:
394 394 # we know that after the null there are 40 bytes of sha1
395 395 # this translates to the bisect lo = mid + 1
396 396 lo = advance(end + 40, '\n') + 1
397 397 else:
398 398 # this translates to the bisect hi = mid
399 399 hi = start
400 400 end = advance(lo, '\0')
401 401 found = m[lo:end]
402 402 if s == found:
403 403 # we know that after the null there are 40 bytes of sha1
404 404 end = advance(end + 40, '\n')
405 405 return (lo, end + 1)
406 406 else:
407 407 return (lo, lo)
408 408
409 409 def _checkforbidden(l):
410 410 """Check filenames for illegal characters."""
411 411 for f in l:
412 412 if '\n' in f or '\r' in f:
413 413 raise error.RevlogError(
414 414 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
415 415
416 416
417 417 # apply the changes collected during the bisect loop to our addlist
418 418 # return a delta suitable for addrevision
419 419 def _addlistdelta(addlist, x):
420 420 # for large addlist arrays, building a new array is cheaper
421 421 # than repeatedly modifying the existing one
422 422 currentposition = 0
423 423 newaddlist = array.array('c')
424 424
425 425 for start, end, content in x:
426 426 newaddlist += addlist[currentposition:start]
427 427 if content:
428 428 newaddlist += array.array('c', content)
429 429
430 430 currentposition = end
431 431
432 432 newaddlist += addlist[currentposition:]
433 433
434 434 deltatext = "".join(struct.pack(">lll", start, end, len(content))
435 435 + content for start, end, content in x)
436 436 return deltatext, newaddlist
437 437
438 438 def _splittopdir(f):
439 439 if '/' in f:
440 440 dir, subpath = f.split('/', 1)
441 441 return dir + '/', subpath
442 442 else:
443 443 return '', f
444 444
445 _noop = lambda: None
445 _noop = lambda s: None
446 446
447 447 class treemanifest(object):
448 448 def __init__(self, dir='', text=''):
449 449 self._dir = dir
450 450 self._node = revlog.nullid
451 self._load = _noop
451 self._loadfunc = _noop
452 self._copyfunc = _noop
452 453 self._dirty = False
453 454 self._dirs = {}
454 455 # Using _lazymanifest here is a little slower than plain old dicts
455 456 self._files = {}
456 457 self._flags = {}
457 458 if text:
458 459 def readsubtree(subdir, subm):
459 460 raise AssertionError('treemanifest constructor only accepts '
460 461 'flat manifests')
461 462 self.parse(text, readsubtree)
462 463 self._dirty = True # Mark flat manifest dirty after parsing
463 464
464 465 def _subpath(self, path):
465 466 return self._dir + path
466 467
467 468 def __len__(self):
468 469 self._load()
469 470 size = len(self._files)
470 471 for m in self._dirs.values():
471 472 size += m.__len__()
472 473 return size
473 474
474 475 def _isempty(self):
475 476 self._load() # for consistency; already loaded by all callers
476 477 return (not self._files and (not self._dirs or
477 478 all(m._isempty() for m in self._dirs.values())))
478 479
479 480 def __repr__(self):
480 481 return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s at 0x%x>' %
481 482 (self._dir, revlog.hex(self._node),
482 bool(self._load is _noop),
483 bool(self._loadfunc is _noop),
483 484 self._dirty, id(self)))
484 485
485 486 def dir(self):
486 487 '''The directory that this tree manifest represents, including a
487 488 trailing '/'. Empty string for the repo root directory.'''
488 489 return self._dir
489 490
490 491 def node(self):
491 492 '''This node of this instance. nullid for unsaved instances. Should
492 493 be updated when the instance is read or written from a revlog.
493 494 '''
494 495 assert not self._dirty
495 496 return self._node
496 497
497 498 def setnode(self, node):
498 499 self._node = node
499 500 self._dirty = False
500 501
501 502 def iteritems(self):
502 503 self._load()
503 504 for p, n in sorted(self._dirs.items() + self._files.items()):
504 505 if p in self._files:
505 506 yield self._subpath(p), n
506 507 else:
507 508 for f, sn in n.iteritems():
508 509 yield f, sn
509 510
510 511 def iterkeys(self):
511 512 self._load()
512 513 for p in sorted(self._dirs.keys() + self._files.keys()):
513 514 if p in self._files:
514 515 yield self._subpath(p)
515 516 else:
516 517 for f in self._dirs[p].iterkeys():
517 518 yield f
518 519
519 520 def keys(self):
520 521 return list(self.iterkeys())
521 522
522 523 def __iter__(self):
523 524 return self.iterkeys()
524 525
525 526 def __contains__(self, f):
526 527 if f is None:
527 528 return False
528 529 self._load()
529 530 dir, subpath = _splittopdir(f)
530 531 if dir:
531 532 if dir not in self._dirs:
532 533 return False
533 534 return self._dirs[dir].__contains__(subpath)
534 535 else:
535 536 return f in self._files
536 537
537 538 def get(self, f, default=None):
538 539 self._load()
539 540 dir, subpath = _splittopdir(f)
540 541 if dir:
541 542 if dir not in self._dirs:
542 543 return default
543 544 return self._dirs[dir].get(subpath, default)
544 545 else:
545 546 return self._files.get(f, default)
546 547
547 548 def __getitem__(self, f):
548 549 self._load()
549 550 dir, subpath = _splittopdir(f)
550 551 if dir:
551 552 return self._dirs[dir].__getitem__(subpath)
552 553 else:
553 554 return self._files[f]
554 555
555 556 def flags(self, f):
556 557 self._load()
557 558 dir, subpath = _splittopdir(f)
558 559 if dir:
559 560 if dir not in self._dirs:
560 561 return ''
561 562 return self._dirs[dir].flags(subpath)
562 563 else:
563 564 if f in self._dirs:
564 565 return ''
565 566 return self._flags.get(f, '')
566 567
567 568 def find(self, f):
568 569 self._load()
569 570 dir, subpath = _splittopdir(f)
570 571 if dir:
571 572 return self._dirs[dir].find(subpath)
572 573 else:
573 574 return self._files[f], self._flags.get(f, '')
574 575
575 576 def __delitem__(self, f):
576 577 self._load()
577 578 dir, subpath = _splittopdir(f)
578 579 if dir:
579 580 self._dirs[dir].__delitem__(subpath)
580 581 # If the directory is now empty, remove it
581 582 if self._dirs[dir]._isempty():
582 583 del self._dirs[dir]
583 584 else:
584 585 del self._files[f]
585 586 if f in self._flags:
586 587 del self._flags[f]
587 588 self._dirty = True
588 589
589 590 def __setitem__(self, f, n):
590 591 assert n is not None
591 592 self._load()
592 593 dir, subpath = _splittopdir(f)
593 594 if dir:
594 595 if dir not in self._dirs:
595 596 self._dirs[dir] = treemanifest(self._subpath(dir))
596 597 self._dirs[dir].__setitem__(subpath, n)
597 598 else:
598 599 self._files[f] = n[:21] # to match manifestdict's behavior
599 600 self._dirty = True
600 601
602 def _load(self):
603 if self._loadfunc is not _noop:
604 lf, self._loadfunc = self._loadfunc, _noop
605 lf(self)
606 elif self._copyfunc is not _noop:
607 cf, self._copyfunc = self._copyfunc, _noop
608 cf(self)
609
601 610 def setflag(self, f, flags):
602 611 """Set the flags (symlink, executable) for path f."""
603 612 assert 'd' not in flags
604 613 self._load()
605 614 dir, subpath = _splittopdir(f)
606 615 if dir:
607 616 if dir not in self._dirs:
608 617 self._dirs[dir] = treemanifest(self._subpath(dir))
609 618 self._dirs[dir].setflag(subpath, flags)
610 619 else:
611 620 self._flags[f] = flags
612 621 self._dirty = True
613 622
614 623 def copy(self):
615 624 copy = treemanifest(self._dir)
616 625 copy._node = self._node
617 626 copy._dirty = self._dirty
618 def _load_for_copy():
619 self._load()
620 for d in self._dirs:
621 copy._dirs[d] = self._dirs[d].copy()
622 copy._files = dict.copy(self._files)
623 copy._flags = dict.copy(self._flags)
624 copy._load = _noop
625 copy._load = _load_for_copy
626 if self._load == _noop:
627 # Chaining _load if it's _noop is functionally correct, but the
628 # chain may end up excessively long (stack overflow), and
629 # will prevent garbage collection of 'self'.
630 copy._load()
627 if self._copyfunc is _noop:
628 def _copyfunc(s):
629 self._load()
630 for d in self._dirs:
631 s._dirs[d] = self._dirs[d].copy()
632 s._files = dict.copy(self._files)
633 s._flags = dict.copy(self._flags)
634 if self._loadfunc is _noop:
635 _copyfunc(copy)
636 else:
637 copy._copyfunc = _copyfunc
638 else:
639 copy._copyfunc = self._copyfunc
631 640 return copy
632 641
633 642 def filesnotin(self, m2):
634 643 '''Set of files in this manifest that are not in the other'''
635 644 files = set()
636 645 def _filesnotin(t1, t2):
637 646 if t1._node == t2._node and not t1._dirty and not t2._dirty:
638 647 return
639 648 t1._load()
640 649 t2._load()
641 650 for d, m1 in t1._dirs.iteritems():
642 651 if d in t2._dirs:
643 652 m2 = t2._dirs[d]
644 653 _filesnotin(m1, m2)
645 654 else:
646 655 files.update(m1.iterkeys())
647 656
648 657 for fn in t1._files.iterkeys():
649 658 if fn not in t2._files:
650 659 files.add(t1._subpath(fn))
651 660
652 661 _filesnotin(self, m2)
653 662 return files
654 663
655 664 @propertycache
656 665 def _alldirs(self):
657 666 return util.dirs(self)
658 667
659 668 def dirs(self):
660 669 return self._alldirs
661 670
662 671 def hasdir(self, dir):
663 672 self._load()
664 673 topdir, subdir = _splittopdir(dir)
665 674 if topdir:
666 675 if topdir in self._dirs:
667 676 return self._dirs[topdir].hasdir(subdir)
668 677 return False
669 678 return (dir + '/') in self._dirs
670 679
671 680 def walk(self, match):
672 681 '''Generates matching file names.
673 682
674 683 Equivalent to manifest.matches(match).iterkeys(), but without creating
675 684 an entirely new manifest.
676 685
677 686 It also reports nonexistent files by marking them bad with match.bad().
678 687 '''
679 688 if match.always():
680 689 for f in iter(self):
681 690 yield f
682 691 return
683 692
684 693 fset = set(match.files())
685 694
686 695 for fn in self._walk(match):
687 696 if fn in fset:
688 697 # specified pattern is the exact name
689 698 fset.remove(fn)
690 699 yield fn
691 700
692 701 # for dirstate.walk, files=['.'] means "walk the whole tree".
693 702 # follow that here, too
694 703 fset.discard('.')
695 704
696 705 for fn in sorted(fset):
697 706 if not self.hasdir(fn):
698 707 match.bad(fn, None)
699 708
700 709 def _walk(self, match):
701 710 '''Recursively generates matching file names for walk().'''
702 711 if not match.visitdir(self._dir[:-1] or '.'):
703 712 return
704 713
705 714 # yield this dir's files and walk its submanifests
706 715 self._load()
707 716 for p in sorted(self._dirs.keys() + self._files.keys()):
708 717 if p in self._files:
709 718 fullp = self._subpath(p)
710 719 if match(fullp):
711 720 yield fullp
712 721 else:
713 722 for f in self._dirs[p]._walk(match):
714 723 yield f
715 724
716 725 def matches(self, match):
717 726 '''generate a new manifest filtered by the match argument'''
718 727 if match.always():
719 728 return self.copy()
720 729
721 730 return self._matches(match)
722 731
723 732 def _matches(self, match):
724 733 '''recursively generate a new manifest filtered by the match argument.
725 734 '''
726 735 ret = treemanifest(self._dir)
727 736
728 737 if not match.visitdir(self._dir[:-1] or '.'):
729 738 return ret
730 739
731 740 self._load()
732 741 for fn in self._files:
733 742 fullp = self._subpath(fn)
734 743 if not match(fullp):
735 744 continue
736 745 ret._files[fn] = self._files[fn]
737 746 if fn in self._flags:
738 747 ret._flags[fn] = self._flags[fn]
739 748
740 749 for dir, subm in self._dirs.iteritems():
741 750 m = subm._matches(match)
742 751 if not m._isempty():
743 752 ret._dirs[dir] = m
744 753
745 754 if not ret._isempty():
746 755 ret._dirty = True
747 756 return ret
748 757
749 758 def diff(self, m2, clean=False):
750 759 '''Finds changes between the current manifest and m2.
751 760
752 761 Args:
753 762 m2: the manifest to which this manifest should be compared.
754 763 clean: if true, include files unchanged between these manifests
755 764 with a None value in the returned dictionary.
756 765
757 766 The result is returned as a dict with filename as key and
758 767 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
759 768 nodeid in the current/other manifest and fl1/fl2 is the flag
760 769 in the current/other manifest. Where the file does not exist,
761 770 the nodeid will be None and the flags will be the empty
762 771 string.
763 772 '''
764 773 result = {}
765 774 emptytree = treemanifest()
766 775 def _diff(t1, t2):
767 776 if t1._node == t2._node and not t1._dirty and not t2._dirty:
768 777 return
769 778 t1._load()
770 779 t2._load()
771 780 for d, m1 in t1._dirs.iteritems():
772 781 m2 = t2._dirs.get(d, emptytree)
773 782 _diff(m1, m2)
774 783
775 784 for d, m2 in t2._dirs.iteritems():
776 785 if d not in t1._dirs:
777 786 _diff(emptytree, m2)
778 787
779 788 for fn, n1 in t1._files.iteritems():
780 789 fl1 = t1._flags.get(fn, '')
781 790 n2 = t2._files.get(fn, None)
782 791 fl2 = t2._flags.get(fn, '')
783 792 if n1 != n2 or fl1 != fl2:
784 793 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
785 794 elif clean:
786 795 result[t1._subpath(fn)] = None
787 796
788 797 for fn, n2 in t2._files.iteritems():
789 798 if fn not in t1._files:
790 799 fl2 = t2._flags.get(fn, '')
791 800 result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
792 801
793 802 _diff(self, m2)
794 803 return result
795 804
796 805 def unmodifiedsince(self, m2):
797 806 return not self._dirty and not m2._dirty and self._node == m2._node
798 807
799 808 def parse(self, text, readsubtree):
800 809 for f, n, fl in _parse(text):
801 810 if fl == 'd':
802 811 f = f + '/'
803 812 self._dirs[f] = readsubtree(self._subpath(f), n)
804 813 elif '/' in f:
805 814 # This is a flat manifest, so use __setitem__ and setflag rather
806 815 # than assigning directly to _files and _flags, so we can
807 816 # assign a path in a subdirectory, and to mark dirty (compared
808 817 # to nullid).
809 818 self[f] = n
810 819 if fl:
811 820 self.setflag(f, fl)
812 821 else:
813 822 # Assigning to _files and _flags avoids marking as dirty,
814 823 # and should be a little faster.
815 824 self._files[f] = n
816 825 if fl:
817 826 self._flags[f] = fl
818 827
819 828 def text(self, usemanifestv2=False):
820 829 """Get the full data of this manifest as a bytestring."""
821 830 self._load()
822 831 flags = self.flags
823 832 return _text(((f, self[f], flags(f)) for f in self.keys()),
824 833 usemanifestv2)
825 834
826 835 def dirtext(self, usemanifestv2=False):
827 836 """Get the full data of this directory as a bytestring. Make sure that
828 837 any submanifests have been written first, so their nodeids are correct.
829 838 """
830 839 self._load()
831 840 flags = self.flags
832 841 dirs = [(d[:-1], self._dirs[d]._node, 'd') for d in self._dirs]
833 842 files = [(f, self._files[f], flags(f)) for f in self._files]
834 843 return _text(sorted(dirs + files), usemanifestv2)
835 844
836 845 def read(self, gettext, readsubtree):
837 def _load_for_read():
838 # Mark as loaded already here, so __setitem__ and setflag() don't
839 # cause infinite loops when they try to load.
840 self._load = _noop
841 self.parse(gettext(), readsubtree)
842 self._dirty = False
843 self._load = _load_for_read
846 def _load_for_read(s):
847 s.parse(gettext(), readsubtree)
848 s._dirty = False
849 self._loadfunc = _load_for_read
844 850
845 851 def writesubtrees(self, m1, m2, writesubtree):
846 852 self._load() # for consistency; should never have any effect here
847 853 emptytree = treemanifest()
848 854 for d, subm in self._dirs.iteritems():
849 855 subp1 = m1._dirs.get(d, emptytree)._node
850 856 subp2 = m2._dirs.get(d, emptytree)._node
851 857 if subp1 == revlog.nullid:
852 858 subp1, subp2 = subp2, subp1
853 859 writesubtree(subm, subp1, subp2)
854 860
855 861 class manifest(revlog.revlog):
856 862 def __init__(self, opener, dir='', dirlogcache=None):
857 863 '''The 'dir' and 'dirlogcache' arguments are for internal use by
858 864 manifest.manifest only. External users should create a root manifest
859 865 log with manifest.manifest(opener) and call dirlog() on it.
860 866 '''
861 867 # During normal operations, we expect to deal with not more than four
862 868 # revs at a time (such as during commit --amend). When rebasing large
863 869 # stacks of commits, the number can go up, hence the config knob below.
864 870 cachesize = 4
865 871 usetreemanifest = False
866 872 usemanifestv2 = False
867 873 opts = getattr(opener, 'options', None)
868 874 if opts is not None:
869 875 cachesize = opts.get('manifestcachesize', cachesize)
870 876 usetreemanifest = opts.get('treemanifest', usetreemanifest)
871 877 usemanifestv2 = opts.get('manifestv2', usemanifestv2)
872 878 self._mancache = util.lrucachedict(cachesize)
873 879 self._treeinmem = usetreemanifest
874 880 self._treeondisk = usetreemanifest
875 881 self._usemanifestv2 = usemanifestv2
876 882 indexfile = "00manifest.i"
877 883 if dir:
878 884 assert self._treeondisk
879 885 if not dir.endswith('/'):
880 886 dir = dir + '/'
881 887 indexfile = "meta/" + dir + "00manifest.i"
882 888 revlog.revlog.__init__(self, opener, indexfile)
883 889 self._dir = dir
884 890 # The dirlogcache is kept on the root manifest log
885 891 if dir:
886 892 self._dirlogcache = dirlogcache
887 893 else:
888 894 self._dirlogcache = {'': self}
889 895
890 896 def _newmanifest(self, data=''):
891 897 if self._treeinmem:
892 898 return treemanifest(self._dir, data)
893 899 return manifestdict(data)
894 900
895 901 def dirlog(self, dir):
896 902 assert self._treeondisk
897 903 if dir not in self._dirlogcache:
898 904 self._dirlogcache[dir] = manifest(self.opener, dir,
899 905 self._dirlogcache)
900 906 return self._dirlogcache[dir]
901 907
902 908 def _slowreaddelta(self, node):
903 909 r0 = self.deltaparent(self.rev(node))
904 910 m0 = self.read(self.node(r0))
905 911 m1 = self.read(node)
906 912 md = self._newmanifest()
907 913 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
908 914 if n1:
909 915 md[f] = n1
910 916 if fl1:
911 917 md.setflag(f, fl1)
912 918 return md
913 919
914 920 def readdelta(self, node):
915 921 if self._usemanifestv2 or self._treeondisk:
916 922 return self._slowreaddelta(node)
917 923 r = self.rev(node)
918 924 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
919 925 return self._newmanifest(d)
920 926
921 927 def readfast(self, node):
922 928 '''use the faster of readdelta or read
923 929
924 930 This will return a manifest which is either only the files
925 931 added/modified relative to p1, or all files in the
926 932 manifest. Which one is returned depends on the codepath used
927 933 to retrieve the data.
928 934 '''
929 935 r = self.rev(node)
930 936 deltaparent = self.deltaparent(r)
931 937 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
932 938 return self.readdelta(node)
933 939 return self.read(node)
934 940
935 941 def read(self, node):
936 942 if node == revlog.nullid:
937 943 return self._newmanifest() # don't upset local cache
938 944 if node in self._mancache:
939 945 return self._mancache[node][0]
940 946 if self._treeondisk:
941 947 def gettext():
942 948 return self.revision(node)
943 949 def readsubtree(dir, subm):
944 950 return self.dirlog(dir).read(subm)
945 951 m = self._newmanifest()
946 952 m.read(gettext, readsubtree)
947 953 m.setnode(node)
948 954 arraytext = None
949 955 else:
950 956 text = self.revision(node)
951 957 m = self._newmanifest(text)
952 958 arraytext = array.array('c', text)
953 959 self._mancache[node] = (m, arraytext)
954 960 return m
955 961
956 962 def find(self, node, f):
957 963 '''look up entry for a single file efficiently.
958 964 return (node, flags) pair if found, (None, None) if not.'''
959 965 m = self.read(node)
960 966 try:
961 967 return m.find(f)
962 968 except KeyError:
963 969 return None, None
964 970
965 971 def add(self, m, transaction, link, p1, p2, added, removed):
966 972 if (p1 in self._mancache and not self._treeinmem
967 973 and not self._usemanifestv2):
968 974 # If our first parent is in the manifest cache, we can
969 975 # compute a delta here using properties we know about the
970 976 # manifest up-front, which may save time later for the
971 977 # revlog layer.
972 978
973 979 _checkforbidden(added)
974 980 # combine the changed lists into one sorted iterator
975 981 work = heapq.merge([(x, False) for x in added],
976 982 [(x, True) for x in removed])
977 983
978 984 arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
979 985 cachedelta = self.rev(p1), deltatext
980 986 text = util.buffer(arraytext)
981 987 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
982 988 else:
983 989 # The first parent manifest isn't already loaded, so we'll
984 990 # just encode a fulltext of the manifest and pass that
985 991 # through to the revlog layer, and let it handle the delta
986 992 # process.
987 993 if self._treeondisk:
988 994 m1 = self.read(p1)
989 995 m2 = self.read(p2)
990 996 n = self._addtree(m, transaction, link, m1, m2)
991 997 arraytext = None
992 998 else:
993 999 text = m.text(self._usemanifestv2)
994 1000 n = self.addrevision(text, transaction, link, p1, p2)
995 1001 arraytext = array.array('c', text)
996 1002
997 1003 self._mancache[n] = (m, arraytext)
998 1004
999 1005 return n
1000 1006
1001 1007 def _addtree(self, m, transaction, link, m1, m2):
1002 1008 # If the manifest is unchanged compared to one parent,
1003 1009 # don't write a new revision
1004 1010 if m.unmodifiedsince(m1) or m.unmodifiedsince(m2):
1005 1011 return m.node()
1006 1012 def writesubtree(subm, subp1, subp2):
1007 1013 sublog = self.dirlog(subm.dir())
1008 1014 sublog.add(subm, transaction, link, subp1, subp2, None, None)
1009 1015 m.writesubtrees(m1, m2, writesubtree)
1010 1016 text = m.dirtext(self._usemanifestv2)
1011 1017 # Double-check whether contents are unchanged to one parent
1012 1018 if text == m1.dirtext(self._usemanifestv2):
1013 1019 n = m1.node()
1014 1020 elif text == m2.dirtext(self._usemanifestv2):
1015 1021 n = m2.node()
1016 1022 else:
1017 1023 n = self.addrevision(text, transaction, link, m1.node(), m2.node())
1018 1024 # Save nodeid so parent manifest can calculate its nodeid
1019 1025 m.setnode(n)
1020 1026 return n
General Comments 0
You need to be logged in to leave comments. Login now