##// END OF EJS Templates
manifestv2: implement slow readdelta() without revdiff...
Martin von Zweigbergk -
r24528:b538ae24 default
parent child Browse files
Show More
@@ -1,680 +1,694
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import mdiff, parsers, error, revlog, util, scmutil
10 10 import array, struct
11 11
12 12 propertycache = util.propertycache
13 13
14 14 def _parse(data):
15 15 """Generates (path, node, flags) tuples from a manifest text"""
16 16 # This method does a little bit of excessive-looking
17 17 # precondition checking. This is so that the behavior of this
18 18 # class exactly matches its C counterpart to try and help
19 19 # prevent surprise breakage for anyone that develops against
20 20 # the pure version.
21 21 if data and data[-1] != '\n':
22 22 raise ValueError('Manifest did not end in a newline.')
23 23 prev = None
24 24 for l in data.splitlines():
25 25 if prev is not None and prev > l:
26 26 raise ValueError('Manifest lines not in sorted order.')
27 27 prev = l
28 28 f, n = l.split('\0')
29 29 if len(n) > 40:
30 30 yield f, revlog.bin(n[:40]), n[40:]
31 31 else:
32 32 yield f, revlog.bin(n), ''
33 33
34 34 def _text(it):
35 35 """Given an iterator over (path, node, flags) tuples, returns a manifest
36 36 text"""
37 37 files = []
38 38 lines = []
39 39 _hex = revlog.hex
40 40 for f, n, fl in it:
41 41 files.append(f)
42 42 # if this is changed to support newlines in filenames,
43 43 # be sure to check the templates/ dir again (especially *-raw.tmpl)
44 44 lines.append("%s\0%s%s\n" % (f, _hex(n), fl))
45 45
46 46 _checkforbidden(files)
47 47 return ''.join(lines)
48 48
49 49 class _lazymanifest(dict):
50 50 """This is the pure implementation of lazymanifest.
51 51
52 52 It has not been optimized *at all* and is not lazy.
53 53 """
54 54
55 55 def __init__(self, data):
56 56 dict.__init__(self)
57 57 for f, n, fl in _parse(data):
58 58 self[f] = n, fl
59 59
60 60 def __setitem__(self, k, v):
61 61 node, flag = v
62 62 assert node is not None
63 63 if len(node) > 21:
64 64 node = node[:21] # match c implementation behavior
65 65 dict.__setitem__(self, k, (node, flag))
66 66
67 67 def __iter__(self):
68 68 return iter(sorted(dict.keys(self)))
69 69
70 70 def iterkeys(self):
71 71 return iter(sorted(dict.keys(self)))
72 72
73 73 def iterentries(self):
74 74 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
75 75
76 76 def copy(self):
77 77 c = _lazymanifest('')
78 78 c.update(self)
79 79 return c
80 80
81 81 def diff(self, m2, clean=False):
82 82 '''Finds changes between the current manifest and m2.'''
83 83 diff = {}
84 84
85 85 for fn, e1 in self.iteritems():
86 86 if fn not in m2:
87 87 diff[fn] = e1, (None, '')
88 88 else:
89 89 e2 = m2[fn]
90 90 if e1 != e2:
91 91 diff[fn] = e1, e2
92 92 elif clean:
93 93 diff[fn] = None
94 94
95 95 for fn, e2 in m2.iteritems():
96 96 if fn not in self:
97 97 diff[fn] = (None, ''), e2
98 98
99 99 return diff
100 100
101 101 def filtercopy(self, filterfn):
102 102 c = _lazymanifest('')
103 103 for f, n, fl in self.iterentries():
104 104 if filterfn(f):
105 105 c[f] = n, fl
106 106 return c
107 107
108 108 def text(self):
109 109 """Get the full data of this manifest as a bytestring."""
110 110 return _text(self.iterentries())
111 111
112 112 try:
113 113 _lazymanifest = parsers.lazymanifest
114 114 except AttributeError:
115 115 pass
116 116
117 117 class manifestdict(object):
118 118 def __init__(self, data=''):
119 119 self._lm = _lazymanifest(data)
120 120
121 121 def __getitem__(self, key):
122 122 return self._lm[key][0]
123 123
124 124 def find(self, key):
125 125 return self._lm[key]
126 126
127 127 def __len__(self):
128 128 return len(self._lm)
129 129
130 130 def __setitem__(self, key, node):
131 131 self._lm[key] = node, self.flags(key, '')
132 132
133 133 def __contains__(self, key):
134 134 return key in self._lm
135 135
136 136 def __delitem__(self, key):
137 137 del self._lm[key]
138 138
139 139 def __iter__(self):
140 140 return self._lm.__iter__()
141 141
142 142 def iterkeys(self):
143 143 return self._lm.iterkeys()
144 144
145 145 def keys(self):
146 146 return list(self.iterkeys())
147 147
148 148 def _intersectfiles(self, files):
149 149 '''make a new lazymanifest with the intersection of self with files
150 150
151 151 The algorithm assumes that files is much smaller than self.'''
152 152 ret = manifestdict()
153 153 lm = self._lm
154 154 for fn in files:
155 155 if fn in lm:
156 156 ret._lm[fn] = self._lm[fn]
157 157 return ret
158 158
159 159 def filesnotin(self, m2):
160 160 '''Set of files in this manifest that are not in the other'''
161 161 files = set(self)
162 162 files.difference_update(m2)
163 163 return files
164 164
165 165 @propertycache
166 166 def _dirs(self):
167 167 return scmutil.dirs(self)
168 168
169 169 def dirs(self):
170 170 return self._dirs
171 171
172 172 def hasdir(self, dir):
173 173 return dir in self._dirs
174 174
175 175 def matches(self, match):
176 176 '''generate a new manifest filtered by the match argument'''
177 177 if match.always():
178 178 return self.copy()
179 179
180 180 files = match.files()
181 181 if (len(files) < 100 and (match.isexact() or
182 182 (not match.anypats() and util.all(fn in self for fn in files)))):
183 183 return self._intersectfiles(files)
184 184
185 185 lm = manifestdict('')
186 186 lm._lm = self._lm.filtercopy(match)
187 187 return lm
188 188
189 189 def diff(self, m2, clean=False):
190 190 '''Finds changes between the current manifest and m2.
191 191
192 192 Args:
193 193 m2: the manifest to which this manifest should be compared.
194 194 clean: if true, include files unchanged between these manifests
195 195 with a None value in the returned dictionary.
196 196
197 197 The result is returned as a dict with filename as key and
198 198 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
199 199 nodeid in the current/other manifest and fl1/fl2 is the flag
200 200 in the current/other manifest. Where the file does not exist,
201 201 the nodeid will be None and the flags will be the empty
202 202 string.
203 203 '''
204 204 return self._lm.diff(m2._lm, clean)
205 205
206 206 def setflag(self, key, flag):
207 207 self._lm[key] = self[key], flag
208 208
209 209 def get(self, key, default=None):
210 210 try:
211 211 return self._lm[key][0]
212 212 except KeyError:
213 213 return default
214 214
215 215 def flags(self, key, default=''):
216 216 try:
217 217 return self._lm[key][1]
218 218 except KeyError:
219 219 return default
220 220
221 221 def copy(self):
222 222 c = manifestdict('')
223 223 c._lm = self._lm.copy()
224 224 return c
225 225
226 226 def iteritems(self):
227 227 return (x[:2] for x in self._lm.iterentries())
228 228
229 229 def text(self):
230 230 return self._lm.text()
231 231
232 232 def fastdelta(self, base, changes):
233 233 """Given a base manifest text as an array.array and a list of changes
234 234 relative to that text, compute a delta that can be used by revlog.
235 235 """
236 236 delta = []
237 237 dstart = None
238 238 dend = None
239 239 dline = [""]
240 240 start = 0
241 241 # zero copy representation of base as a buffer
242 242 addbuf = util.buffer(base)
243 243
244 244 # start with a readonly loop that finds the offset of
245 245 # each line and creates the deltas
246 246 for f, todelete in changes:
247 247 # bs will either be the index of the item or the insert point
248 248 start, end = _msearch(addbuf, f, start)
249 249 if not todelete:
250 250 h, fl = self._lm[f]
251 251 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
252 252 else:
253 253 if start == end:
254 254 # item we want to delete was not found, error out
255 255 raise AssertionError(
256 256 _("failed to remove %s from manifest") % f)
257 257 l = ""
258 258 if dstart is not None and dstart <= start and dend >= start:
259 259 if dend < end:
260 260 dend = end
261 261 if l:
262 262 dline.append(l)
263 263 else:
264 264 if dstart is not None:
265 265 delta.append([dstart, dend, "".join(dline)])
266 266 dstart = start
267 267 dend = end
268 268 dline = [l]
269 269
270 270 if dstart is not None:
271 271 delta.append([dstart, dend, "".join(dline)])
272 272 # apply the delta to the base, and get a delta for addrevision
273 273 deltatext, arraytext = _addlistdelta(base, delta)
274 274 return arraytext, deltatext
275 275
276 276 def _msearch(m, s, lo=0, hi=None):
277 277 '''return a tuple (start, end) that says where to find s within m.
278 278
279 279 If the string is found m[start:end] are the line containing
280 280 that string. If start == end the string was not found and
281 281 they indicate the proper sorted insertion point.
282 282
283 283 m should be a buffer or a string
284 284 s is a string'''
285 285 def advance(i, c):
286 286 while i < lenm and m[i] != c:
287 287 i += 1
288 288 return i
289 289 if not s:
290 290 return (lo, lo)
291 291 lenm = len(m)
292 292 if not hi:
293 293 hi = lenm
294 294 while lo < hi:
295 295 mid = (lo + hi) // 2
296 296 start = mid
297 297 while start > 0 and m[start - 1] != '\n':
298 298 start -= 1
299 299 end = advance(start, '\0')
300 300 if m[start:end] < s:
301 301 # we know that after the null there are 40 bytes of sha1
302 302 # this translates to the bisect lo = mid + 1
303 303 lo = advance(end + 40, '\n') + 1
304 304 else:
305 305 # this translates to the bisect hi = mid
306 306 hi = start
307 307 end = advance(lo, '\0')
308 308 found = m[lo:end]
309 309 if s == found:
310 310 # we know that after the null there are 40 bytes of sha1
311 311 end = advance(end + 40, '\n')
312 312 return (lo, end + 1)
313 313 else:
314 314 return (lo, lo)
315 315
316 316 def _checkforbidden(l):
317 317 """Check filenames for illegal characters."""
318 318 for f in l:
319 319 if '\n' in f or '\r' in f:
320 320 raise error.RevlogError(
321 321 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
322 322
323 323
324 324 # apply the changes collected during the bisect loop to our addlist
325 325 # return a delta suitable for addrevision
326 326 def _addlistdelta(addlist, x):
327 327 # for large addlist arrays, building a new array is cheaper
328 328 # than repeatedly modifying the existing one
329 329 currentposition = 0
330 330 newaddlist = array.array('c')
331 331
332 332 for start, end, content in x:
333 333 newaddlist += addlist[currentposition:start]
334 334 if content:
335 335 newaddlist += array.array('c', content)
336 336
337 337 currentposition = end
338 338
339 339 newaddlist += addlist[currentposition:]
340 340
341 341 deltatext = "".join(struct.pack(">lll", start, end, len(content))
342 342 + content for start, end, content in x)
343 343 return deltatext, newaddlist
344 344
345 345 def _splittopdir(f):
346 346 if '/' in f:
347 347 dir, subpath = f.split('/', 1)
348 348 return dir + '/', subpath
349 349 else:
350 350 return '', f
351 351
352 352 class treemanifest(object):
353 353 def __init__(self, dir='', text=''):
354 354 self._dir = dir
355 355 self._dirs = {}
356 356 # Using _lazymanifest here is a little slower than plain old dicts
357 357 self._files = {}
358 358 self._flags = {}
359 359 for f, n, fl in _parse(text):
360 360 self[f] = n
361 361 if fl:
362 362 self.setflag(f, fl)
363 363
364 364 def _subpath(self, path):
365 365 return self._dir + path
366 366
367 367 def __len__(self):
368 368 size = len(self._files)
369 369 for m in self._dirs.values():
370 370 size += m.__len__()
371 371 return size
372 372
373 373 def __str__(self):
374 374 return '<treemanifest dir=%s>' % self._dir
375 375
376 376 def iteritems(self):
377 377 for p, n in sorted(self._dirs.items() + self._files.items()):
378 378 if p in self._files:
379 379 yield self._subpath(p), n
380 380 else:
381 381 for f, sn in n.iteritems():
382 382 yield f, sn
383 383
384 384 def iterkeys(self):
385 385 for p in sorted(self._dirs.keys() + self._files.keys()):
386 386 if p in self._files:
387 387 yield self._subpath(p)
388 388 else:
389 389 for f in self._dirs[p].iterkeys():
390 390 yield f
391 391
392 392 def keys(self):
393 393 return list(self.iterkeys())
394 394
395 395 def __iter__(self):
396 396 return self.iterkeys()
397 397
398 398 def __contains__(self, f):
399 399 if f is None:
400 400 return False
401 401 dir, subpath = _splittopdir(f)
402 402 if dir:
403 403 if dir not in self._dirs:
404 404 return False
405 405 return self._dirs[dir].__contains__(subpath)
406 406 else:
407 407 return f in self._files
408 408
409 409 def get(self, f, default=None):
410 410 dir, subpath = _splittopdir(f)
411 411 if dir:
412 412 if dir not in self._dirs:
413 413 return default
414 414 return self._dirs[dir].get(subpath, default)
415 415 else:
416 416 return self._files.get(f, default)
417 417
418 418 def __getitem__(self, f):
419 419 dir, subpath = _splittopdir(f)
420 420 if dir:
421 421 return self._dirs[dir].__getitem__(subpath)
422 422 else:
423 423 return self._files[f]
424 424
425 425 def flags(self, f):
426 426 dir, subpath = _splittopdir(f)
427 427 if dir:
428 428 if dir not in self._dirs:
429 429 return ''
430 430 return self._dirs[dir].flags(subpath)
431 431 else:
432 432 if f in self._dirs:
433 433 return ''
434 434 return self._flags.get(f, '')
435 435
436 436 def find(self, f):
437 437 dir, subpath = _splittopdir(f)
438 438 if dir:
439 439 return self._dirs[dir].find(subpath)
440 440 else:
441 441 return self._files[f], self._flags.get(f, '')
442 442
443 443 def __delitem__(self, f):
444 444 dir, subpath = _splittopdir(f)
445 445 if dir:
446 446 self._dirs[dir].__delitem__(subpath)
447 447 # If the directory is now empty, remove it
448 448 if not self._dirs[dir]._dirs and not self._dirs[dir]._files:
449 449 del self._dirs[dir]
450 450 else:
451 451 del self._files[f]
452 452 if f in self._flags:
453 453 del self._flags[f]
454 454
455 455 def __setitem__(self, f, n):
456 456 assert n is not None
457 457 dir, subpath = _splittopdir(f)
458 458 if dir:
459 459 if dir not in self._dirs:
460 460 self._dirs[dir] = treemanifest(self._subpath(dir))
461 461 self._dirs[dir].__setitem__(subpath, n)
462 462 else:
463 463 self._files[f] = n[:21] # to match manifestdict's behavior
464 464
465 465 def setflag(self, f, flags):
466 466 """Set the flags (symlink, executable) for path f."""
467 467 dir, subpath = _splittopdir(f)
468 468 if dir:
469 469 if dir not in self._dirs:
470 470 self._dirs[dir] = treemanifest(self._subpath(dir))
471 471 self._dirs[dir].setflag(subpath, flags)
472 472 else:
473 473 self._flags[f] = flags
474 474
475 475 def copy(self):
476 476 copy = treemanifest(self._dir)
477 477 for d in self._dirs:
478 478 copy._dirs[d] = self._dirs[d].copy()
479 479 copy._files = dict.copy(self._files)
480 480 copy._flags = dict.copy(self._flags)
481 481 return copy
482 482
483 483 def _intersectfiles(self, files):
484 484 '''make a new treemanifest with the intersection of self with files
485 485
486 486 The algorithm assumes that files is much smaller than self.'''
487 487 ret = treemanifest()
488 488 for fn in files:
489 489 if fn in self:
490 490 ret[fn] = self[fn]
491 491 flags = self.flags(fn)
492 492 if flags:
493 493 ret.setflag(fn, flags)
494 494 return ret
495 495
496 496 def filesnotin(self, m2):
497 497 '''Set of files in this manifest that are not in the other'''
498 498 files = set()
499 499 def _filesnotin(t1, t2):
500 500 for d, m1 in t1._dirs.iteritems():
501 501 if d in t2._dirs:
502 502 m2 = t2._dirs[d]
503 503 _filesnotin(m1, m2)
504 504 else:
505 505 files.update(m1.iterkeys())
506 506
507 507 for fn in t1._files.iterkeys():
508 508 if fn not in t2._files:
509 509 files.add(t1._subpath(fn))
510 510
511 511 _filesnotin(self, m2)
512 512 return files
513 513
514 514 @propertycache
515 515 def _alldirs(self):
516 516 return scmutil.dirs(self)
517 517
518 518 def dirs(self):
519 519 return self._alldirs
520 520
521 521 def hasdir(self, dir):
522 522 topdir, subdir = _splittopdir(dir)
523 523 if topdir:
524 524 if topdir in self._dirs:
525 525 return self._dirs[topdir].hasdir(subdir)
526 526 return False
527 527 return (dir + '/') in self._dirs
528 528
529 529 def matches(self, match):
530 530 '''generate a new manifest filtered by the match argument'''
531 531 if match.always():
532 532 return self.copy()
533 533
534 534 files = match.files()
535 535 if (match.isexact() or
536 536 (not match.anypats() and util.all(fn in self for fn in files))):
537 537 return self._intersectfiles(files)
538 538
539 539 m = self.copy()
540 540 for fn in m.keys():
541 541 if not match(fn):
542 542 del m[fn]
543 543 return m
544 544
545 545 def diff(self, m2, clean=False):
546 546 '''Finds changes between the current manifest and m2.
547 547
548 548 Args:
549 549 m2: the manifest to which this manifest should be compared.
550 550 clean: if true, include files unchanged between these manifests
551 551 with a None value in the returned dictionary.
552 552
553 553 The result is returned as a dict with filename as key and
554 554 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
555 555 nodeid in the current/other manifest and fl1/fl2 is the flag
556 556 in the current/other manifest. Where the file does not exist,
557 557 the nodeid will be None and the flags will be the empty
558 558 string.
559 559 '''
560 560 result = {}
561 561 emptytree = treemanifest()
562 562 def _diff(t1, t2):
563 563 for d, m1 in t1._dirs.iteritems():
564 564 m2 = t2._dirs.get(d, emptytree)
565 565 _diff(m1, m2)
566 566
567 567 for d, m2 in t2._dirs.iteritems():
568 568 if d not in t1._dirs:
569 569 _diff(emptytree, m2)
570 570
571 571 for fn, n1 in t1._files.iteritems():
572 572 fl1 = t1._flags.get(fn, '')
573 573 n2 = t2._files.get(fn, None)
574 574 fl2 = t2._flags.get(fn, '')
575 575 if n1 != n2 or fl1 != fl2:
576 576 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
577 577 elif clean:
578 578 result[t1._subpath(fn)] = None
579 579
580 580 for fn, n2 in t2._files.iteritems():
581 581 if fn not in t1._files:
582 582 fl2 = t2._flags.get(fn, '')
583 583 result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
584 584
585 585 _diff(self, m2)
586 586 return result
587 587
588 588 def text(self):
589 589 """Get the full data of this manifest as a bytestring."""
590 590 flags = self.flags
591 591 return _text((f, self[f], flags(f)) for f in self.keys())
592 592
593 593 class manifest(revlog.revlog):
594 594 def __init__(self, opener):
595 595 # During normal operations, we expect to deal with not more than four
596 596 # revs at a time (such as during commit --amend). When rebasing large
597 597 # stacks of commits, the number can go up, hence the config knob below.
598 598 cachesize = 4
599 599 usetreemanifest = False
600 600 usemanifestv2 = False
601 601 opts = getattr(opener, 'options', None)
602 602 if opts is not None:
603 603 cachesize = opts.get('manifestcachesize', cachesize)
604 604 usetreemanifest = opts.get('usetreemanifest', usetreemanifest)
605 605 usemanifestv2 = opts.get('usemanifestv2', usemanifestv2)
606 606 self._mancache = util.lrucachedict(cachesize)
607 607 revlog.revlog.__init__(self, opener, "00manifest.i")
608 608 self._usetreemanifest = usetreemanifest
609 609 self._usemanifestv2 = usemanifestv2
610 610
611 611 def _newmanifest(self, data=''):
612 612 if self._usetreemanifest:
613 613 return treemanifest('', data)
614 614 return manifestdict(data)
615 615
616 def _slowreaddelta(self, node):
617 r0 = self.deltaparent(self.rev(node))
618 m0 = self.read(self.node(r0))
619 m1 = self.read(node)
620 md = self._newmanifest()
621 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
622 if n1:
623 md[f] = n1
624 if fl1:
625 md.setflag(f, fl1)
626 return md
627
616 628 def readdelta(self, node):
629 if self._usemanifestv2:
630 return self._slowreaddelta(node)
617 631 r = self.rev(node)
618 632 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
619 633 return self._newmanifest(d)
620 634
621 635 def readfast(self, node):
622 636 '''use the faster of readdelta or read'''
623 637 r = self.rev(node)
624 638 deltaparent = self.deltaparent(r)
625 639 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
626 640 return self.readdelta(node)
627 641 return self.read(node)
628 642
629 643 def read(self, node):
630 644 if node == revlog.nullid:
631 645 return self._newmanifest() # don't upset local cache
632 646 if node in self._mancache:
633 647 return self._mancache[node][0]
634 648 text = self.revision(node)
635 649 arraytext = array.array('c', text)
636 650 m = self._newmanifest(text)
637 651 self._mancache[node] = (m, arraytext)
638 652 return m
639 653
640 654 def find(self, node, f):
641 655 '''look up entry for a single file efficiently.
642 656 return (node, flags) pair if found, (None, None) if not.'''
643 657 m = self.read(node)
644 658 try:
645 659 return m.find(f)
646 660 except KeyError:
647 661 return None, None
648 662
649 663 def add(self, m, transaction, link, p1, p2, added, removed):
650 664 if (p1 in self._mancache and not self._usetreemanifest
651 665 and not self._usemanifestv2):
652 666 # If our first parent is in the manifest cache, we can
653 667 # compute a delta here using properties we know about the
654 668 # manifest up-front, which may save time later for the
655 669 # revlog layer.
656 670
657 671 _checkforbidden(added)
658 672 # combine the changed lists into one list for sorting
659 673 work = [(x, False) for x in added]
660 674 work.extend((x, True) for x in removed)
661 675 # this could use heapq.merge() (from Python 2.6+) or equivalent
662 676 # since the lists are already sorted
663 677 work.sort()
664 678
665 679 arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
666 680 cachedelta = self.rev(p1), deltatext
667 681 text = util.buffer(arraytext)
668 682 else:
669 683 # The first parent manifest isn't already loaded, so we'll
670 684 # just encode a fulltext of the manifest and pass that
671 685 # through to the revlog layer, and let it handle the delta
672 686 # process.
673 687 text = m.text()
674 688 arraytext = array.array('c', text)
675 689 cachedelta = None
676 690
677 691 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
678 692 self._mancache[n] = (m, arraytext)
679 693
680 694 return n
General Comments 0
You need to be logged in to leave comments. Login now