##// END OF EJS Templates
manifest: extract method for creating manifest text...
Martin von Zweigbergk -
r24525:e118f74d default
parent child Browse files
Show More
@@ -1,672 +1,676
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import mdiff, parsers, error, revlog, util, scmutil
10 10 import array, struct
11 11
12 12 propertycache = util.propertycache
13 13
14 14 def _parse(data):
15 15 """Generates (path, node, flags) tuples from a manifest text"""
16 16 # This method does a little bit of excessive-looking
17 17 # precondition checking. This is so that the behavior of this
18 18 # class exactly matches its C counterpart to try and help
19 19 # prevent surprise breakage for anyone that develops against
20 20 # the pure version.
21 21 if data and data[-1] != '\n':
22 22 raise ValueError('Manifest did not end in a newline.')
23 23 prev = None
24 24 for l in data.splitlines():
25 25 if prev is not None and prev > l:
26 26 raise ValueError('Manifest lines not in sorted order.')
27 27 prev = l
28 28 f, n = l.split('\0')
29 29 if len(n) > 40:
30 30 yield f, revlog.bin(n[:40]), n[40:]
31 31 else:
32 32 yield f, revlog.bin(n), ''
33 33
34 def _text(it):
35 """Given an iterator over (path, node, flags) tuples, returns a manifest
36 text"""
37 files = []
38 lines = []
39 _hex = revlog.hex
40 for f, n, fl in it:
41 files.append(f)
42 # if this is changed to support newlines in filenames,
43 # be sure to check the templates/ dir again (especially *-raw.tmpl)
44 lines.append("%s\0%s%s\n" % (f, _hex(n), fl))
45
46 _checkforbidden(files)
47 return ''.join(lines)
48
34 49 class _lazymanifest(dict):
35 50 """This is the pure implementation of lazymanifest.
36 51
37 52 It has not been optimized *at all* and is not lazy.
38 53 """
39 54
40 55 def __init__(self, data):
41 56 dict.__init__(self)
42 57 for f, n, fl in _parse(data):
43 58 self[f] = n, fl
44 59
45 60 def __setitem__(self, k, v):
46 61 node, flag = v
47 62 assert node is not None
48 63 if len(node) > 21:
49 64 node = node[:21] # match c implementation behavior
50 65 dict.__setitem__(self, k, (node, flag))
51 66
52 67 def __iter__(self):
53 68 return iter(sorted(dict.keys(self)))
54 69
55 70 def iterkeys(self):
56 71 return iter(sorted(dict.keys(self)))
57 72
58 73 def iterentries(self):
59 74 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
60 75
61 76 def copy(self):
62 77 c = _lazymanifest('')
63 78 c.update(self)
64 79 return c
65 80
66 81 def diff(self, m2, clean=False):
67 82 '''Finds changes between the current manifest and m2.'''
68 83 diff = {}
69 84
70 85 for fn, e1 in self.iteritems():
71 86 if fn not in m2:
72 87 diff[fn] = e1, (None, '')
73 88 else:
74 89 e2 = m2[fn]
75 90 if e1 != e2:
76 91 diff[fn] = e1, e2
77 92 elif clean:
78 93 diff[fn] = None
79 94
80 95 for fn, e2 in m2.iteritems():
81 96 if fn not in self:
82 97 diff[fn] = (None, ''), e2
83 98
84 99 return diff
85 100
86 101 def filtercopy(self, filterfn):
87 102 c = _lazymanifest('')
88 103 for f, n, fl in self.iterentries():
89 104 if filterfn(f):
90 105 c[f] = n, fl
91 106 return c
92 107
93 108 def text(self):
94 109 """Get the full data of this manifest as a bytestring."""
95 fl = self.iterentries()
96
97 _hex = revlog.hex
98 # if this is changed to support newlines in filenames,
99 # be sure to check the templates/ dir again (especially *-raw.tmpl)
100 return ''.join("%s\0%s%s\n" % (
101 f, _hex(n[:20]), flag) for f, n, flag in fl)
110 return _text(self.iterentries())
102 111
103 112 try:
104 113 _lazymanifest = parsers.lazymanifest
105 114 except AttributeError:
106 115 pass
107 116
108 117 class manifestdict(object):
109 118 def __init__(self, data=''):
110 119 self._lm = _lazymanifest(data)
111 120
112 121 def __getitem__(self, key):
113 122 return self._lm[key][0]
114 123
115 124 def find(self, key):
116 125 return self._lm[key]
117 126
118 127 def __len__(self):
119 128 return len(self._lm)
120 129
121 130 def __setitem__(self, key, node):
122 131 self._lm[key] = node, self.flags(key, '')
123 132
124 133 def __contains__(self, key):
125 134 return key in self._lm
126 135
127 136 def __delitem__(self, key):
128 137 del self._lm[key]
129 138
130 139 def __iter__(self):
131 140 return self._lm.__iter__()
132 141
133 142 def iterkeys(self):
134 143 return self._lm.iterkeys()
135 144
136 145 def keys(self):
137 146 return list(self.iterkeys())
138 147
139 148 def _intersectfiles(self, files):
140 149 '''make a new lazymanifest with the intersection of self with files
141 150
142 151 The algorithm assumes that files is much smaller than self.'''
143 152 ret = manifestdict()
144 153 lm = self._lm
145 154 for fn in files:
146 155 if fn in lm:
147 156 ret._lm[fn] = self._lm[fn]
148 157 return ret
149 158
150 159 def filesnotin(self, m2):
151 160 '''Set of files in this manifest that are not in the other'''
152 161 files = set(self)
153 162 files.difference_update(m2)
154 163 return files
155 164
156 165 @propertycache
157 166 def _dirs(self):
158 167 return scmutil.dirs(self)
159 168
160 169 def dirs(self):
161 170 return self._dirs
162 171
163 172 def hasdir(self, dir):
164 173 return dir in self._dirs
165 174
166 175 def matches(self, match):
167 176 '''generate a new manifest filtered by the match argument'''
168 177 if match.always():
169 178 return self.copy()
170 179
171 180 files = match.files()
172 181 if (len(files) < 100 and (match.isexact() or
173 182 (not match.anypats() and util.all(fn in self for fn in files)))):
174 183 return self._intersectfiles(files)
175 184
176 185 lm = manifestdict('')
177 186 lm._lm = self._lm.filtercopy(match)
178 187 return lm
179 188
180 189 def diff(self, m2, clean=False):
181 190 '''Finds changes between the current manifest and m2.
182 191
183 192 Args:
184 193 m2: the manifest to which this manifest should be compared.
185 194 clean: if true, include files unchanged between these manifests
186 195 with a None value in the returned dictionary.
187 196
188 197 The result is returned as a dict with filename as key and
189 198 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
190 199 nodeid in the current/other manifest and fl1/fl2 is the flag
191 200 in the current/other manifest. Where the file does not exist,
192 201 the nodeid will be None and the flags will be the empty
193 202 string.
194 203 '''
195 204 return self._lm.diff(m2._lm, clean)
196 205
197 206 def setflag(self, key, flag):
198 207 self._lm[key] = self[key], flag
199 208
200 209 def get(self, key, default=None):
201 210 try:
202 211 return self._lm[key][0]
203 212 except KeyError:
204 213 return default
205 214
206 215 def flags(self, key, default=''):
207 216 try:
208 217 return self._lm[key][1]
209 218 except KeyError:
210 219 return default
211 220
212 221 def copy(self):
213 222 c = manifestdict('')
214 223 c._lm = self._lm.copy()
215 224 return c
216 225
217 226 def iteritems(self):
218 227 return (x[:2] for x in self._lm.iterentries())
219 228
220 229 def text(self):
221 230 return self._lm.text()
222 231
223 232 def fastdelta(self, base, changes):
224 233 """Given a base manifest text as an array.array and a list of changes
225 234 relative to that text, compute a delta that can be used by revlog.
226 235 """
227 236 delta = []
228 237 dstart = None
229 238 dend = None
230 239 dline = [""]
231 240 start = 0
232 241 # zero copy representation of base as a buffer
233 242 addbuf = util.buffer(base)
234 243
235 244 # start with a readonly loop that finds the offset of
236 245 # each line and creates the deltas
237 246 for f, todelete in changes:
238 247 # bs will either be the index of the item or the insert point
239 248 start, end = _msearch(addbuf, f, start)
240 249 if not todelete:
241 250 h, fl = self._lm[f]
242 251 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
243 252 else:
244 253 if start == end:
245 254 # item we want to delete was not found, error out
246 255 raise AssertionError(
247 256 _("failed to remove %s from manifest") % f)
248 257 l = ""
249 258 if dstart is not None and dstart <= start and dend >= start:
250 259 if dend < end:
251 260 dend = end
252 261 if l:
253 262 dline.append(l)
254 263 else:
255 264 if dstart is not None:
256 265 delta.append([dstart, dend, "".join(dline)])
257 266 dstart = start
258 267 dend = end
259 268 dline = [l]
260 269
261 270 if dstart is not None:
262 271 delta.append([dstart, dend, "".join(dline)])
263 272 # apply the delta to the base, and get a delta for addrevision
264 273 deltatext, arraytext = _addlistdelta(base, delta)
265 274 return arraytext, deltatext
266 275
267 276 def _msearch(m, s, lo=0, hi=None):
268 277 '''return a tuple (start, end) that says where to find s within m.
269 278
270 279 If the string is found m[start:end] are the line containing
271 280 that string. If start == end the string was not found and
272 281 they indicate the proper sorted insertion point.
273 282
274 283 m should be a buffer or a string
275 284 s is a string'''
276 285 def advance(i, c):
277 286 while i < lenm and m[i] != c:
278 287 i += 1
279 288 return i
280 289 if not s:
281 290 return (lo, lo)
282 291 lenm = len(m)
283 292 if not hi:
284 293 hi = lenm
285 294 while lo < hi:
286 295 mid = (lo + hi) // 2
287 296 start = mid
288 297 while start > 0 and m[start - 1] != '\n':
289 298 start -= 1
290 299 end = advance(start, '\0')
291 300 if m[start:end] < s:
292 301 # we know that after the null there are 40 bytes of sha1
293 302 # this translates to the bisect lo = mid + 1
294 303 lo = advance(end + 40, '\n') + 1
295 304 else:
296 305 # this translates to the bisect hi = mid
297 306 hi = start
298 307 end = advance(lo, '\0')
299 308 found = m[lo:end]
300 309 if s == found:
301 310 # we know that after the null there are 40 bytes of sha1
302 311 end = advance(end + 40, '\n')
303 312 return (lo, end + 1)
304 313 else:
305 314 return (lo, lo)
306 315
307 316 def _checkforbidden(l):
308 317 """Check filenames for illegal characters."""
309 318 for f in l:
310 319 if '\n' in f or '\r' in f:
311 320 raise error.RevlogError(
312 321 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
313 322
314 323
315 324 # apply the changes collected during the bisect loop to our addlist
316 325 # return a delta suitable for addrevision
317 326 def _addlistdelta(addlist, x):
318 327 # for large addlist arrays, building a new array is cheaper
319 328 # than repeatedly modifying the existing one
320 329 currentposition = 0
321 330 newaddlist = array.array('c')
322 331
323 332 for start, end, content in x:
324 333 newaddlist += addlist[currentposition:start]
325 334 if content:
326 335 newaddlist += array.array('c', content)
327 336
328 337 currentposition = end
329 338
330 339 newaddlist += addlist[currentposition:]
331 340
332 341 deltatext = "".join(struct.pack(">lll", start, end, len(content))
333 342 + content for start, end, content in x)
334 343 return deltatext, newaddlist
335 344
336 345 def _splittopdir(f):
337 346 if '/' in f:
338 347 dir, subpath = f.split('/', 1)
339 348 return dir + '/', subpath
340 349 else:
341 350 return '', f
342 351
343 352 class treemanifest(object):
344 353 def __init__(self, dir='', text=''):
345 354 self._dir = dir
346 355 self._dirs = {}
347 356 # Using _lazymanifest here is a little slower than plain old dicts
348 357 self._files = {}
349 358 self._flags = {}
350 359 for f, n, fl in _parse(text):
351 360 self[f] = n
352 361 if fl:
353 362 self.setflag(f, fl)
354 363
355 364 def _subpath(self, path):
356 365 return self._dir + path
357 366
358 367 def __len__(self):
359 368 size = len(self._files)
360 369 for m in self._dirs.values():
361 370 size += m.__len__()
362 371 return size
363 372
364 373 def __str__(self):
365 374 return '<treemanifest dir=%s>' % self._dir
366 375
367 376 def iteritems(self):
368 377 for p, n in sorted(self._dirs.items() + self._files.items()):
369 378 if p in self._files:
370 379 yield self._subpath(p), n
371 380 else:
372 381 for f, sn in n.iteritems():
373 382 yield f, sn
374 383
375 384 def iterkeys(self):
376 385 for p in sorted(self._dirs.keys() + self._files.keys()):
377 386 if p in self._files:
378 387 yield self._subpath(p)
379 388 else:
380 389 for f in self._dirs[p].iterkeys():
381 390 yield f
382 391
383 392 def keys(self):
384 393 return list(self.iterkeys())
385 394
386 395 def __iter__(self):
387 396 return self.iterkeys()
388 397
389 398 def __contains__(self, f):
390 399 if f is None:
391 400 return False
392 401 dir, subpath = _splittopdir(f)
393 402 if dir:
394 403 if dir not in self._dirs:
395 404 return False
396 405 return self._dirs[dir].__contains__(subpath)
397 406 else:
398 407 return f in self._files
399 408
400 409 def get(self, f, default=None):
401 410 dir, subpath = _splittopdir(f)
402 411 if dir:
403 412 if dir not in self._dirs:
404 413 return default
405 414 return self._dirs[dir].get(subpath, default)
406 415 else:
407 416 return self._files.get(f, default)
408 417
409 418 def __getitem__(self, f):
410 419 dir, subpath = _splittopdir(f)
411 420 if dir:
412 421 return self._dirs[dir].__getitem__(subpath)
413 422 else:
414 423 return self._files[f]
415 424
416 425 def flags(self, f):
417 426 dir, subpath = _splittopdir(f)
418 427 if dir:
419 428 if dir not in self._dirs:
420 429 return ''
421 430 return self._dirs[dir].flags(subpath)
422 431 else:
423 432 if f in self._dirs:
424 433 return ''
425 434 return self._flags.get(f, '')
426 435
427 436 def find(self, f):
428 437 dir, subpath = _splittopdir(f)
429 438 if dir:
430 439 return self._dirs[dir].find(subpath)
431 440 else:
432 441 return self._files[f], self._flags.get(f, '')
433 442
434 443 def __delitem__(self, f):
435 444 dir, subpath = _splittopdir(f)
436 445 if dir:
437 446 self._dirs[dir].__delitem__(subpath)
438 447 # If the directory is now empty, remove it
439 448 if not self._dirs[dir]._dirs and not self._dirs[dir]._files:
440 449 del self._dirs[dir]
441 450 else:
442 451 del self._files[f]
443 452 if f in self._flags:
444 453 del self._flags[f]
445 454
446 455 def __setitem__(self, f, n):
447 456 assert n is not None
448 457 dir, subpath = _splittopdir(f)
449 458 if dir:
450 459 if dir not in self._dirs:
451 460 self._dirs[dir] = treemanifest(self._subpath(dir))
452 461 self._dirs[dir].__setitem__(subpath, n)
453 462 else:
454 463 self._files[f] = n[:21] # to match manifestdict's behavior
455 464
456 465 def setflag(self, f, flags):
457 466 """Set the flags (symlink, executable) for path f."""
458 467 dir, subpath = _splittopdir(f)
459 468 if dir:
460 469 if dir not in self._dirs:
461 470 self._dirs[dir] = treemanifest(self._subpath(dir))
462 471 self._dirs[dir].setflag(subpath, flags)
463 472 else:
464 473 self._flags[f] = flags
465 474
466 475 def copy(self):
467 476 copy = treemanifest(self._dir)
468 477 for d in self._dirs:
469 478 copy._dirs[d] = self._dirs[d].copy()
470 479 copy._files = dict.copy(self._files)
471 480 copy._flags = dict.copy(self._flags)
472 481 return copy
473 482
474 483 def _intersectfiles(self, files):
475 484 '''make a new treemanifest with the intersection of self with files
476 485
477 486 The algorithm assumes that files is much smaller than self.'''
478 487 ret = treemanifest()
479 488 for fn in files:
480 489 if fn in self:
481 490 ret[fn] = self[fn]
482 491 flags = self.flags(fn)
483 492 if flags:
484 493 ret.setflag(fn, flags)
485 494 return ret
486 495
487 496 def filesnotin(self, m2):
488 497 '''Set of files in this manifest that are not in the other'''
489 498 files = set()
490 499 def _filesnotin(t1, t2):
491 500 for d, m1 in t1._dirs.iteritems():
492 501 if d in t2._dirs:
493 502 m2 = t2._dirs[d]
494 503 _filesnotin(m1, m2)
495 504 else:
496 505 files.update(m1.iterkeys())
497 506
498 507 for fn in t1._files.iterkeys():
499 508 if fn not in t2._files:
500 509 files.add(t1._subpath(fn))
501 510
502 511 _filesnotin(self, m2)
503 512 return files
504 513
505 514 @propertycache
506 515 def _alldirs(self):
507 516 return scmutil.dirs(self)
508 517
509 518 def dirs(self):
510 519 return self._alldirs
511 520
512 521 def hasdir(self, dir):
513 522 topdir, subdir = _splittopdir(dir)
514 523 if topdir:
515 524 if topdir in self._dirs:
516 525 return self._dirs[topdir].hasdir(subdir)
517 526 return False
518 527 return (dir + '/') in self._dirs
519 528
520 529 def matches(self, match):
521 530 '''generate a new manifest filtered by the match argument'''
522 531 if match.always():
523 532 return self.copy()
524 533
525 534 files = match.files()
526 535 if (match.isexact() or
527 536 (not match.anypats() and util.all(fn in self for fn in files))):
528 537 return self._intersectfiles(files)
529 538
530 539 m = self.copy()
531 540 for fn in m.keys():
532 541 if not match(fn):
533 542 del m[fn]
534 543 return m
535 544
536 545 def diff(self, m2, clean=False):
537 546 '''Finds changes between the current manifest and m2.
538 547
539 548 Args:
540 549 m2: the manifest to which this manifest should be compared.
541 550 clean: if true, include files unchanged between these manifests
542 551 with a None value in the returned dictionary.
543 552
544 553 The result is returned as a dict with filename as key and
545 554 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
546 555 nodeid in the current/other manifest and fl1/fl2 is the flag
547 556 in the current/other manifest. Where the file does not exist,
548 557 the nodeid will be None and the flags will be the empty
549 558 string.
550 559 '''
551 560 result = {}
552 561 emptytree = treemanifest()
553 562 def _diff(t1, t2):
554 563 for d, m1 in t1._dirs.iteritems():
555 564 m2 = t2._dirs.get(d, emptytree)
556 565 _diff(m1, m2)
557 566
558 567 for d, m2 in t2._dirs.iteritems():
559 568 if d not in t1._dirs:
560 569 _diff(emptytree, m2)
561 570
562 571 for fn, n1 in t1._files.iteritems():
563 572 fl1 = t1._flags.get(fn, '')
564 573 n2 = t2._files.get(fn, None)
565 574 fl2 = t2._flags.get(fn, '')
566 575 if n1 != n2 or fl1 != fl2:
567 576 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
568 577 elif clean:
569 578 result[t1._subpath(fn)] = None
570 579
571 580 for fn, n2 in t2._files.iteritems():
572 581 if fn not in t1._files:
573 582 fl2 = t2._flags.get(fn, '')
574 583 result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
575 584
576 585 _diff(self, m2)
577 586 return result
578 587
579 588 def text(self):
580 589 """Get the full data of this manifest as a bytestring."""
581 fl = self.keys()
582 _checkforbidden(fl)
583
584 hex, flags = revlog.hex, self.flags
585 # if this is changed to support newlines in filenames,
586 # be sure to check the templates/ dir again (especially *-raw.tmpl)
587 return ''.join("%s\0%s%s\n" % (f, hex(self[f]), flags(f)) for f in fl)
590 flags = self.flags
591 return _text((f, self[f], flags(f)) for f in self.keys())
588 592
589 593 class manifest(revlog.revlog):
590 594 def __init__(self, opener):
591 595 # During normal operations, we expect to deal with not more than four
592 596 # revs at a time (such as during commit --amend). When rebasing large
593 597 # stacks of commits, the number can go up, hence the config knob below.
594 598 cachesize = 4
595 599 usetreemanifest = False
596 600 opts = getattr(opener, 'options', None)
597 601 if opts is not None:
598 602 cachesize = opts.get('manifestcachesize', cachesize)
599 603 usetreemanifest = opts.get('usetreemanifest', usetreemanifest)
600 604 self._mancache = util.lrucachedict(cachesize)
601 605 revlog.revlog.__init__(self, opener, "00manifest.i")
602 606 self._usetreemanifest = usetreemanifest
603 607
604 608 def _newmanifest(self, data=''):
605 609 if self._usetreemanifest:
606 610 return treemanifest('', data)
607 611 return manifestdict(data)
608 612
609 613 def readdelta(self, node):
610 614 r = self.rev(node)
611 615 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
612 616 return self._newmanifest(d)
613 617
614 618 def readfast(self, node):
615 619 '''use the faster of readdelta or read'''
616 620 r = self.rev(node)
617 621 deltaparent = self.deltaparent(r)
618 622 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
619 623 return self.readdelta(node)
620 624 return self.read(node)
621 625
622 626 def read(self, node):
623 627 if node == revlog.nullid:
624 628 return self._newmanifest() # don't upset local cache
625 629 if node in self._mancache:
626 630 return self._mancache[node][0]
627 631 text = self.revision(node)
628 632 arraytext = array.array('c', text)
629 633 m = self._newmanifest(text)
630 634 self._mancache[node] = (m, arraytext)
631 635 return m
632 636
633 637 def find(self, node, f):
634 638 '''look up entry for a single file efficiently.
635 639 return (node, flags) pair if found, (None, None) if not.'''
636 640 m = self.read(node)
637 641 try:
638 642 return m.find(f)
639 643 except KeyError:
640 644 return None, None
641 645
642 646 def add(self, m, transaction, link, p1, p2, added, removed):
643 647 if p1 in self._mancache and not self._usetreemanifest:
644 648 # If our first parent is in the manifest cache, we can
645 649 # compute a delta here using properties we know about the
646 650 # manifest up-front, which may save time later for the
647 651 # revlog layer.
648 652
649 653 _checkforbidden(added)
650 654 # combine the changed lists into one list for sorting
651 655 work = [(x, False) for x in added]
652 656 work.extend((x, True) for x in removed)
653 657 # this could use heapq.merge() (from Python 2.6+) or equivalent
654 658 # since the lists are already sorted
655 659 work.sort()
656 660
657 661 arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
658 662 cachedelta = self.rev(p1), deltatext
659 663 text = util.buffer(arraytext)
660 664 else:
661 665 # The first parent manifest isn't already loaded, so we'll
662 666 # just encode a fulltext of the manifest and pass that
663 667 # through to the revlog layer, and let it handle the delta
664 668 # process.
665 669 text = m.text()
666 670 arraytext = array.array('c', text)
667 671 cachedelta = None
668 672
669 673 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
670 674 self._mancache[n] = (m, arraytext)
671 675
672 676 return n
General Comments 0
You need to be logged in to leave comments. Login now