##// END OF EJS Templates
manifest: extract method for parsing manifest...
Martin von Zweigbergk -
r24524:63b60313 default
parent child Browse files
Show More
@@ -1,668 +1,672
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import mdiff, parsers, error, revlog, util, scmutil
10 10 import array, struct
11 11
12 12 propertycache = util.propertycache
13 13
14 def _parse(data):
15 """Generates (path, node, flags) tuples from a manifest text"""
16 # This method does a little bit of excessive-looking
17 # precondition checking. This is so that the behavior of this
18 # class exactly matches its C counterpart to try and help
19 # prevent surprise breakage for anyone that develops against
20 # the pure version.
21 if data and data[-1] != '\n':
22 raise ValueError('Manifest did not end in a newline.')
23 prev = None
24 for l in data.splitlines():
25 if prev is not None and prev > l:
26 raise ValueError('Manifest lines not in sorted order.')
27 prev = l
28 f, n = l.split('\0')
29 if len(n) > 40:
30 yield f, revlog.bin(n[:40]), n[40:]
31 else:
32 yield f, revlog.bin(n), ''
33
14 34 class _lazymanifest(dict):
15 35 """This is the pure implementation of lazymanifest.
16 36
17 37 It has not been optimized *at all* and is not lazy.
18 38 """
19 39
20 40 def __init__(self, data):
21 # This init method does a little bit of excessive-looking
22 # precondition checking. This is so that the behavior of this
23 # class exactly matches its C counterpart to try and help
24 # prevent surprise breakage for anyone that develops against
25 # the pure version.
26 if data and data[-1] != '\n':
27 raise ValueError('Manifest did not end in a newline.')
28 41 dict.__init__(self)
29 prev = None
30 for l in data.splitlines():
31 if prev is not None and prev > l:
32 raise ValueError('Manifest lines not in sorted order.')
33 prev = l
34 f, n = l.split('\0')
35 if len(n) > 40:
36 self[f] = revlog.bin(n[:40]), n[40:]
37 else:
38 self[f] = revlog.bin(n), ''
42 for f, n, fl in _parse(data):
43 self[f] = n, fl
39 44
40 45 def __setitem__(self, k, v):
41 46 node, flag = v
42 47 assert node is not None
43 48 if len(node) > 21:
44 49 node = node[:21] # match c implementation behavior
45 50 dict.__setitem__(self, k, (node, flag))
46 51
47 52 def __iter__(self):
48 53 return iter(sorted(dict.keys(self)))
49 54
50 55 def iterkeys(self):
51 56 return iter(sorted(dict.keys(self)))
52 57
53 58 def iterentries(self):
54 59 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
55 60
56 61 def copy(self):
57 62 c = _lazymanifest('')
58 63 c.update(self)
59 64 return c
60 65
61 66 def diff(self, m2, clean=False):
62 67 '''Finds changes between the current manifest and m2.'''
63 68 diff = {}
64 69
65 70 for fn, e1 in self.iteritems():
66 71 if fn not in m2:
67 72 diff[fn] = e1, (None, '')
68 73 else:
69 74 e2 = m2[fn]
70 75 if e1 != e2:
71 76 diff[fn] = e1, e2
72 77 elif clean:
73 78 diff[fn] = None
74 79
75 80 for fn, e2 in m2.iteritems():
76 81 if fn not in self:
77 82 diff[fn] = (None, ''), e2
78 83
79 84 return diff
80 85
81 86 def filtercopy(self, filterfn):
82 87 c = _lazymanifest('')
83 88 for f, n, fl in self.iterentries():
84 89 if filterfn(f):
85 90 c[f] = n, fl
86 91 return c
87 92
88 93 def text(self):
89 94 """Get the full data of this manifest as a bytestring."""
90 95 fl = self.iterentries()
91 96
92 97 _hex = revlog.hex
93 98 # if this is changed to support newlines in filenames,
94 99 # be sure to check the templates/ dir again (especially *-raw.tmpl)
95 100 return ''.join("%s\0%s%s\n" % (
96 101 f, _hex(n[:20]), flag) for f, n, flag in fl)
97 102
98 103 try:
99 104 _lazymanifest = parsers.lazymanifest
100 105 except AttributeError:
101 106 pass
102 107
103 108 class manifestdict(object):
104 109 def __init__(self, data=''):
105 110 self._lm = _lazymanifest(data)
106 111
107 112 def __getitem__(self, key):
108 113 return self._lm[key][0]
109 114
110 115 def find(self, key):
111 116 return self._lm[key]
112 117
113 118 def __len__(self):
114 119 return len(self._lm)
115 120
116 121 def __setitem__(self, key, node):
117 122 self._lm[key] = node, self.flags(key, '')
118 123
119 124 def __contains__(self, key):
120 125 return key in self._lm
121 126
122 127 def __delitem__(self, key):
123 128 del self._lm[key]
124 129
125 130 def __iter__(self):
126 131 return self._lm.__iter__()
127 132
128 133 def iterkeys(self):
129 134 return self._lm.iterkeys()
130 135
131 136 def keys(self):
132 137 return list(self.iterkeys())
133 138
134 139 def _intersectfiles(self, files):
135 140 '''make a new lazymanifest with the intersection of self with files
136 141
137 142 The algorithm assumes that files is much smaller than self.'''
138 143 ret = manifestdict()
139 144 lm = self._lm
140 145 for fn in files:
141 146 if fn in lm:
142 147 ret._lm[fn] = self._lm[fn]
143 148 return ret
144 149
145 150 def filesnotin(self, m2):
146 151 '''Set of files in this manifest that are not in the other'''
147 152 files = set(self)
148 153 files.difference_update(m2)
149 154 return files
150 155
151 156 @propertycache
152 157 def _dirs(self):
153 158 return scmutil.dirs(self)
154 159
155 160 def dirs(self):
156 161 return self._dirs
157 162
158 163 def hasdir(self, dir):
159 164 return dir in self._dirs
160 165
161 166 def matches(self, match):
162 167 '''generate a new manifest filtered by the match argument'''
163 168 if match.always():
164 169 return self.copy()
165 170
166 171 files = match.files()
167 172 if (len(files) < 100 and (match.isexact() or
168 173 (not match.anypats() and util.all(fn in self for fn in files)))):
169 174 return self._intersectfiles(files)
170 175
171 176 lm = manifestdict('')
172 177 lm._lm = self._lm.filtercopy(match)
173 178 return lm
174 179
175 180 def diff(self, m2, clean=False):
176 181 '''Finds changes between the current manifest and m2.
177 182
178 183 Args:
179 184 m2: the manifest to which this manifest should be compared.
180 185 clean: if true, include files unchanged between these manifests
181 186 with a None value in the returned dictionary.
182 187
183 188 The result is returned as a dict with filename as key and
184 189 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
185 190 nodeid in the current/other manifest and fl1/fl2 is the flag
186 191 in the current/other manifest. Where the file does not exist,
187 192 the nodeid will be None and the flags will be the empty
188 193 string.
189 194 '''
190 195 return self._lm.diff(m2._lm, clean)
191 196
192 197 def setflag(self, key, flag):
193 198 self._lm[key] = self[key], flag
194 199
195 200 def get(self, key, default=None):
196 201 try:
197 202 return self._lm[key][0]
198 203 except KeyError:
199 204 return default
200 205
201 206 def flags(self, key, default=''):
202 207 try:
203 208 return self._lm[key][1]
204 209 except KeyError:
205 210 return default
206 211
207 212 def copy(self):
208 213 c = manifestdict('')
209 214 c._lm = self._lm.copy()
210 215 return c
211 216
212 217 def iteritems(self):
213 218 return (x[:2] for x in self._lm.iterentries())
214 219
215 220 def text(self):
216 221 return self._lm.text()
217 222
218 223 def fastdelta(self, base, changes):
219 224 """Given a base manifest text as an array.array and a list of changes
220 225 relative to that text, compute a delta that can be used by revlog.
221 226 """
222 227 delta = []
223 228 dstart = None
224 229 dend = None
225 230 dline = [""]
226 231 start = 0
227 232 # zero copy representation of base as a buffer
228 233 addbuf = util.buffer(base)
229 234
230 235 # start with a readonly loop that finds the offset of
231 236 # each line and creates the deltas
232 237 for f, todelete in changes:
233 238 # bs will either be the index of the item or the insert point
234 239 start, end = _msearch(addbuf, f, start)
235 240 if not todelete:
236 241 h, fl = self._lm[f]
237 242 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
238 243 else:
239 244 if start == end:
240 245 # item we want to delete was not found, error out
241 246 raise AssertionError(
242 247 _("failed to remove %s from manifest") % f)
243 248 l = ""
244 249 if dstart is not None and dstart <= start and dend >= start:
245 250 if dend < end:
246 251 dend = end
247 252 if l:
248 253 dline.append(l)
249 254 else:
250 255 if dstart is not None:
251 256 delta.append([dstart, dend, "".join(dline)])
252 257 dstart = start
253 258 dend = end
254 259 dline = [l]
255 260
256 261 if dstart is not None:
257 262 delta.append([dstart, dend, "".join(dline)])
258 263 # apply the delta to the base, and get a delta for addrevision
259 264 deltatext, arraytext = _addlistdelta(base, delta)
260 265 return arraytext, deltatext
261 266
262 267 def _msearch(m, s, lo=0, hi=None):
263 268 '''return a tuple (start, end) that says where to find s within m.
264 269
265 270 If the string is found m[start:end] are the line containing
266 271 that string. If start == end the string was not found and
267 272 they indicate the proper sorted insertion point.
268 273
269 274 m should be a buffer or a string
270 275 s is a string'''
271 276 def advance(i, c):
272 277 while i < lenm and m[i] != c:
273 278 i += 1
274 279 return i
275 280 if not s:
276 281 return (lo, lo)
277 282 lenm = len(m)
278 283 if not hi:
279 284 hi = lenm
280 285 while lo < hi:
281 286 mid = (lo + hi) // 2
282 287 start = mid
283 288 while start > 0 and m[start - 1] != '\n':
284 289 start -= 1
285 290 end = advance(start, '\0')
286 291 if m[start:end] < s:
287 292 # we know that after the null there are 40 bytes of sha1
288 293 # this translates to the bisect lo = mid + 1
289 294 lo = advance(end + 40, '\n') + 1
290 295 else:
291 296 # this translates to the bisect hi = mid
292 297 hi = start
293 298 end = advance(lo, '\0')
294 299 found = m[lo:end]
295 300 if s == found:
296 301 # we know that after the null there are 40 bytes of sha1
297 302 end = advance(end + 40, '\n')
298 303 return (lo, end + 1)
299 304 else:
300 305 return (lo, lo)
301 306
302 307 def _checkforbidden(l):
303 308 """Check filenames for illegal characters."""
304 309 for f in l:
305 310 if '\n' in f or '\r' in f:
306 311 raise error.RevlogError(
307 312 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
308 313
309 314
310 315 # apply the changes collected during the bisect loop to our addlist
311 316 # return a delta suitable for addrevision
312 317 def _addlistdelta(addlist, x):
313 318 # for large addlist arrays, building a new array is cheaper
314 319 # than repeatedly modifying the existing one
315 320 currentposition = 0
316 321 newaddlist = array.array('c')
317 322
318 323 for start, end, content in x:
319 324 newaddlist += addlist[currentposition:start]
320 325 if content:
321 326 newaddlist += array.array('c', content)
322 327
323 328 currentposition = end
324 329
325 330 newaddlist += addlist[currentposition:]
326 331
327 332 deltatext = "".join(struct.pack(">lll", start, end, len(content))
328 333 + content for start, end, content in x)
329 334 return deltatext, newaddlist
330 335
331 336 def _splittopdir(f):
332 337 if '/' in f:
333 338 dir, subpath = f.split('/', 1)
334 339 return dir + '/', subpath
335 340 else:
336 341 return '', f
337 342
338 343 class treemanifest(object):
339 344 def __init__(self, dir='', text=''):
340 345 self._dir = dir
341 346 self._dirs = {}
342 347 # Using _lazymanifest here is a little slower than plain old dicts
343 348 self._files = {}
344 349 self._flags = {}
345 lm = _lazymanifest(text)
346 for f, n, fl in lm.iterentries():
350 for f, n, fl in _parse(text):
347 351 self[f] = n
348 352 if fl:
349 353 self.setflag(f, fl)
350 354
351 355 def _subpath(self, path):
352 356 return self._dir + path
353 357
354 358 def __len__(self):
355 359 size = len(self._files)
356 360 for m in self._dirs.values():
357 361 size += m.__len__()
358 362 return size
359 363
360 364 def __str__(self):
361 365 return '<treemanifest dir=%s>' % self._dir
362 366
363 367 def iteritems(self):
364 368 for p, n in sorted(self._dirs.items() + self._files.items()):
365 369 if p in self._files:
366 370 yield self._subpath(p), n
367 371 else:
368 372 for f, sn in n.iteritems():
369 373 yield f, sn
370 374
371 375 def iterkeys(self):
372 376 for p in sorted(self._dirs.keys() + self._files.keys()):
373 377 if p in self._files:
374 378 yield self._subpath(p)
375 379 else:
376 380 for f in self._dirs[p].iterkeys():
377 381 yield f
378 382
379 383 def keys(self):
380 384 return list(self.iterkeys())
381 385
382 386 def __iter__(self):
383 387 return self.iterkeys()
384 388
385 389 def __contains__(self, f):
386 390 if f is None:
387 391 return False
388 392 dir, subpath = _splittopdir(f)
389 393 if dir:
390 394 if dir not in self._dirs:
391 395 return False
392 396 return self._dirs[dir].__contains__(subpath)
393 397 else:
394 398 return f in self._files
395 399
396 400 def get(self, f, default=None):
397 401 dir, subpath = _splittopdir(f)
398 402 if dir:
399 403 if dir not in self._dirs:
400 404 return default
401 405 return self._dirs[dir].get(subpath, default)
402 406 else:
403 407 return self._files.get(f, default)
404 408
405 409 def __getitem__(self, f):
406 410 dir, subpath = _splittopdir(f)
407 411 if dir:
408 412 return self._dirs[dir].__getitem__(subpath)
409 413 else:
410 414 return self._files[f]
411 415
412 416 def flags(self, f):
413 417 dir, subpath = _splittopdir(f)
414 418 if dir:
415 419 if dir not in self._dirs:
416 420 return ''
417 421 return self._dirs[dir].flags(subpath)
418 422 else:
419 423 if f in self._dirs:
420 424 return ''
421 425 return self._flags.get(f, '')
422 426
423 427 def find(self, f):
424 428 dir, subpath = _splittopdir(f)
425 429 if dir:
426 430 return self._dirs[dir].find(subpath)
427 431 else:
428 432 return self._files[f], self._flags.get(f, '')
429 433
430 434 def __delitem__(self, f):
431 435 dir, subpath = _splittopdir(f)
432 436 if dir:
433 437 self._dirs[dir].__delitem__(subpath)
434 438 # If the directory is now empty, remove it
435 439 if not self._dirs[dir]._dirs and not self._dirs[dir]._files:
436 440 del self._dirs[dir]
437 441 else:
438 442 del self._files[f]
439 443 if f in self._flags:
440 444 del self._flags[f]
441 445
442 446 def __setitem__(self, f, n):
443 447 assert n is not None
444 448 dir, subpath = _splittopdir(f)
445 449 if dir:
446 450 if dir not in self._dirs:
447 451 self._dirs[dir] = treemanifest(self._subpath(dir))
448 452 self._dirs[dir].__setitem__(subpath, n)
449 453 else:
450 454 self._files[f] = n[:21] # to match manifestdict's behavior
451 455
452 456 def setflag(self, f, flags):
453 457 """Set the flags (symlink, executable) for path f."""
454 458 dir, subpath = _splittopdir(f)
455 459 if dir:
456 460 if dir not in self._dirs:
457 461 self._dirs[dir] = treemanifest(self._subpath(dir))
458 462 self._dirs[dir].setflag(subpath, flags)
459 463 else:
460 464 self._flags[f] = flags
461 465
462 466 def copy(self):
463 467 copy = treemanifest(self._dir)
464 468 for d in self._dirs:
465 469 copy._dirs[d] = self._dirs[d].copy()
466 470 copy._files = dict.copy(self._files)
467 471 copy._flags = dict.copy(self._flags)
468 472 return copy
469 473
470 474 def _intersectfiles(self, files):
471 475 '''make a new treemanifest with the intersection of self with files
472 476
473 477 The algorithm assumes that files is much smaller than self.'''
474 478 ret = treemanifest()
475 479 for fn in files:
476 480 if fn in self:
477 481 ret[fn] = self[fn]
478 482 flags = self.flags(fn)
479 483 if flags:
480 484 ret.setflag(fn, flags)
481 485 return ret
482 486
483 487 def filesnotin(self, m2):
484 488 '''Set of files in this manifest that are not in the other'''
485 489 files = set()
486 490 def _filesnotin(t1, t2):
487 491 for d, m1 in t1._dirs.iteritems():
488 492 if d in t2._dirs:
489 493 m2 = t2._dirs[d]
490 494 _filesnotin(m1, m2)
491 495 else:
492 496 files.update(m1.iterkeys())
493 497
494 498 for fn in t1._files.iterkeys():
495 499 if fn not in t2._files:
496 500 files.add(t1._subpath(fn))
497 501
498 502 _filesnotin(self, m2)
499 503 return files
500 504
501 505 @propertycache
502 506 def _alldirs(self):
503 507 return scmutil.dirs(self)
504 508
505 509 def dirs(self):
506 510 return self._alldirs
507 511
508 512 def hasdir(self, dir):
509 513 topdir, subdir = _splittopdir(dir)
510 514 if topdir:
511 515 if topdir in self._dirs:
512 516 return self._dirs[topdir].hasdir(subdir)
513 517 return False
514 518 return (dir + '/') in self._dirs
515 519
516 520 def matches(self, match):
517 521 '''generate a new manifest filtered by the match argument'''
518 522 if match.always():
519 523 return self.copy()
520 524
521 525 files = match.files()
522 526 if (match.isexact() or
523 527 (not match.anypats() and util.all(fn in self for fn in files))):
524 528 return self._intersectfiles(files)
525 529
526 530 m = self.copy()
527 531 for fn in m.keys():
528 532 if not match(fn):
529 533 del m[fn]
530 534 return m
531 535
532 536 def diff(self, m2, clean=False):
533 537 '''Finds changes between the current manifest and m2.
534 538
535 539 Args:
536 540 m2: the manifest to which this manifest should be compared.
537 541 clean: if true, include files unchanged between these manifests
538 542 with a None value in the returned dictionary.
539 543
540 544 The result is returned as a dict with filename as key and
541 545 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
542 546 nodeid in the current/other manifest and fl1/fl2 is the flag
543 547 in the current/other manifest. Where the file does not exist,
544 548 the nodeid will be None and the flags will be the empty
545 549 string.
546 550 '''
547 551 result = {}
548 552 emptytree = treemanifest()
549 553 def _diff(t1, t2):
550 554 for d, m1 in t1._dirs.iteritems():
551 555 m2 = t2._dirs.get(d, emptytree)
552 556 _diff(m1, m2)
553 557
554 558 for d, m2 in t2._dirs.iteritems():
555 559 if d not in t1._dirs:
556 560 _diff(emptytree, m2)
557 561
558 562 for fn, n1 in t1._files.iteritems():
559 563 fl1 = t1._flags.get(fn, '')
560 564 n2 = t2._files.get(fn, None)
561 565 fl2 = t2._flags.get(fn, '')
562 566 if n1 != n2 or fl1 != fl2:
563 567 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
564 568 elif clean:
565 569 result[t1._subpath(fn)] = None
566 570
567 571 for fn, n2 in t2._files.iteritems():
568 572 if fn not in t1._files:
569 573 fl2 = t2._flags.get(fn, '')
570 574 result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
571 575
572 576 _diff(self, m2)
573 577 return result
574 578
575 579 def text(self):
576 580 """Get the full data of this manifest as a bytestring."""
577 581 fl = self.keys()
578 582 _checkforbidden(fl)
579 583
580 584 hex, flags = revlog.hex, self.flags
581 585 # if this is changed to support newlines in filenames,
582 586 # be sure to check the templates/ dir again (especially *-raw.tmpl)
583 587 return ''.join("%s\0%s%s\n" % (f, hex(self[f]), flags(f)) for f in fl)
584 588
585 589 class manifest(revlog.revlog):
586 590 def __init__(self, opener):
587 591 # During normal operations, we expect to deal with not more than four
588 592 # revs at a time (such as during commit --amend). When rebasing large
589 593 # stacks of commits, the number can go up, hence the config knob below.
590 594 cachesize = 4
591 595 usetreemanifest = False
592 596 opts = getattr(opener, 'options', None)
593 597 if opts is not None:
594 598 cachesize = opts.get('manifestcachesize', cachesize)
595 599 usetreemanifest = opts.get('usetreemanifest', usetreemanifest)
596 600 self._mancache = util.lrucachedict(cachesize)
597 601 revlog.revlog.__init__(self, opener, "00manifest.i")
598 602 self._usetreemanifest = usetreemanifest
599 603
600 604 def _newmanifest(self, data=''):
601 605 if self._usetreemanifest:
602 606 return treemanifest('', data)
603 607 return manifestdict(data)
604 608
605 609 def readdelta(self, node):
606 610 r = self.rev(node)
607 611 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
608 612 return self._newmanifest(d)
609 613
610 614 def readfast(self, node):
611 615 '''use the faster of readdelta or read'''
612 616 r = self.rev(node)
613 617 deltaparent = self.deltaparent(r)
614 618 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
615 619 return self.readdelta(node)
616 620 return self.read(node)
617 621
618 622 def read(self, node):
619 623 if node == revlog.nullid:
620 624 return self._newmanifest() # don't upset local cache
621 625 if node in self._mancache:
622 626 return self._mancache[node][0]
623 627 text = self.revision(node)
624 628 arraytext = array.array('c', text)
625 629 m = self._newmanifest(text)
626 630 self._mancache[node] = (m, arraytext)
627 631 return m
628 632
629 633 def find(self, node, f):
630 634 '''look up entry for a single file efficiently.
631 635 return (node, flags) pair if found, (None, None) if not.'''
632 636 m = self.read(node)
633 637 try:
634 638 return m.find(f)
635 639 except KeyError:
636 640 return None, None
637 641
638 642 def add(self, m, transaction, link, p1, p2, added, removed):
639 643 if p1 in self._mancache and not self._usetreemanifest:
640 644 # If our first parent is in the manifest cache, we can
641 645 # compute a delta here using properties we know about the
642 646 # manifest up-front, which may save time later for the
643 647 # revlog layer.
644 648
645 649 _checkforbidden(added)
646 650 # combine the changed lists into one list for sorting
647 651 work = [(x, False) for x in added]
648 652 work.extend((x, True) for x in removed)
649 653 # this could use heapq.merge() (from Python 2.6+) or equivalent
650 654 # since the lists are already sorted
651 655 work.sort()
652 656
653 657 arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
654 658 cachedelta = self.rev(p1), deltatext
655 659 text = util.buffer(arraytext)
656 660 else:
657 661 # The first parent manifest isn't already loaded, so we'll
658 662 # just encode a fulltext of the manifest and pass that
659 663 # through to the revlog layer, and let it handle the delta
660 664 # process.
661 665 text = m.text()
662 666 arraytext = array.array('c', text)
663 667 cachedelta = None
664 668
665 669 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
666 670 self._mancache[n] = (m, arraytext)
667 671
668 672 return n
General Comments 0
You need to be logged in to leave comments. Login now