##// END OF EJS Templates
lazymanifest: write a more efficient, pypy friendly version of lazymanifest
Maciej Fijalkowski -
r30042:d24e03da default
parent child Browse files
Show More
@@ -1,1299 +1,1530 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import array
11 11 import heapq
12 12 import os
13 13 import struct
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 error,
18 18 mdiff,
19 19 parsers,
20 20 revlog,
21 21 util,
22 22 )
23 23
24 24 propertycache = util.propertycache
25 25
26 26 def _parsev1(data):
27 27 # This method does a little bit of excessive-looking
28 28 # precondition checking. This is so that the behavior of this
29 29 # class exactly matches its C counterpart to try and help
30 30 # prevent surprise breakage for anyone that develops against
31 31 # the pure version.
32 32 if data and data[-1] != '\n':
33 33 raise ValueError('Manifest did not end in a newline.')
34 34 prev = None
35 35 for l in data.splitlines():
36 36 if prev is not None and prev > l:
37 37 raise ValueError('Manifest lines not in sorted order.')
38 38 prev = l
39 39 f, n = l.split('\0')
40 40 if len(n) > 40:
41 41 yield f, revlog.bin(n[:40]), n[40:]
42 42 else:
43 43 yield f, revlog.bin(n), ''
44 44
45 45 def _parsev2(data):
46 46 metadataend = data.find('\n')
47 47 # Just ignore metadata for now
48 48 pos = metadataend + 1
49 49 prevf = ''
50 50 while pos < len(data):
51 51 end = data.find('\n', pos + 1) # +1 to skip stem length byte
52 52 if end == -1:
53 53 raise ValueError('Manifest ended with incomplete file entry.')
54 54 stemlen = ord(data[pos])
55 55 items = data[pos + 1:end].split('\0')
56 56 f = prevf[:stemlen] + items[0]
57 57 if prevf > f:
58 58 raise ValueError('Manifest entries not in sorted order.')
59 59 fl = items[1]
60 60 # Just ignore metadata (items[2:] for now)
61 61 n = data[end + 1:end + 21]
62 62 yield f, n, fl
63 63 pos = end + 22
64 64 prevf = f
65 65
66 66 def _parse(data):
67 67 """Generates (path, node, flags) tuples from a manifest text"""
68 68 if data.startswith('\0'):
69 69 return iter(_parsev2(data))
70 70 else:
71 71 return iter(_parsev1(data))
72 72
73 73 def _text(it, usemanifestv2):
74 74 """Given an iterator over (path, node, flags) tuples, returns a manifest
75 75 text"""
76 76 if usemanifestv2:
77 77 return _textv2(it)
78 78 else:
79 79 return _textv1(it)
80 80
81 81 def _textv1(it):
82 82 files = []
83 83 lines = []
84 84 _hex = revlog.hex
85 85 for f, n, fl in it:
86 86 files.append(f)
87 87 # if this is changed to support newlines in filenames,
88 88 # be sure to check the templates/ dir again (especially *-raw.tmpl)
89 89 lines.append("%s\0%s%s\n" % (f, _hex(n), fl))
90 90
91 91 _checkforbidden(files)
92 92 return ''.join(lines)
93 93
94 94 def _textv2(it):
95 95 files = []
96 96 lines = ['\0\n']
97 97 prevf = ''
98 98 for f, n, fl in it:
99 99 files.append(f)
100 100 stem = os.path.commonprefix([prevf, f])
101 101 stemlen = min(len(stem), 255)
102 102 lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n))
103 103 prevf = f
104 104 _checkforbidden(files)
105 105 return ''.join(lines)
106 106
107 class _lazymanifest(dict):
108 """This is the pure implementation of lazymanifest.
107 class lazymanifestiter(object):
108 def __init__(self, lm):
109 self.pos = 0
110 self.lm = lm
109 111
110 It has not been optimized *at all* and is not lazy.
111 """
112 def __iter__(self):
113 return self
112 114
113 def __init__(self, data):
114 dict.__init__(self)
115 for f, n, fl in _parse(data):
116 self[f] = n, fl
115 def next(self):
116 try:
117 data, pos = self.lm._get(self.pos)
118 except IndexError:
119 raise StopIteration
120 if pos == -1:
121 self.pos += 1
122 return data[0]
123 self.pos += 1
124 zeropos = data.find('\x00', pos)
125 return data[pos:zeropos]
117 126
118 def __setitem__(self, k, v):
119 node, flag = v
120 assert node is not None
121 if len(node) > 21:
122 node = node[:21] # match c implementation behavior
123 dict.__setitem__(self, k, (node, flag))
127 class lazymanifestiterentries(object):
128 def __init__(self, lm):
129 self.lm = lm
130 self.pos = 0
124 131
125 132 def __iter__(self):
126 return iter(sorted(dict.keys(self)))
133 return self
134
135 def next(self):
136 try:
137 data, pos = self.lm._get(self.pos)
138 except IndexError:
139 raise StopIteration
140 if pos == -1:
141 self.pos += 1
142 return data
143 zeropos = data.find('\x00', pos)
144 hashval = unhexlify(data, self.lm.extrainfo[self.pos],
145 zeropos + 1, 40)
146 flags = self.lm._getflags(data, self.pos, zeropos)
147 self.pos += 1
148 return (data[pos:zeropos], hashval, flags)
149
150 def unhexlify(data, extra, pos, length):
151 s = data[pos:pos + length].decode('hex')
152 if extra:
153 s += chr(extra & 0xff)
154 return s
155
156 def _cmp(a, b):
157 return (a > b) - (a < b)
158
159 class _lazymanifest(object):
160 def __init__(self, data, positions=None, extrainfo=None, extradata=None):
161 if positions is None:
162 self.positions = self.findlines(data)
163 self.extrainfo = [0] * len(self.positions)
164 self.data = data
165 self.extradata = []
166 else:
167 self.positions = positions[:]
168 self.extrainfo = extrainfo[:]
169 self.extradata = extradata[:]
170 self.data = data
171
172 def findlines(self, data):
173 if not data:
174 return []
175 pos = data.find("\n")
176 if pos == -1 or data[-1] != '\n':
177 raise ValueError("Manifest did not end in a newline.")
178 positions = [0]
179 prev = data[:data.find('\x00')]
180 while pos < len(data) - 1 and pos != -1:
181 positions.append(pos + 1)
182 nexts = data[pos + 1:data.find('\x00', pos + 1)]
183 if nexts < prev:
184 raise ValueError("Manifest lines not in sorted order.")
185 prev = nexts
186 pos = data.find("\n", pos + 1)
187 return positions
188
189 def _get(self, index):
190 # get the position encoded in pos:
191 # positive number is an index in 'data'
192 # negative number is in extrapieces
193 pos = self.positions[index]
194 if pos >= 0:
195 return self.data, pos
196 return self.extradata[-pos - 1], -1
197
198 def _getkey(self, pos):
199 if pos >= 0:
200 return self.data[pos:self.data.find('\x00', pos + 1)]
201 return self.extradata[-pos - 1][0]
202
203 def bsearch(self, key):
204 first = 0
205 last = len(self.positions) - 1
206
207 while first <= last:
208 midpoint = (first + last)//2
209 nextpos = self.positions[midpoint]
210 candidate = self._getkey(nextpos)
211 r = _cmp(key, candidate)
212 if r == 0:
213 return midpoint
214 else:
215 if r < 0:
216 last = midpoint - 1
217 else:
218 first = midpoint + 1
219 return -1
127 220
128 def iterkeys(self):
129 return iter(sorted(dict.keys(self)))
221 def bsearch2(self, key):
222 # same as the above, but will always return the position
223 # done for performance reasons
224 first = 0
225 last = len(self.positions) - 1
226
227 while first <= last:
228 midpoint = (first + last)//2
229 nextpos = self.positions[midpoint]
230 candidate = self._getkey(nextpos)
231 r = _cmp(key, candidate)
232 if r == 0:
233 return (midpoint, True)
234 else:
235 if r < 0:
236 last = midpoint - 1
237 else:
238 first = midpoint + 1
239 return (first, False)
240
241 def __contains__(self, key):
242 return self.bsearch(key) != -1
243
244 def _getflags(self, data, needle, pos):
245 start = pos + 41
246 end = data.find("\n", start)
247 if end == -1:
248 end = len(data) - 1
249 if start == end:
250 return ''
251 return self.data[start:end]
130 252
131 def iterentries(self):
132 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
253 def __getitem__(self, key):
254 if not isinstance(key, str):
255 raise TypeError("getitem: manifest keys must be a string.")
256 needle = self.bsearch(key)
257 if needle == -1:
258 raise KeyError
259 data, pos = self._get(needle)
260 if pos == -1:
261 return (data[1], data[2])
262 zeropos = data.find('\x00', pos)
263 assert 0 <= needle <= len(self.positions)
264 assert len(self.extrainfo) == len(self.positions)
265 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
266 flags = self._getflags(data, needle, zeropos)
267 return (hashval, flags)
268
269 def __delitem__(self, key):
270 needle, found = self.bsearch2(key)
271 if not found:
272 raise KeyError
273 cur = self.positions[needle]
274 self.positions = self.positions[:needle] + self.positions[needle + 1:]
275 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:]
276 if cur >= 0:
277 self.data = self.data[:cur] + '\x00' + self.data[cur + 1:]
278
279 def __setitem__(self, key, value):
280 if not isinstance(key, str):
281 raise TypeError("setitem: manifest keys must be a string.")
282 if not isinstance(value, tuple) or len(value) != 2:
283 raise TypeError("Manifest values must be a tuple of (node, flags).")
284 hashval = value[0]
285 if not isinstance(hashval, str) or not 20 <= len(hashval) <= 22:
286 raise TypeError("node must be a 20-byte string")
287 flags = value[1]
288 if len(hashval) == 22:
289 hashval = hashval[:-1]
290 if not isinstance(flags, str) or len(flags) > 1:
291 raise TypeError("flags must a 0 or 1 byte string, got %r", flags)
292 needle, found = self.bsearch2(key)
293 if found:
294 # put the item
295 pos = self.positions[needle]
296 if pos < 0:
297 self.extradata[-pos - 1] = (key, hashval, value[1])
298 else:
299 # just don't bother
300 self.extradata.append((key, hashval, value[1]))
301 self.positions[needle] = -len(self.extradata)
302 else:
303 # not found, put it in with extra positions
304 self.extradata.append((key, hashval, value[1]))
305 self.positions = (self.positions[:needle] + [-len(self.extradata)]
306 + self.positions[needle:])
307 self.extrainfo = (self.extrainfo[:needle] + [0] +
308 self.extrainfo[needle:])
133 309
134 310 def copy(self):
135 c = _lazymanifest('')
136 c.update(self)
137 return c
311 # XXX call _compact like in C?
312 return _lazymanifest(self.data, self.positions, self.extrainfo,
313 self.extradata)
314
315 def _compact(self):
316 # hopefully not called TOO often
317 if len(self.extradata) == 0:
318 return
319 l = []
320 last_cut = 0
321 i = 0
322 offset = 0
323 self.extrainfo = [0] * len(self.positions)
324 while i < len(self.positions):
325 if self.positions[i] >= 0:
326 cur = self.positions[i]
327 last_cut = cur
328 while True:
329 self.positions[i] = offset
330 i += 1
331 if i == len(self.positions) or self.positions[i] < 0:
332 break
333 offset += self.positions[i] - cur
334 cur = self.positions[i]
335 end_cut = self.data.find('\n', cur)
336 if end_cut != -1:
337 end_cut += 1
338 offset += end_cut - cur
339 l.append(self.data[last_cut:end_cut])
340 else:
341 while i < len(self.positions) and self.positions[i] < 0:
342 cur = self.positions[i]
343 t = self.extradata[-cur - 1]
344 l.append(self._pack(t))
345 self.positions[i] = offset
346 if len(t[1]) > 20:
347 self.extrainfo[i] = ord(t[1][21])
348 offset += len(l[-1])
349 i += 1
350 self.data = ''.join(l)
351 self.extradata = []
352
353 def _pack(self, d):
354 return d[0] + '\x00' + d[1][:20].encode('hex') + d[2] + '\n'
355
356 def text(self):
357 self._compact()
358 return self.data
138 359
139 360 def diff(self, m2, clean=False):
140 361 '''Finds changes between the current manifest and m2.'''
362 # XXX think whether efficiency matters here
141 363 diff = {}
142 364
143 for fn, e1 in self.iteritems():
365 for fn, e1, flags in self.iterentries():
144 366 if fn not in m2:
145 diff[fn] = e1, (None, '')
367 diff[fn] = (e1, flags), (None, '')
146 368 else:
147 369 e2 = m2[fn]
148 if e1 != e2:
149 diff[fn] = e1, e2
370 if (e1, flags) != e2:
371 diff[fn] = (e1, flags), e2
150 372 elif clean:
151 373 diff[fn] = None
152 374
153 for fn, e2 in m2.iteritems():
375 for fn, e2, flags in m2.iterentries():
154 376 if fn not in self:
155 diff[fn] = (None, ''), e2
377 diff[fn] = (None, ''), (e2, flags)
156 378
157 379 return diff
158 380
381 def iterentries(self):
382 return lazymanifestiterentries(self)
383
384 def iterkeys(self):
385 return lazymanifestiter(self)
386
387 def __iter__(self):
388 return lazymanifestiter(self)
389
390 def __len__(self):
391 return len(self.positions)
392
159 393 def filtercopy(self, filterfn):
394 # XXX should be optimized
160 395 c = _lazymanifest('')
161 396 for f, n, fl in self.iterentries():
162 397 if filterfn(f):
163 398 c[f] = n, fl
164 399 return c
165 400
166 def text(self):
167 """Get the full data of this manifest as a bytestring."""
168 return _textv1(self.iterentries())
169
170 401 try:
171 402 _lazymanifest = parsers.lazymanifest
172 403 except AttributeError:
173 404 pass
174 405
175 406 class manifestdict(object):
176 407 def __init__(self, data=''):
177 408 if data.startswith('\0'):
178 409 #_lazymanifest can not parse v2
179 410 self._lm = _lazymanifest('')
180 411 for f, n, fl in _parsev2(data):
181 412 self._lm[f] = n, fl
182 413 else:
183 414 self._lm = _lazymanifest(data)
184 415
185 416 def __getitem__(self, key):
186 417 return self._lm[key][0]
187 418
188 419 def find(self, key):
189 420 return self._lm[key]
190 421
191 422 def __len__(self):
192 423 return len(self._lm)
193 424
194 425 def __setitem__(self, key, node):
195 426 self._lm[key] = node, self.flags(key, '')
196 427
197 428 def __contains__(self, key):
198 429 return key in self._lm
199 430
200 431 def __delitem__(self, key):
201 432 del self._lm[key]
202 433
203 434 def __iter__(self):
204 435 return self._lm.__iter__()
205 436
206 437 def iterkeys(self):
207 438 return self._lm.iterkeys()
208 439
209 440 def keys(self):
210 441 return list(self.iterkeys())
211 442
212 443 def filesnotin(self, m2):
213 444 '''Set of files in this manifest that are not in the other'''
214 445 diff = self.diff(m2)
215 446 files = set(filepath
216 447 for filepath, hashflags in diff.iteritems()
217 448 if hashflags[1][0] is None)
218 449 return files
219 450
220 451 @propertycache
221 452 def _dirs(self):
222 453 return util.dirs(self)
223 454
224 455 def dirs(self):
225 456 return self._dirs
226 457
227 458 def hasdir(self, dir):
228 459 return dir in self._dirs
229 460
230 461 def _filesfastpath(self, match):
231 462 '''Checks whether we can correctly and quickly iterate over matcher
232 463 files instead of over manifest files.'''
233 464 files = match.files()
234 465 return (len(files) < 100 and (match.isexact() or
235 466 (match.prefix() and all(fn in self for fn in files))))
236 467
237 468 def walk(self, match):
238 469 '''Generates matching file names.
239 470
240 471 Equivalent to manifest.matches(match).iterkeys(), but without creating
241 472 an entirely new manifest.
242 473
243 474 It also reports nonexistent files by marking them bad with match.bad().
244 475 '''
245 476 if match.always():
246 477 for f in iter(self):
247 478 yield f
248 479 return
249 480
250 481 fset = set(match.files())
251 482
252 483 # avoid the entire walk if we're only looking for specific files
253 484 if self._filesfastpath(match):
254 485 for fn in sorted(fset):
255 486 yield fn
256 487 return
257 488
258 489 for fn in self:
259 490 if fn in fset:
260 491 # specified pattern is the exact name
261 492 fset.remove(fn)
262 493 if match(fn):
263 494 yield fn
264 495
265 496 # for dirstate.walk, files=['.'] means "walk the whole tree".
266 497 # follow that here, too
267 498 fset.discard('.')
268 499
269 500 for fn in sorted(fset):
270 501 if not self.hasdir(fn):
271 502 match.bad(fn, None)
272 503
273 504 def matches(self, match):
274 505 '''generate a new manifest filtered by the match argument'''
275 506 if match.always():
276 507 return self.copy()
277 508
278 509 if self._filesfastpath(match):
279 510 m = manifestdict()
280 511 lm = self._lm
281 512 for fn in match.files():
282 513 if fn in lm:
283 514 m._lm[fn] = lm[fn]
284 515 return m
285 516
286 517 m = manifestdict()
287 518 m._lm = self._lm.filtercopy(match)
288 519 return m
289 520
290 521 def diff(self, m2, clean=False):
291 522 '''Finds changes between the current manifest and m2.
292 523
293 524 Args:
294 525 m2: the manifest to which this manifest should be compared.
295 526 clean: if true, include files unchanged between these manifests
296 527 with a None value in the returned dictionary.
297 528
298 529 The result is returned as a dict with filename as key and
299 530 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
300 531 nodeid in the current/other manifest and fl1/fl2 is the flag
301 532 in the current/other manifest. Where the file does not exist,
302 533 the nodeid will be None and the flags will be the empty
303 534 string.
304 535 '''
305 536 return self._lm.diff(m2._lm, clean)
306 537
307 538 def setflag(self, key, flag):
308 539 self._lm[key] = self[key], flag
309 540
310 541 def get(self, key, default=None):
311 542 try:
312 543 return self._lm[key][0]
313 544 except KeyError:
314 545 return default
315 546
316 547 def flags(self, key, default=''):
317 548 try:
318 549 return self._lm[key][1]
319 550 except KeyError:
320 551 return default
321 552
322 553 def copy(self):
323 554 c = manifestdict()
324 555 c._lm = self._lm.copy()
325 556 return c
326 557
327 558 def iteritems(self):
328 559 return (x[:2] for x in self._lm.iterentries())
329 560
330 561 def iterentries(self):
331 562 return self._lm.iterentries()
332 563
333 564 def text(self, usemanifestv2=False):
334 565 if usemanifestv2:
335 566 return _textv2(self._lm.iterentries())
336 567 else:
337 568 # use (probably) native version for v1
338 569 return self._lm.text()
339 570
340 571 def fastdelta(self, base, changes):
341 572 """Given a base manifest text as an array.array and a list of changes
342 573 relative to that text, compute a delta that can be used by revlog.
343 574 """
344 575 delta = []
345 576 dstart = None
346 577 dend = None
347 578 dline = [""]
348 579 start = 0
349 580 # zero copy representation of base as a buffer
350 581 addbuf = util.buffer(base)
351 582
352 583 changes = list(changes)
353 584 if len(changes) < 1000:
354 585 # start with a readonly loop that finds the offset of
355 586 # each line and creates the deltas
356 587 for f, todelete in changes:
357 588 # bs will either be the index of the item or the insert point
358 589 start, end = _msearch(addbuf, f, start)
359 590 if not todelete:
360 591 h, fl = self._lm[f]
361 592 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
362 593 else:
363 594 if start == end:
364 595 # item we want to delete was not found, error out
365 596 raise AssertionError(
366 597 _("failed to remove %s from manifest") % f)
367 598 l = ""
368 599 if dstart is not None and dstart <= start and dend >= start:
369 600 if dend < end:
370 601 dend = end
371 602 if l:
372 603 dline.append(l)
373 604 else:
374 605 if dstart is not None:
375 606 delta.append([dstart, dend, "".join(dline)])
376 607 dstart = start
377 608 dend = end
378 609 dline = [l]
379 610
380 611 if dstart is not None:
381 612 delta.append([dstart, dend, "".join(dline)])
382 613 # apply the delta to the base, and get a delta for addrevision
383 614 deltatext, arraytext = _addlistdelta(base, delta)
384 615 else:
385 616 # For large changes, it's much cheaper to just build the text and
386 617 # diff it.
387 618 arraytext = array.array('c', self.text())
388 619 deltatext = mdiff.textdiff(base, arraytext)
389 620
390 621 return arraytext, deltatext
391 622
392 623 def _msearch(m, s, lo=0, hi=None):
393 624 '''return a tuple (start, end) that says where to find s within m.
394 625
395 626 If the string is found m[start:end] are the line containing
396 627 that string. If start == end the string was not found and
397 628 they indicate the proper sorted insertion point.
398 629
399 630 m should be a buffer or a string
400 631 s is a string'''
401 632 def advance(i, c):
402 633 while i < lenm and m[i] != c:
403 634 i += 1
404 635 return i
405 636 if not s:
406 637 return (lo, lo)
407 638 lenm = len(m)
408 639 if not hi:
409 640 hi = lenm
410 641 while lo < hi:
411 642 mid = (lo + hi) // 2
412 643 start = mid
413 644 while start > 0 and m[start - 1] != '\n':
414 645 start -= 1
415 646 end = advance(start, '\0')
416 647 if m[start:end] < s:
417 648 # we know that after the null there are 40 bytes of sha1
418 649 # this translates to the bisect lo = mid + 1
419 650 lo = advance(end + 40, '\n') + 1
420 651 else:
421 652 # this translates to the bisect hi = mid
422 653 hi = start
423 654 end = advance(lo, '\0')
424 655 found = m[lo:end]
425 656 if s == found:
426 657 # we know that after the null there are 40 bytes of sha1
427 658 end = advance(end + 40, '\n')
428 659 return (lo, end + 1)
429 660 else:
430 661 return (lo, lo)
431 662
432 663 def _checkforbidden(l):
433 664 """Check filenames for illegal characters."""
434 665 for f in l:
435 666 if '\n' in f or '\r' in f:
436 667 raise error.RevlogError(
437 668 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
438 669
439 670
440 671 # apply the changes collected during the bisect loop to our addlist
441 672 # return a delta suitable for addrevision
442 673 def _addlistdelta(addlist, x):
443 674 # for large addlist arrays, building a new array is cheaper
444 675 # than repeatedly modifying the existing one
445 676 currentposition = 0
446 677 newaddlist = array.array('c')
447 678
448 679 for start, end, content in x:
449 680 newaddlist += addlist[currentposition:start]
450 681 if content:
451 682 newaddlist += array.array('c', content)
452 683
453 684 currentposition = end
454 685
455 686 newaddlist += addlist[currentposition:]
456 687
457 688 deltatext = "".join(struct.pack(">lll", start, end, len(content))
458 689 + content for start, end, content in x)
459 690 return deltatext, newaddlist
460 691
461 692 def _splittopdir(f):
462 693 if '/' in f:
463 694 dir, subpath = f.split('/', 1)
464 695 return dir + '/', subpath
465 696 else:
466 697 return '', f
467 698
468 699 _noop = lambda s: None
469 700
470 701 class treemanifest(object):
471 702 def __init__(self, dir='', text=''):
472 703 self._dir = dir
473 704 self._node = revlog.nullid
474 705 self._loadfunc = _noop
475 706 self._copyfunc = _noop
476 707 self._dirty = False
477 708 self._dirs = {}
478 709 # Using _lazymanifest here is a little slower than plain old dicts
479 710 self._files = {}
480 711 self._flags = {}
481 712 if text:
482 713 def readsubtree(subdir, subm):
483 714 raise AssertionError('treemanifest constructor only accepts '
484 715 'flat manifests')
485 716 self.parse(text, readsubtree)
486 717 self._dirty = True # Mark flat manifest dirty after parsing
487 718
488 719 def _subpath(self, path):
489 720 return self._dir + path
490 721
491 722 def __len__(self):
492 723 self._load()
493 724 size = len(self._files)
494 725 for m in self._dirs.values():
495 726 size += m.__len__()
496 727 return size
497 728
498 729 def _isempty(self):
499 730 self._load() # for consistency; already loaded by all callers
500 731 return (not self._files and (not self._dirs or
501 732 all(m._isempty() for m in self._dirs.values())))
502 733
503 734 def __repr__(self):
504 735 return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s at 0x%x>' %
505 736 (self._dir, revlog.hex(self._node),
506 737 bool(self._loadfunc is _noop),
507 738 self._dirty, id(self)))
508 739
509 740 def dir(self):
510 741 '''The directory that this tree manifest represents, including a
511 742 trailing '/'. Empty string for the repo root directory.'''
512 743 return self._dir
513 744
514 745 def node(self):
515 746 '''This node of this instance. nullid for unsaved instances. Should
516 747 be updated when the instance is read or written from a revlog.
517 748 '''
518 749 assert not self._dirty
519 750 return self._node
520 751
521 752 def setnode(self, node):
522 753 self._node = node
523 754 self._dirty = False
524 755
525 756 def iterentries(self):
526 757 self._load()
527 758 for p, n in sorted(self._dirs.items() + self._files.items()):
528 759 if p in self._files:
529 760 yield self._subpath(p), n, self._flags.get(p, '')
530 761 else:
531 762 for x in n.iterentries():
532 763 yield x
533 764
534 765 def iteritems(self):
535 766 self._load()
536 767 for p, n in sorted(self._dirs.items() + self._files.items()):
537 768 if p in self._files:
538 769 yield self._subpath(p), n
539 770 else:
540 771 for f, sn in n.iteritems():
541 772 yield f, sn
542 773
543 774 def iterkeys(self):
544 775 self._load()
545 776 for p in sorted(self._dirs.keys() + self._files.keys()):
546 777 if p in self._files:
547 778 yield self._subpath(p)
548 779 else:
549 780 for f in self._dirs[p].iterkeys():
550 781 yield f
551 782
552 783 def keys(self):
553 784 return list(self.iterkeys())
554 785
555 786 def __iter__(self):
556 787 return self.iterkeys()
557 788
558 789 def __contains__(self, f):
559 790 if f is None:
560 791 return False
561 792 self._load()
562 793 dir, subpath = _splittopdir(f)
563 794 if dir:
564 795 if dir not in self._dirs:
565 796 return False
566 797 return self._dirs[dir].__contains__(subpath)
567 798 else:
568 799 return f in self._files
569 800
570 801 def get(self, f, default=None):
571 802 self._load()
572 803 dir, subpath = _splittopdir(f)
573 804 if dir:
574 805 if dir not in self._dirs:
575 806 return default
576 807 return self._dirs[dir].get(subpath, default)
577 808 else:
578 809 return self._files.get(f, default)
579 810
580 811 def __getitem__(self, f):
581 812 self._load()
582 813 dir, subpath = _splittopdir(f)
583 814 if dir:
584 815 return self._dirs[dir].__getitem__(subpath)
585 816 else:
586 817 return self._files[f]
587 818
588 819 def flags(self, f):
589 820 self._load()
590 821 dir, subpath = _splittopdir(f)
591 822 if dir:
592 823 if dir not in self._dirs:
593 824 return ''
594 825 return self._dirs[dir].flags(subpath)
595 826 else:
596 827 if f in self._dirs:
597 828 return ''
598 829 return self._flags.get(f, '')
599 830
600 831 def find(self, f):
601 832 self._load()
602 833 dir, subpath = _splittopdir(f)
603 834 if dir:
604 835 return self._dirs[dir].find(subpath)
605 836 else:
606 837 return self._files[f], self._flags.get(f, '')
607 838
608 839 def __delitem__(self, f):
609 840 self._load()
610 841 dir, subpath = _splittopdir(f)
611 842 if dir:
612 843 self._dirs[dir].__delitem__(subpath)
613 844 # If the directory is now empty, remove it
614 845 if self._dirs[dir]._isempty():
615 846 del self._dirs[dir]
616 847 else:
617 848 del self._files[f]
618 849 if f in self._flags:
619 850 del self._flags[f]
620 851 self._dirty = True
621 852
622 853 def __setitem__(self, f, n):
623 854 assert n is not None
624 855 self._load()
625 856 dir, subpath = _splittopdir(f)
626 857 if dir:
627 858 if dir not in self._dirs:
628 859 self._dirs[dir] = treemanifest(self._subpath(dir))
629 860 self._dirs[dir].__setitem__(subpath, n)
630 861 else:
631 862 self._files[f] = n[:21] # to match manifestdict's behavior
632 863 self._dirty = True
633 864
634 865 def _load(self):
635 866 if self._loadfunc is not _noop:
636 867 lf, self._loadfunc = self._loadfunc, _noop
637 868 lf(self)
638 869 elif self._copyfunc is not _noop:
639 870 cf, self._copyfunc = self._copyfunc, _noop
640 871 cf(self)
641 872
642 873 def setflag(self, f, flags):
643 874 """Set the flags (symlink, executable) for path f."""
644 875 self._load()
645 876 dir, subpath = _splittopdir(f)
646 877 if dir:
647 878 if dir not in self._dirs:
648 879 self._dirs[dir] = treemanifest(self._subpath(dir))
649 880 self._dirs[dir].setflag(subpath, flags)
650 881 else:
651 882 self._flags[f] = flags
652 883 self._dirty = True
653 884
654 885 def copy(self):
655 886 copy = treemanifest(self._dir)
656 887 copy._node = self._node
657 888 copy._dirty = self._dirty
658 889 if self._copyfunc is _noop:
659 890 def _copyfunc(s):
660 891 self._load()
661 892 for d in self._dirs:
662 893 s._dirs[d] = self._dirs[d].copy()
663 894 s._files = dict.copy(self._files)
664 895 s._flags = dict.copy(self._flags)
665 896 if self._loadfunc is _noop:
666 897 _copyfunc(copy)
667 898 else:
668 899 copy._copyfunc = _copyfunc
669 900 else:
670 901 copy._copyfunc = self._copyfunc
671 902 return copy
672 903
673 904 def filesnotin(self, m2):
674 905 '''Set of files in this manifest that are not in the other'''
675 906 files = set()
676 907 def _filesnotin(t1, t2):
677 908 if t1._node == t2._node and not t1._dirty and not t2._dirty:
678 909 return
679 910 t1._load()
680 911 t2._load()
681 912 for d, m1 in t1._dirs.iteritems():
682 913 if d in t2._dirs:
683 914 m2 = t2._dirs[d]
684 915 _filesnotin(m1, m2)
685 916 else:
686 917 files.update(m1.iterkeys())
687 918
688 919 for fn in t1._files.iterkeys():
689 920 if fn not in t2._files:
690 921 files.add(t1._subpath(fn))
691 922
692 923 _filesnotin(self, m2)
693 924 return files
694 925
695 926 @propertycache
696 927 def _alldirs(self):
697 928 return util.dirs(self)
698 929
699 930 def dirs(self):
700 931 return self._alldirs
701 932
702 933 def hasdir(self, dir):
703 934 self._load()
704 935 topdir, subdir = _splittopdir(dir)
705 936 if topdir:
706 937 if topdir in self._dirs:
707 938 return self._dirs[topdir].hasdir(subdir)
708 939 return False
709 940 return (dir + '/') in self._dirs
710 941
711 942 def walk(self, match):
712 943 '''Generates matching file names.
713 944
714 945 Equivalent to manifest.matches(match).iterkeys(), but without creating
715 946 an entirely new manifest.
716 947
717 948 It also reports nonexistent files by marking them bad with match.bad().
718 949 '''
719 950 if match.always():
720 951 for f in iter(self):
721 952 yield f
722 953 return
723 954
724 955 fset = set(match.files())
725 956
726 957 for fn in self._walk(match):
727 958 if fn in fset:
728 959 # specified pattern is the exact name
729 960 fset.remove(fn)
730 961 yield fn
731 962
732 963 # for dirstate.walk, files=['.'] means "walk the whole tree".
733 964 # follow that here, too
734 965 fset.discard('.')
735 966
736 967 for fn in sorted(fset):
737 968 if not self.hasdir(fn):
738 969 match.bad(fn, None)
739 970
740 971 def _walk(self, match):
741 972 '''Recursively generates matching file names for walk().'''
742 973 if not match.visitdir(self._dir[:-1] or '.'):
743 974 return
744 975
745 976 # yield this dir's files and walk its submanifests
746 977 self._load()
747 978 for p in sorted(self._dirs.keys() + self._files.keys()):
748 979 if p in self._files:
749 980 fullp = self._subpath(p)
750 981 if match(fullp):
751 982 yield fullp
752 983 else:
753 984 for f in self._dirs[p]._walk(match):
754 985 yield f
755 986
756 987 def matches(self, match):
757 988 '''generate a new manifest filtered by the match argument'''
758 989 if match.always():
759 990 return self.copy()
760 991
761 992 return self._matches(match)
762 993
763 994 def _matches(self, match):
764 995 '''recursively generate a new manifest filtered by the match argument.
765 996 '''
766 997
767 998 visit = match.visitdir(self._dir[:-1] or '.')
768 999 if visit == 'all':
769 1000 return self.copy()
770 1001 ret = treemanifest(self._dir)
771 1002 if not visit:
772 1003 return ret
773 1004
774 1005 self._load()
775 1006 for fn in self._files:
776 1007 fullp = self._subpath(fn)
777 1008 if not match(fullp):
778 1009 continue
779 1010 ret._files[fn] = self._files[fn]
780 1011 if fn in self._flags:
781 1012 ret._flags[fn] = self._flags[fn]
782 1013
783 1014 for dir, subm in self._dirs.iteritems():
784 1015 m = subm._matches(match)
785 1016 if not m._isempty():
786 1017 ret._dirs[dir] = m
787 1018
788 1019 if not ret._isempty():
789 1020 ret._dirty = True
790 1021 return ret
791 1022
792 1023 def diff(self, m2, clean=False):
793 1024 '''Finds changes between the current manifest and m2.
794 1025
795 1026 Args:
796 1027 m2: the manifest to which this manifest should be compared.
797 1028 clean: if true, include files unchanged between these manifests
798 1029 with a None value in the returned dictionary.
799 1030
800 1031 The result is returned as a dict with filename as key and
801 1032 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
802 1033 nodeid in the current/other manifest and fl1/fl2 is the flag
803 1034 in the current/other manifest. Where the file does not exist,
804 1035 the nodeid will be None and the flags will be the empty
805 1036 string.
806 1037 '''
807 1038 result = {}
808 1039 emptytree = treemanifest()
809 1040 def _diff(t1, t2):
810 1041 if t1._node == t2._node and not t1._dirty and not t2._dirty:
811 1042 return
812 1043 t1._load()
813 1044 t2._load()
814 1045 for d, m1 in t1._dirs.iteritems():
815 1046 m2 = t2._dirs.get(d, emptytree)
816 1047 _diff(m1, m2)
817 1048
818 1049 for d, m2 in t2._dirs.iteritems():
819 1050 if d not in t1._dirs:
820 1051 _diff(emptytree, m2)
821 1052
822 1053 for fn, n1 in t1._files.iteritems():
823 1054 fl1 = t1._flags.get(fn, '')
824 1055 n2 = t2._files.get(fn, None)
825 1056 fl2 = t2._flags.get(fn, '')
826 1057 if n1 != n2 or fl1 != fl2:
827 1058 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
828 1059 elif clean:
829 1060 result[t1._subpath(fn)] = None
830 1061
831 1062 for fn, n2 in t2._files.iteritems():
832 1063 if fn not in t1._files:
833 1064 fl2 = t2._flags.get(fn, '')
834 1065 result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
835 1066
836 1067 _diff(self, m2)
837 1068 return result
838 1069
839 1070 def unmodifiedsince(self, m2):
840 1071 return not self._dirty and not m2._dirty and self._node == m2._node
841 1072
842 1073 def parse(self, text, readsubtree):
843 1074 for f, n, fl in _parse(text):
844 1075 if fl == 't':
845 1076 f = f + '/'
846 1077 self._dirs[f] = readsubtree(self._subpath(f), n)
847 1078 elif '/' in f:
848 1079 # This is a flat manifest, so use __setitem__ and setflag rather
849 1080 # than assigning directly to _files and _flags, so we can
850 1081 # assign a path in a subdirectory, and to mark dirty (compared
851 1082 # to nullid).
852 1083 self[f] = n
853 1084 if fl:
854 1085 self.setflag(f, fl)
855 1086 else:
856 1087 # Assigning to _files and _flags avoids marking as dirty,
857 1088 # and should be a little faster.
858 1089 self._files[f] = n
859 1090 if fl:
860 1091 self._flags[f] = fl
861 1092
862 1093 def text(self, usemanifestv2=False):
863 1094 """Get the full data of this manifest as a bytestring."""
864 1095 self._load()
865 1096 return _text(self.iterentries(), usemanifestv2)
866 1097
867 1098 def dirtext(self, usemanifestv2=False):
868 1099 """Get the full data of this directory as a bytestring. Make sure that
869 1100 any submanifests have been written first, so their nodeids are correct.
870 1101 """
871 1102 self._load()
872 1103 flags = self.flags
873 1104 dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs]
874 1105 files = [(f, self._files[f], flags(f)) for f in self._files]
875 1106 return _text(sorted(dirs + files), usemanifestv2)
876 1107
877 1108 def read(self, gettext, readsubtree):
878 1109 def _load_for_read(s):
879 1110 s.parse(gettext(), readsubtree)
880 1111 s._dirty = False
881 1112 self._loadfunc = _load_for_read
882 1113
883 1114 def writesubtrees(self, m1, m2, writesubtree):
884 1115 self._load() # for consistency; should never have any effect here
885 1116 m1._load()
886 1117 m2._load()
887 1118 emptytree = treemanifest()
888 1119 for d, subm in self._dirs.iteritems():
889 1120 subp1 = m1._dirs.get(d, emptytree)._node
890 1121 subp2 = m2._dirs.get(d, emptytree)._node
891 1122 if subp1 == revlog.nullid:
892 1123 subp1, subp2 = subp2, subp1
893 1124 writesubtree(subm, subp1, subp2)
894 1125
895 1126 class manifestrevlog(revlog.revlog):
896 1127 '''A revlog that stores manifest texts. This is responsible for caching the
897 1128 full-text manifest contents.
898 1129 '''
899 1130 def __init__(self, opener, dir='', dirlogcache=None):
900 1131 # During normal operations, we expect to deal with not more than four
901 1132 # revs at a time (such as during commit --amend). When rebasing large
902 1133 # stacks of commits, the number can go up, hence the config knob below.
903 1134 cachesize = 4
904 1135 usetreemanifest = False
905 1136 usemanifestv2 = False
906 1137 opts = getattr(opener, 'options', None)
907 1138 if opts is not None:
908 1139 cachesize = opts.get('manifestcachesize', cachesize)
909 1140 usetreemanifest = opts.get('treemanifest', usetreemanifest)
910 1141 usemanifestv2 = opts.get('manifestv2', usemanifestv2)
911 1142
912 1143 self._treeondisk = usetreemanifest
913 1144 self._usemanifestv2 = usemanifestv2
914 1145
915 1146 self._fulltextcache = util.lrucachedict(cachesize)
916 1147
917 1148 indexfile = "00manifest.i"
918 1149 if dir:
919 1150 assert self._treeondisk, 'opts is %r' % opts
920 1151 if not dir.endswith('/'):
921 1152 dir = dir + '/'
922 1153 indexfile = "meta/" + dir + "00manifest.i"
923 1154 self._dir = dir
924 1155 # The dirlogcache is kept on the root manifest log
925 1156 if dir:
926 1157 self._dirlogcache = dirlogcache
927 1158 else:
928 1159 self._dirlogcache = {'': self}
929 1160
930 1161 super(manifestrevlog, self).__init__(opener, indexfile,
931 1162 checkambig=bool(dir))
932 1163
933 1164 @property
934 1165 def fulltextcache(self):
935 1166 return self._fulltextcache
936 1167
937 1168 def clearcaches(self):
938 1169 super(manifestrevlog, self).clearcaches()
939 1170 self._fulltextcache.clear()
940 1171 self._dirlogcache = {'': self}
941 1172
942 1173 def dirlog(self, dir):
943 1174 if dir:
944 1175 assert self._treeondisk
945 1176 if dir not in self._dirlogcache:
946 1177 self._dirlogcache[dir] = manifestrevlog(self.opener, dir,
947 1178 self._dirlogcache)
948 1179 return self._dirlogcache[dir]
949 1180
950 1181 def add(self, m, transaction, link, p1, p2, added, removed):
951 1182 if (p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta')
952 1183 and not self._usemanifestv2):
953 1184 # If our first parent is in the manifest cache, we can
954 1185 # compute a delta here using properties we know about the
955 1186 # manifest up-front, which may save time later for the
956 1187 # revlog layer.
957 1188
958 1189 _checkforbidden(added)
959 1190 # combine the changed lists into one sorted iterator
960 1191 work = heapq.merge([(x, False) for x in added],
961 1192 [(x, True) for x in removed])
962 1193
963 1194 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
964 1195 cachedelta = self.rev(p1), deltatext
965 1196 text = util.buffer(arraytext)
966 1197 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
967 1198 else:
968 1199 # The first parent manifest isn't already loaded, so we'll
969 1200 # just encode a fulltext of the manifest and pass that
970 1201 # through to the revlog layer, and let it handle the delta
971 1202 # process.
972 1203 if self._treeondisk:
973 1204 m1 = self.read(p1)
974 1205 m2 = self.read(p2)
975 1206 n = self._addtree(m, transaction, link, m1, m2)
976 1207 arraytext = None
977 1208 else:
978 1209 text = m.text(self._usemanifestv2)
979 1210 n = self.addrevision(text, transaction, link, p1, p2)
980 1211 arraytext = array.array('c', text)
981 1212
982 1213 self.fulltextcache[n] = arraytext
983 1214
984 1215 return n
985 1216
986 1217 def _addtree(self, m, transaction, link, m1, m2):
987 1218 # If the manifest is unchanged compared to one parent,
988 1219 # don't write a new revision
989 1220 if m.unmodifiedsince(m1) or m.unmodifiedsince(m2):
990 1221 return m.node()
991 1222 def writesubtree(subm, subp1, subp2):
992 1223 sublog = self.dirlog(subm.dir())
993 1224 sublog.add(subm, transaction, link, subp1, subp2, None, None)
994 1225 m.writesubtrees(m1, m2, writesubtree)
995 1226 text = m.dirtext(self._usemanifestv2)
996 1227 # Double-check whether contents are unchanged to one parent
997 1228 if text == m1.dirtext(self._usemanifestv2):
998 1229 n = m1.node()
999 1230 elif text == m2.dirtext(self._usemanifestv2):
1000 1231 n = m2.node()
1001 1232 else:
1002 1233 n = self.addrevision(text, transaction, link, m1.node(), m2.node())
1003 1234 # Save nodeid so parent manifest can calculate its nodeid
1004 1235 m.setnode(n)
1005 1236 return n
1006 1237
1007 1238 class manifestlog(object):
1008 1239 """A collection class representing the collection of manifest snapshots
1009 1240 referenced by commits in the repository.
1010 1241
1011 1242 In this situation, 'manifest' refers to the abstract concept of a snapshot
1012 1243 of the list of files in the given commit. Consumers of the output of this
1013 1244 class do not care about the implementation details of the actual manifests
1014 1245 they receive (i.e. tree or flat or lazily loaded, etc)."""
1015 1246 def __init__(self, opener, repo):
1016 1247 self._repo = repo
1017 1248
1018 1249 usetreemanifest = False
1019 1250
1020 1251 opts = getattr(opener, 'options', None)
1021 1252 if opts is not None:
1022 1253 usetreemanifest = opts.get('treemanifest', usetreemanifest)
1023 1254 self._treeinmem = usetreemanifest
1024 1255
1025 1256 # We'll separate this into it's own cache once oldmanifest is no longer
1026 1257 # used
1027 1258 self._mancache = repo.manifest._mancache
1028 1259
1029 1260 @property
1030 1261 def _revlog(self):
1031 1262 return self._repo.manifest
1032 1263
1033 1264 def __getitem__(self, node):
1034 1265 """Retrieves the manifest instance for the given node. Throws a KeyError
1035 1266 if not found.
1036 1267 """
1037 1268 if node in self._mancache:
1038 1269 cachemf = self._mancache[node]
1039 1270 # The old manifest may put non-ctx manifests in the cache, so skip
1040 1271 # those since they don't implement the full api.
1041 1272 if (isinstance(cachemf, manifestctx) or
1042 1273 isinstance(cachemf, treemanifestctx)):
1043 1274 return cachemf
1044 1275
1045 1276 if self._treeinmem:
1046 1277 m = treemanifestctx(self._revlog, '', node)
1047 1278 else:
1048 1279 m = manifestctx(self._revlog, node)
1049 1280 if node != revlog.nullid:
1050 1281 self._mancache[node] = m
1051 1282 return m
1052 1283
1053 1284 def add(self, m, transaction, link, p1, p2, added, removed):
1054 1285 return self._revlog.add(m, transaction, link, p1, p2, added, removed)
1055 1286
1056 1287 class manifestctx(object):
1057 1288 """A class representing a single revision of a manifest, including its
1058 1289 contents, its parent revs, and its linkrev.
1059 1290 """
1060 1291 def __init__(self, revlog, node):
1061 1292 self._revlog = revlog
1062 1293 self._data = None
1063 1294
1064 1295 self._node = node
1065 1296
1066 1297 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
1067 1298 # but let's add it later when something needs it and we can load it
1068 1299 # lazily.
1069 1300 #self.p1, self.p2 = revlog.parents(node)
1070 1301 #rev = revlog.rev(node)
1071 1302 #self.linkrev = revlog.linkrev(rev)
1072 1303
1073 1304 def node(self):
1074 1305 return self._node
1075 1306
1076 1307 def read(self):
1077 1308 if not self._data:
1078 1309 if self._node == revlog.nullid:
1079 1310 self._data = manifestdict()
1080 1311 else:
1081 1312 text = self._revlog.revision(self._node)
1082 1313 arraytext = array.array('c', text)
1083 1314 self._revlog._fulltextcache[self._node] = arraytext
1084 1315 self._data = manifestdict(text)
1085 1316 return self._data
1086 1317
1087 1318 def readfast(self):
1088 1319 rl = self._revlog
1089 1320 r = rl.rev(self._node)
1090 1321 deltaparent = rl.deltaparent(r)
1091 1322 if deltaparent != revlog.nullrev and deltaparent in rl.parentrevs(r):
1092 1323 return self.readdelta()
1093 1324 return self.read()
1094 1325
1095 1326 def readdelta(self):
1096 1327 revlog = self._revlog
1097 1328 if revlog._usemanifestv2:
1098 1329 # Need to perform a slow delta
1099 1330 r0 = revlog.deltaparent(revlog.rev(self._node))
1100 1331 m0 = manifestctx(revlog, revlog.node(r0)).read()
1101 1332 m1 = self.read()
1102 1333 md = manifestdict()
1103 1334 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1104 1335 if n1:
1105 1336 md[f] = n1
1106 1337 if fl1:
1107 1338 md.setflag(f, fl1)
1108 1339 return md
1109 1340
1110 1341 r = revlog.rev(self._node)
1111 1342 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
1112 1343 return manifestdict(d)
1113 1344
1114 1345 class treemanifestctx(object):
1115 1346 def __init__(self, revlog, dir, node):
1116 1347 revlog = revlog.dirlog(dir)
1117 1348 self._revlog = revlog
1118 1349 self._dir = dir
1119 1350 self._data = None
1120 1351
1121 1352 self._node = node
1122 1353
1123 1354 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
1124 1355 # we can instantiate treemanifestctx objects for directories we don't
1125 1356 # have on disk.
1126 1357 #self.p1, self.p2 = revlog.parents(node)
1127 1358 #rev = revlog.rev(node)
1128 1359 #self.linkrev = revlog.linkrev(rev)
1129 1360
1130 1361 def read(self):
1131 1362 if not self._data:
1132 1363 if self._node == revlog.nullid:
1133 1364 self._data = treemanifest()
1134 1365 elif self._revlog._treeondisk:
1135 1366 m = treemanifest(dir=self._dir)
1136 1367 def gettext():
1137 1368 return self._revlog.revision(self._node)
1138 1369 def readsubtree(dir, subm):
1139 1370 return treemanifestctx(self._revlog, dir, subm).read()
1140 1371 m.read(gettext, readsubtree)
1141 1372 m.setnode(self._node)
1142 1373 self._data = m
1143 1374 else:
1144 1375 text = self._revlog.revision(self._node)
1145 1376 arraytext = array.array('c', text)
1146 1377 self._revlog.fulltextcache[self._node] = arraytext
1147 1378 self._data = treemanifest(dir=self._dir, text=text)
1148 1379
1149 1380 return self._data
1150 1381
1151 1382 def node(self):
1152 1383 return self._node
1153 1384
1154 1385 def readdelta(self):
1155 1386 # Need to perform a slow delta
1156 1387 revlog = self._revlog
1157 1388 r0 = revlog.deltaparent(revlog.rev(self._node))
1158 1389 m0 = treemanifestctx(revlog, revlog.node(r0), dir=self._dir).read()
1159 1390 m1 = self.read()
1160 1391 md = treemanifest(dir=self._dir)
1161 1392 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1162 1393 if n1:
1163 1394 md[f] = n1
1164 1395 if fl1:
1165 1396 md.setflag(f, fl1)
1166 1397 return md
1167 1398
1168 1399 def readfast(self):
1169 1400 rl = self._revlog
1170 1401 r = rl.rev(self._node)
1171 1402 deltaparent = rl.deltaparent(r)
1172 1403 if deltaparent != revlog.nullrev and deltaparent in rl.parentrevs(r):
1173 1404 return self.readdelta()
1174 1405 return self.read()
1175 1406
1176 1407 class manifest(manifestrevlog):
1177 1408 def __init__(self, opener, dir='', dirlogcache=None):
1178 1409 '''The 'dir' and 'dirlogcache' arguments are for internal use by
1179 1410 manifest.manifest only. External users should create a root manifest
1180 1411 log with manifest.manifest(opener) and call dirlog() on it.
1181 1412 '''
1182 1413 # During normal operations, we expect to deal with not more than four
1183 1414 # revs at a time (such as during commit --amend). When rebasing large
1184 1415 # stacks of commits, the number can go up, hence the config knob below.
1185 1416 cachesize = 4
1186 1417 usetreemanifest = False
1187 1418 opts = getattr(opener, 'options', None)
1188 1419 if opts is not None:
1189 1420 cachesize = opts.get('manifestcachesize', cachesize)
1190 1421 usetreemanifest = opts.get('treemanifest', usetreemanifest)
1191 1422 self._mancache = util.lrucachedict(cachesize)
1192 1423 self._treeinmem = usetreemanifest
1193 1424 super(manifest, self).__init__(opener, dir=dir, dirlogcache=dirlogcache)
1194 1425
1195 1426 def _newmanifest(self, data=''):
1196 1427 if self._treeinmem:
1197 1428 return treemanifest(self._dir, data)
1198 1429 return manifestdict(data)
1199 1430
1200 1431 def dirlog(self, dir):
1201 1432 """This overrides the base revlog implementation to allow construction
1202 1433 'manifest' types instead of manifestrevlog types. This is only needed
1203 1434 until we migrate off the 'manifest' type."""
1204 1435 if dir:
1205 1436 assert self._treeondisk
1206 1437 if dir not in self._dirlogcache:
1207 1438 self._dirlogcache[dir] = manifest(self.opener, dir,
1208 1439 self._dirlogcache)
1209 1440 return self._dirlogcache[dir]
1210 1441
1211 1442 def _slowreaddelta(self, node):
1212 1443 r0 = self.deltaparent(self.rev(node))
1213 1444 m0 = self.read(self.node(r0))
1214 1445 m1 = self.read(node)
1215 1446 md = self._newmanifest()
1216 1447 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1217 1448 if n1:
1218 1449 md[f] = n1
1219 1450 if fl1:
1220 1451 md.setflag(f, fl1)
1221 1452 return md
1222 1453
1223 1454 def readdelta(self, node):
1224 1455 if self._usemanifestv2 or self._treeondisk:
1225 1456 return self._slowreaddelta(node)
1226 1457 r = self.rev(node)
1227 1458 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
1228 1459 return self._newmanifest(d)
1229 1460
1230 1461 def readshallowdelta(self, node):
1231 1462 '''For flat manifests, this is the same as readdelta(). For
1232 1463 treemanifests, this will read the delta for this revlog's directory,
1233 1464 without recursively reading subdirectory manifests. Instead, any
1234 1465 subdirectory entry will be reported as it appears in the manifests, i.e.
1235 1466 the subdirectory will be reported among files and distinguished only by
1236 1467 its 't' flag.'''
1237 1468 if not self._treeondisk:
1238 1469 return self.readdelta(node)
1239 1470 if self._usemanifestv2:
1240 1471 raise error.Abort(
1241 1472 _("readshallowdelta() not implemented for manifestv2"))
1242 1473 r = self.rev(node)
1243 1474 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
1244 1475 return manifestdict(d)
1245 1476
1246 1477 def readshallowfast(self, node):
1247 1478 '''like readfast(), but calls readshallowdelta() instead of readdelta()
1248 1479 '''
1249 1480 r = self.rev(node)
1250 1481 deltaparent = self.deltaparent(r)
1251 1482 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
1252 1483 return self.readshallowdelta(node)
1253 1484 return self.readshallow(node)
1254 1485
1255 1486 def read(self, node):
1256 1487 if node == revlog.nullid:
1257 1488 return self._newmanifest() # don't upset local cache
1258 1489 if node in self._mancache:
1259 1490 cached = self._mancache[node]
1260 1491 if (isinstance(cached, manifestctx) or
1261 1492 isinstance(cached, treemanifestctx)):
1262 1493 cached = cached.read()
1263 1494 return cached
1264 1495 if self._treeondisk:
1265 1496 def gettext():
1266 1497 return self.revision(node)
1267 1498 def readsubtree(dir, subm):
1268 1499 return self.dirlog(dir).read(subm)
1269 1500 m = self._newmanifest()
1270 1501 m.read(gettext, readsubtree)
1271 1502 m.setnode(node)
1272 1503 arraytext = None
1273 1504 else:
1274 1505 text = self.revision(node)
1275 1506 m = self._newmanifest(text)
1276 1507 arraytext = array.array('c', text)
1277 1508 self._mancache[node] = m
1278 1509 self.fulltextcache[node] = arraytext
1279 1510 return m
1280 1511
1281 1512 def readshallow(self, node):
1282 1513 '''Reads the manifest in this directory. When using flat manifests,
1283 1514 this manifest will generally have files in subdirectories in it. Does
1284 1515 not cache the manifest as the callers generally do not read the same
1285 1516 version twice.'''
1286 1517 return manifestdict(self.revision(node))
1287 1518
1288 1519 def find(self, node, f):
1289 1520 '''look up entry for a single file efficiently.
1290 1521 return (node, flags) pair if found, (None, None) if not.'''
1291 1522 m = self.read(node)
1292 1523 try:
1293 1524 return m.find(f)
1294 1525 except KeyError:
1295 1526 return None, None
1296 1527
1297 1528 def clearcaches(self):
1298 1529 super(manifest, self).clearcaches()
1299 1530 self._mancache.clear()
@@ -1,169 +1,169 b''
1 1 # bdiff.py - Python implementation of bdiff.c
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import array
11 11 import difflib
12 12 import re
13 13 import struct
14 14
15 15 from . import policy
16 16 policynocffi = policy.policynocffi
17 17 modulepolicy = policy.policy
18 18
19 19 def splitnewlines(text):
20 20 '''like str.splitlines, but only split on newlines.'''
21 21 lines = [l + '\n' for l in text.split('\n')]
22 22 if lines:
23 23 if lines[-1] == '\n':
24 24 lines.pop()
25 25 else:
26 26 lines[-1] = lines[-1][:-1]
27 27 return lines
28 28
29 29 def _normalizeblocks(a, b, blocks):
30 30 prev = None
31 31 r = []
32 32 for curr in blocks:
33 33 if prev is None:
34 34 prev = curr
35 35 continue
36 36 shift = 0
37 37
38 38 a1, b1, l1 = prev
39 39 a1end = a1 + l1
40 40 b1end = b1 + l1
41 41
42 42 a2, b2, l2 = curr
43 43 a2end = a2 + l2
44 44 b2end = b2 + l2
45 45 if a1end == a2:
46 46 while (a1end + shift < a2end and
47 47 a[a1end + shift] == b[b1end + shift]):
48 48 shift += 1
49 49 elif b1end == b2:
50 50 while (b1end + shift < b2end and
51 51 a[a1end + shift] == b[b1end + shift]):
52 52 shift += 1
53 53 r.append((a1, b1, l1 + shift))
54 54 prev = a2 + shift, b2 + shift, l2 - shift
55 55 r.append(prev)
56 56 return r
57 57
58 58 def _tostring(c):
59 59 if type(c) is array.array:
60 60 # this copy overhead isn't ideal
61 61 return c.tostring()
62 62 return str(c)
63 63
64 64 def bdiff(a, b):
65 65 a = _tostring(a).splitlines(True)
66 66 b = _tostring(b).splitlines(True)
67 67
68 68 if not a:
69 69 s = "".join(b)
70 70 return s and (struct.pack(">lll", 0, 0, len(s)) + s)
71 71
72 72 bin = []
73 73 p = [0]
74 74 for i in a: p.append(p[-1] + len(i))
75 75
76 76 d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
77 77 d = _normalizeblocks(a, b, d)
78 78 la = 0
79 79 lb = 0
80 80 for am, bm, size in d:
81 81 s = "".join(b[lb:bm])
82 82 if am > la or s:
83 83 bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
84 84 la = am + size
85 85 lb = bm + size
86 86
87 87 return "".join(bin)
88 88
89 89 def blocks(a, b):
90 90 an = splitnewlines(a)
91 91 bn = splitnewlines(b)
92 92 d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
93 93 d = _normalizeblocks(an, bn, d)
94 94 return [(i, i + n, j, j + n) for (i, j, n) in d]
95 95
96 96 def fixws(text, allws):
97 97 if allws:
98 98 text = re.sub('[ \t\r]+', '', text)
99 99 else:
100 100 text = re.sub('[ \t\r]+', ' ', text)
101 101 text = text.replace(' \n', '\n')
102 102 return text
103 103
104 104 if modulepolicy not in policynocffi:
105 105 try:
106 106 from _bdiff_cffi import ffi, lib
107 107 except ImportError:
108 108 if modulepolicy == 'cffi': # strict cffi import
109 109 raise
110 110 else:
111 111 def blocks(sa, sb):
112 112 a = ffi.new("struct bdiff_line**")
113 113 b = ffi.new("struct bdiff_line**")
114 ac = ffi.new("char[]", sa)
115 bc = ffi.new("char[]", sb)
114 ac = ffi.new("char[]", str(sa))
115 bc = ffi.new("char[]", str(sb))
116 116 l = ffi.new("struct bdiff_hunk*")
117 117 try:
118 118 an = lib.bdiff_splitlines(ac, len(sa), a)
119 119 bn = lib.bdiff_splitlines(bc, len(sb), b)
120 120 if not a[0] or not b[0]:
121 121 raise MemoryError
122 122 count = lib.bdiff_diff(a[0], an, b[0], bn, l)
123 123 if count < 0:
124 124 raise MemoryError
125 125 rl = [None] * count
126 126 h = l.next
127 127 i = 0
128 128 while h:
129 129 rl[i] = (h.a1, h.a2, h.b1, h.b2)
130 130 h = h.next
131 131 i += 1
132 132 finally:
133 133 lib.free(a[0])
134 134 lib.free(b[0])
135 135 lib.bdiff_freehunks(l.next)
136 136 return rl
137 137
138 138 def bdiff(sa, sb):
139 139 a = ffi.new("struct bdiff_line**")
140 140 b = ffi.new("struct bdiff_line**")
141 ac = ffi.new("char[]", sa)
142 bc = ffi.new("char[]", sb)
141 ac = ffi.new("char[]", str(sa))
142 bc = ffi.new("char[]", str(sb))
143 143 l = ffi.new("struct bdiff_hunk*")
144 144 try:
145 145 an = lib.bdiff_splitlines(ac, len(sa), a)
146 146 bn = lib.bdiff_splitlines(bc, len(sb), b)
147 147 if not a[0] or not b[0]:
148 148 raise MemoryError
149 149 count = lib.bdiff_diff(a[0], an, b[0], bn, l)
150 150 if count < 0:
151 151 raise MemoryError
152 152 rl = []
153 153 h = l.next
154 154 la = lb = 0
155 155 while h:
156 156 if h.a1 != la or h.b1 != lb:
157 157 lgt = (b[0] + h.b1).l - (b[0] + lb).l
158 158 rl.append(struct.pack(">lll", (a[0] + la).l - a[0].l,
159 159 (a[0] + h.a1).l - a[0].l, lgt))
160 160 rl.append(str(ffi.buffer((b[0] + lb).l, lgt)))
161 161 la = h.a2
162 162 lb = h.b2
163 163 h = h.next
164 164
165 165 finally:
166 166 lib.free(a[0])
167 167 lib.free(b[0])
168 168 lib.bdiff_freehunks(l.next)
169 169 return "".join(rl)
General Comments 0
You need to be logged in to leave comments. Login now