##// END OF EJS Templates
manifest: rewrite find(node, f) in terms of read(node)...
Martin von Zweigbergk -
r24292:b7add2eb default
parent child Browse files
Show More
@@ -1,392 +1,389
1 # manifest.py - manifest revision class for mercurial
1 # manifest.py - manifest revision class for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from i18n import _
8 from i18n import _
9 import mdiff, parsers, error, revlog, util
9 import mdiff, parsers, error, revlog, util
10 import array, struct
10 import array, struct
11
11
12
12
13 class _lazymanifest(dict):
13 class _lazymanifest(dict):
14 """This is the pure implementation of lazymanifest.
14 """This is the pure implementation of lazymanifest.
15
15
16 It has not been optimized *at all* and is not lazy.
16 It has not been optimized *at all* and is not lazy.
17 """
17 """
18
18
19 def __init__(self, data):
19 def __init__(self, data):
20 # This init method does a little bit of excessive-looking
20 # This init method does a little bit of excessive-looking
21 # precondition checking. This is so that the behavior of this
21 # precondition checking. This is so that the behavior of this
22 # class exactly matches its C counterpart to try and help
22 # class exactly matches its C counterpart to try and help
23 # prevent surprise breakage for anyone that develops against
23 # prevent surprise breakage for anyone that develops against
24 # the pure version.
24 # the pure version.
25 if data and data[-1] != '\n':
25 if data and data[-1] != '\n':
26 raise ValueError('Manifest did not end in a newline.')
26 raise ValueError('Manifest did not end in a newline.')
27 dict.__init__(self)
27 dict.__init__(self)
28 prev = None
28 prev = None
29 for l in data.splitlines():
29 for l in data.splitlines():
30 if prev is not None and prev > l:
30 if prev is not None and prev > l:
31 raise ValueError('Manifest lines not in sorted order.')
31 raise ValueError('Manifest lines not in sorted order.')
32 prev = l
32 prev = l
33 f, n = l.split('\0')
33 f, n = l.split('\0')
34 if len(n) > 40:
34 if len(n) > 40:
35 self[f] = revlog.bin(n[:40]), n[40:]
35 self[f] = revlog.bin(n[:40]), n[40:]
36 else:
36 else:
37 self[f] = revlog.bin(n), ''
37 self[f] = revlog.bin(n), ''
38
38
39 def __setitem__(self, k, v):
39 def __setitem__(self, k, v):
40 node, flag = v
40 node, flag = v
41 assert node is not None
41 assert node is not None
42 if len(node) > 21:
42 if len(node) > 21:
43 node = node[:21] # match c implementation behavior
43 node = node[:21] # match c implementation behavior
44 dict.__setitem__(self, k, (node, flag))
44 dict.__setitem__(self, k, (node, flag))
45
45
46 def __iter__(self):
46 def __iter__(self):
47 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
47 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
48
48
49 def copy(self):
49 def copy(self):
50 c = _lazymanifest('')
50 c = _lazymanifest('')
51 c.update(self)
51 c.update(self)
52 return c
52 return c
53
53
54 def diff(self, m2, clean=False):
54 def diff(self, m2, clean=False):
55 '''Finds changes between the current manifest and m2.'''
55 '''Finds changes between the current manifest and m2.'''
56 diff = {}
56 diff = {}
57
57
58 for fn, e1 in self.iteritems():
58 for fn, e1 in self.iteritems():
59 if fn not in m2:
59 if fn not in m2:
60 diff[fn] = e1, (None, '')
60 diff[fn] = e1, (None, '')
61 else:
61 else:
62 e2 = m2[fn]
62 e2 = m2[fn]
63 if e1 != e2:
63 if e1 != e2:
64 diff[fn] = e1, e2
64 diff[fn] = e1, e2
65 elif clean:
65 elif clean:
66 diff[fn] = None
66 diff[fn] = None
67
67
68 for fn, e2 in m2.iteritems():
68 for fn, e2 in m2.iteritems():
69 if fn not in self:
69 if fn not in self:
70 diff[fn] = (None, ''), e2
70 diff[fn] = (None, ''), e2
71
71
72 return diff
72 return diff
73
73
74 def filtercopy(self, filterfn):
74 def filtercopy(self, filterfn):
75 c = _lazymanifest('')
75 c = _lazymanifest('')
76 for f, n, fl in self:
76 for f, n, fl in self:
77 if filterfn(f):
77 if filterfn(f):
78 c[f] = n, fl
78 c[f] = n, fl
79 return c
79 return c
80
80
81 def text(self):
81 def text(self):
82 """Get the full data of this manifest as a bytestring."""
82 """Get the full data of this manifest as a bytestring."""
83 fl = sorted(self)
83 fl = sorted(self)
84
84
85 _hex = revlog.hex
85 _hex = revlog.hex
86 # if this is changed to support newlines in filenames,
86 # if this is changed to support newlines in filenames,
87 # be sure to check the templates/ dir again (especially *-raw.tmpl)
87 # be sure to check the templates/ dir again (especially *-raw.tmpl)
88 return ''.join("%s\0%s%s\n" % (
88 return ''.join("%s\0%s%s\n" % (
89 f, _hex(n[:20]), flag) for f, n, flag in fl)
89 f, _hex(n[:20]), flag) for f, n, flag in fl)
90
90
91 try:
91 try:
92 _lazymanifest = parsers.lazymanifest
92 _lazymanifest = parsers.lazymanifest
93 except AttributeError:
93 except AttributeError:
94 pass
94 pass
95
95
96 class manifestdict(object):
96 class manifestdict(object):
97 def __init__(self, data=''):
97 def __init__(self, data=''):
98 self._lm = _lazymanifest(data)
98 self._lm = _lazymanifest(data)
99
99
100 def __getitem__(self, key):
100 def __getitem__(self, key):
101 return self._lm[key][0]
101 return self._lm[key][0]
102
102
103 def find(self, key):
103 def find(self, key):
104 return self._lm[key]
104 return self._lm[key]
105
105
106 def __len__(self):
106 def __len__(self):
107 return len(self._lm)
107 return len(self._lm)
108
108
109 def __setitem__(self, key, node):
109 def __setitem__(self, key, node):
110 self._lm[key] = node, self.flags(key, '')
110 self._lm[key] = node, self.flags(key, '')
111
111
112 def __contains__(self, key):
112 def __contains__(self, key):
113 return key in self._lm
113 return key in self._lm
114
114
115 def __delitem__(self, key):
115 def __delitem__(self, key):
116 del self._lm[key]
116 del self._lm[key]
117
117
118 def keys(self):
118 def keys(self):
119 return [x[0] for x in self._lm]
119 return [x[0] for x in self._lm]
120
120
121 def iterkeys(self):
121 def iterkeys(self):
122 return iter(self.keys())
122 return iter(self.keys())
123
123
124 def intersectfiles(self, files):
124 def intersectfiles(self, files):
125 '''make a new lazymanifest with the intersection of self with files
125 '''make a new lazymanifest with the intersection of self with files
126
126
127 The algorithm assumes that files is much smaller than self.'''
127 The algorithm assumes that files is much smaller than self.'''
128 ret = manifestdict()
128 ret = manifestdict()
129 lm = self._lm
129 lm = self._lm
130 for fn in files:
130 for fn in files:
131 if fn in lm:
131 if fn in lm:
132 ret._lm[fn] = self._lm[fn]
132 ret._lm[fn] = self._lm[fn]
133 return ret
133 return ret
134
134
135 def filesnotin(self, m2):
135 def filesnotin(self, m2):
136 '''Set of files in this manifest that are not in the other'''
136 '''Set of files in this manifest that are not in the other'''
137 files = set(self.iterkeys())
137 files = set(self.iterkeys())
138 files.difference_update(m2.iterkeys())
138 files.difference_update(m2.iterkeys())
139 return files
139 return files
140
140
141 def matches(self, match):
141 def matches(self, match):
142 '''generate a new manifest filtered by the match argument'''
142 '''generate a new manifest filtered by the match argument'''
143 if match.always():
143 if match.always():
144 return self.copy()
144 return self.copy()
145
145
146 files = match.files()
146 files = match.files()
147 if (match.matchfn == match.exact or
147 if (match.matchfn == match.exact or
148 (not match.anypats() and util.all(fn in self for fn in files))):
148 (not match.anypats() and util.all(fn in self for fn in files))):
149 return self.intersectfiles(files)
149 return self.intersectfiles(files)
150
150
151 lm = manifestdict('')
151 lm = manifestdict('')
152 lm._lm = self._lm.filtercopy(match)
152 lm._lm = self._lm.filtercopy(match)
153 return lm
153 return lm
154
154
155 def diff(self, m2, clean=False):
155 def diff(self, m2, clean=False):
156 '''Finds changes between the current manifest and m2.
156 '''Finds changes between the current manifest and m2.
157
157
158 Args:
158 Args:
159 m2: the manifest to which this manifest should be compared.
159 m2: the manifest to which this manifest should be compared.
160 clean: if true, include files unchanged between these manifests
160 clean: if true, include files unchanged between these manifests
161 with a None value in the returned dictionary.
161 with a None value in the returned dictionary.
162
162
163 The result is returned as a dict with filename as key and
163 The result is returned as a dict with filename as key and
164 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
164 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
165 nodeid in the current/other manifest and fl1/fl2 is the flag
165 nodeid in the current/other manifest and fl1/fl2 is the flag
166 in the current/other manifest. Where the file does not exist,
166 in the current/other manifest. Where the file does not exist,
167 the nodeid will be None and the flags will be the empty
167 the nodeid will be None and the flags will be the empty
168 string.
168 string.
169 '''
169 '''
170 return self._lm.diff(m2._lm, clean)
170 return self._lm.diff(m2._lm, clean)
171
171
172 def setflag(self, key, flag):
172 def setflag(self, key, flag):
173 self._lm[key] = self[key], flag
173 self._lm[key] = self[key], flag
174
174
175 def get(self, key, default=None):
175 def get(self, key, default=None):
176 try:
176 try:
177 return self._lm[key][0]
177 return self._lm[key][0]
178 except KeyError:
178 except KeyError:
179 return default
179 return default
180
180
181 def flags(self, key, default=''):
181 def flags(self, key, default=''):
182 try:
182 try:
183 return self._lm[key][1]
183 return self._lm[key][1]
184 except KeyError:
184 except KeyError:
185 return default
185 return default
186
186
187 def __iter__(self):
187 def __iter__(self):
188 return (x[0] for x in self._lm)
188 return (x[0] for x in self._lm)
189
189
190 def copy(self):
190 def copy(self):
191 c = manifestdict('')
191 c = manifestdict('')
192 c._lm = self._lm.copy()
192 c._lm = self._lm.copy()
193 return c
193 return c
194
194
195 def iteritems(self):
195 def iteritems(self):
196 return (x[:2] for x in self._lm)
196 return (x[:2] for x in self._lm)
197
197
198 def text(self):
198 def text(self):
199 return self._lm.text()
199 return self._lm.text()
200
200
201 def fastdelta(self, base, changes):
201 def fastdelta(self, base, changes):
202 """Given a base manifest text as an array.array and a list of changes
202 """Given a base manifest text as an array.array and a list of changes
203 relative to that text, compute a delta that can be used by revlog.
203 relative to that text, compute a delta that can be used by revlog.
204 """
204 """
205 delta = []
205 delta = []
206 dstart = None
206 dstart = None
207 dend = None
207 dend = None
208 dline = [""]
208 dline = [""]
209 start = 0
209 start = 0
210 # zero copy representation of base as a buffer
210 # zero copy representation of base as a buffer
211 addbuf = util.buffer(base)
211 addbuf = util.buffer(base)
212
212
213 # start with a readonly loop that finds the offset of
213 # start with a readonly loop that finds the offset of
214 # each line and creates the deltas
214 # each line and creates the deltas
215 for f, todelete in changes:
215 for f, todelete in changes:
216 # bs will either be the index of the item or the insert point
216 # bs will either be the index of the item or the insert point
217 start, end = _msearch(addbuf, f, start)
217 start, end = _msearch(addbuf, f, start)
218 if not todelete:
218 if not todelete:
219 h, fl = self._lm[f]
219 h, fl = self._lm[f]
220 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
220 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
221 else:
221 else:
222 if start == end:
222 if start == end:
223 # item we want to delete was not found, error out
223 # item we want to delete was not found, error out
224 raise AssertionError(
224 raise AssertionError(
225 _("failed to remove %s from manifest") % f)
225 _("failed to remove %s from manifest") % f)
226 l = ""
226 l = ""
227 if dstart is not None and dstart <= start and dend >= start:
227 if dstart is not None and dstart <= start and dend >= start:
228 if dend < end:
228 if dend < end:
229 dend = end
229 dend = end
230 if l:
230 if l:
231 dline.append(l)
231 dline.append(l)
232 else:
232 else:
233 if dstart is not None:
233 if dstart is not None:
234 delta.append([dstart, dend, "".join(dline)])
234 delta.append([dstart, dend, "".join(dline)])
235 dstart = start
235 dstart = start
236 dend = end
236 dend = end
237 dline = [l]
237 dline = [l]
238
238
239 if dstart is not None:
239 if dstart is not None:
240 delta.append([dstart, dend, "".join(dline)])
240 delta.append([dstart, dend, "".join(dline)])
241 # apply the delta to the base, and get a delta for addrevision
241 # apply the delta to the base, and get a delta for addrevision
242 deltatext, arraytext = _addlistdelta(base, delta)
242 deltatext, arraytext = _addlistdelta(base, delta)
243 return arraytext, deltatext
243 return arraytext, deltatext
244
244
245 def _msearch(m, s, lo=0, hi=None):
245 def _msearch(m, s, lo=0, hi=None):
246 '''return a tuple (start, end) that says where to find s within m.
246 '''return a tuple (start, end) that says where to find s within m.
247
247
248 If the string is found m[start:end] are the line containing
248 If the string is found m[start:end] are the line containing
249 that string. If start == end the string was not found and
249 that string. If start == end the string was not found and
250 they indicate the proper sorted insertion point.
250 they indicate the proper sorted insertion point.
251
251
252 m should be a buffer or a string
252 m should be a buffer or a string
253 s is a string'''
253 s is a string'''
254 def advance(i, c):
254 def advance(i, c):
255 while i < lenm and m[i] != c:
255 while i < lenm and m[i] != c:
256 i += 1
256 i += 1
257 return i
257 return i
258 if not s:
258 if not s:
259 return (lo, lo)
259 return (lo, lo)
260 lenm = len(m)
260 lenm = len(m)
261 if not hi:
261 if not hi:
262 hi = lenm
262 hi = lenm
263 while lo < hi:
263 while lo < hi:
264 mid = (lo + hi) // 2
264 mid = (lo + hi) // 2
265 start = mid
265 start = mid
266 while start > 0 and m[start - 1] != '\n':
266 while start > 0 and m[start - 1] != '\n':
267 start -= 1
267 start -= 1
268 end = advance(start, '\0')
268 end = advance(start, '\0')
269 if m[start:end] < s:
269 if m[start:end] < s:
270 # we know that after the null there are 40 bytes of sha1
270 # we know that after the null there are 40 bytes of sha1
271 # this translates to the bisect lo = mid + 1
271 # this translates to the bisect lo = mid + 1
272 lo = advance(end + 40, '\n') + 1
272 lo = advance(end + 40, '\n') + 1
273 else:
273 else:
274 # this translates to the bisect hi = mid
274 # this translates to the bisect hi = mid
275 hi = start
275 hi = start
276 end = advance(lo, '\0')
276 end = advance(lo, '\0')
277 found = m[lo:end]
277 found = m[lo:end]
278 if s == found:
278 if s == found:
279 # we know that after the null there are 40 bytes of sha1
279 # we know that after the null there are 40 bytes of sha1
280 end = advance(end + 40, '\n')
280 end = advance(end + 40, '\n')
281 return (lo, end + 1)
281 return (lo, end + 1)
282 else:
282 else:
283 return (lo, lo)
283 return (lo, lo)
284
284
285 def _checkforbidden(l):
285 def _checkforbidden(l):
286 """Check filenames for illegal characters."""
286 """Check filenames for illegal characters."""
287 for f in l:
287 for f in l:
288 if '\n' in f or '\r' in f:
288 if '\n' in f or '\r' in f:
289 raise error.RevlogError(
289 raise error.RevlogError(
290 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
290 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
291
291
292
292
293 # apply the changes collected during the bisect loop to our addlist
293 # apply the changes collected during the bisect loop to our addlist
294 # return a delta suitable for addrevision
294 # return a delta suitable for addrevision
295 def _addlistdelta(addlist, x):
295 def _addlistdelta(addlist, x):
296 # for large addlist arrays, building a new array is cheaper
296 # for large addlist arrays, building a new array is cheaper
297 # than repeatedly modifying the existing one
297 # than repeatedly modifying the existing one
298 currentposition = 0
298 currentposition = 0
299 newaddlist = array.array('c')
299 newaddlist = array.array('c')
300
300
301 for start, end, content in x:
301 for start, end, content in x:
302 newaddlist += addlist[currentposition:start]
302 newaddlist += addlist[currentposition:start]
303 if content:
303 if content:
304 newaddlist += array.array('c', content)
304 newaddlist += array.array('c', content)
305
305
306 currentposition = end
306 currentposition = end
307
307
308 newaddlist += addlist[currentposition:]
308 newaddlist += addlist[currentposition:]
309
309
310 deltatext = "".join(struct.pack(">lll", start, end, len(content))
310 deltatext = "".join(struct.pack(">lll", start, end, len(content))
311 + content for start, end, content in x)
311 + content for start, end, content in x)
312 return deltatext, newaddlist
312 return deltatext, newaddlist
313
313
314 class manifest(revlog.revlog):
314 class manifest(revlog.revlog):
315 def __init__(self, opener):
315 def __init__(self, opener):
316 # During normal operations, we expect to deal with not more than four
316 # During normal operations, we expect to deal with not more than four
317 # revs at a time (such as during commit --amend). When rebasing large
317 # revs at a time (such as during commit --amend). When rebasing large
318 # stacks of commits, the number can go up, hence the config knob below.
318 # stacks of commits, the number can go up, hence the config knob below.
319 cachesize = 4
319 cachesize = 4
320 opts = getattr(opener, 'options', None)
320 opts = getattr(opener, 'options', None)
321 if opts is not None:
321 if opts is not None:
322 cachesize = opts.get('manifestcachesize', cachesize)
322 cachesize = opts.get('manifestcachesize', cachesize)
323 self._mancache = util.lrucachedict(cachesize)
323 self._mancache = util.lrucachedict(cachesize)
324 revlog.revlog.__init__(self, opener, "00manifest.i")
324 revlog.revlog.__init__(self, opener, "00manifest.i")
325
325
326 def readdelta(self, node):
326 def readdelta(self, node):
327 r = self.rev(node)
327 r = self.rev(node)
328 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
328 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
329 return manifestdict(d)
329 return manifestdict(d)
330
330
331 def readfast(self, node):
331 def readfast(self, node):
332 '''use the faster of readdelta or read'''
332 '''use the faster of readdelta or read'''
333 r = self.rev(node)
333 r = self.rev(node)
334 deltaparent = self.deltaparent(r)
334 deltaparent = self.deltaparent(r)
335 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
335 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
336 return self.readdelta(node)
336 return self.readdelta(node)
337 return self.read(node)
337 return self.read(node)
338
338
339 def read(self, node):
339 def read(self, node):
340 if node == revlog.nullid:
340 if node == revlog.nullid:
341 return manifestdict() # don't upset local cache
341 return manifestdict() # don't upset local cache
342 if node in self._mancache:
342 if node in self._mancache:
343 return self._mancache[node][0]
343 return self._mancache[node][0]
344 text = self.revision(node)
344 text = self.revision(node)
345 arraytext = array.array('c', text)
345 arraytext = array.array('c', text)
346 m = manifestdict(text)
346 m = manifestdict(text)
347 self._mancache[node] = (m, arraytext)
347 self._mancache[node] = (m, arraytext)
348 return m
348 return m
349
349
350 def find(self, node, f):
350 def find(self, node, f):
351 '''look up entry for a single file efficiently.
351 '''look up entry for a single file efficiently.
352 return (node, flags) pair if found, (None, None) if not.'''
352 return (node, flags) pair if found, (None, None) if not.'''
353 if node in self._mancache:
353 m = self.read(node)
354 m = self._mancache[node][0]
355 return m.get(f), m.flags(f)
356 text = self.revision(node)
357 try:
354 try:
358 return manifestdict(text).find(f)
355 return m.find(f)
359 except KeyError:
356 except KeyError:
360 return None, None
357 return None, None
361
358
362 def add(self, m, transaction, link, p1, p2, added, removed):
359 def add(self, m, transaction, link, p1, p2, added, removed):
363 if p1 in self._mancache:
360 if p1 in self._mancache:
364 # If our first parent is in the manifest cache, we can
361 # If our first parent is in the manifest cache, we can
365 # compute a delta here using properties we know about the
362 # compute a delta here using properties we know about the
366 # manifest up-front, which may save time later for the
363 # manifest up-front, which may save time later for the
367 # revlog layer.
364 # revlog layer.
368
365
369 _checkforbidden(added)
366 _checkforbidden(added)
370 # combine the changed lists into one list for sorting
367 # combine the changed lists into one list for sorting
371 work = [(x, False) for x in added]
368 work = [(x, False) for x in added]
372 work.extend((x, True) for x in removed)
369 work.extend((x, True) for x in removed)
373 # this could use heapq.merge() (from Python 2.6+) or equivalent
370 # this could use heapq.merge() (from Python 2.6+) or equivalent
374 # since the lists are already sorted
371 # since the lists are already sorted
375 work.sort()
372 work.sort()
376
373
377 arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
374 arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
378 cachedelta = self.rev(p1), deltatext
375 cachedelta = self.rev(p1), deltatext
379 text = util.buffer(arraytext)
376 text = util.buffer(arraytext)
380 else:
377 else:
381 # The first parent manifest isn't already loaded, so we'll
378 # The first parent manifest isn't already loaded, so we'll
382 # just encode a fulltext of the manifest and pass that
379 # just encode a fulltext of the manifest and pass that
383 # through to the revlog layer, and let it handle the delta
380 # through to the revlog layer, and let it handle the delta
384 # process.
381 # process.
385 text = m.text()
382 text = m.text()
386 arraytext = array.array('c', text)
383 arraytext = array.array('c', text)
387 cachedelta = None
384 cachedelta = None
388
385
389 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
386 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
390 self._mancache[n] = (m, arraytext)
387 self._mancache[n] = (m, arraytext)
391
388
392 return n
389 return n
General Comments 0
You need to be logged in to leave comments. Login now