##// END OF EJS Templates
manifest: don't let find() look inside manifestdict...
Martin von Zweigbergk -
r24277:22d560fe default
parent child Browse files
Show More
@@ -1,389 +1,392 b''
1 # manifest.py - manifest revision class for mercurial
1 # manifest.py - manifest revision class for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from i18n import _
8 from i18n import _
9 import mdiff, parsers, error, revlog, util
9 import mdiff, parsers, error, revlog, util
10 import array, struct
10 import array, struct
11
11
12
12
13 class _lazymanifest(dict):
13 class _lazymanifest(dict):
14 """This is the pure implementation of lazymanifest.
14 """This is the pure implementation of lazymanifest.
15
15
16 It has not been optimized *at all* and is not lazy.
16 It has not been optimized *at all* and is not lazy.
17 """
17 """
18
18
19 def __init__(self, data):
19 def __init__(self, data):
20 # This init method does a little bit of excessive-looking
20 # This init method does a little bit of excessive-looking
21 # precondition checking. This is so that the behavior of this
21 # precondition checking. This is so that the behavior of this
22 # class exactly matches its C counterpart to try and help
22 # class exactly matches its C counterpart to try and help
23 # prevent surprise breakage for anyone that develops against
23 # prevent surprise breakage for anyone that develops against
24 # the pure version.
24 # the pure version.
25 if data and data[-1] != '\n':
25 if data and data[-1] != '\n':
26 raise ValueError('Manifest did not end in a newline.')
26 raise ValueError('Manifest did not end in a newline.')
27 dict.__init__(self)
27 dict.__init__(self)
28 prev = None
28 prev = None
29 for l in data.splitlines():
29 for l in data.splitlines():
30 if prev is not None and prev > l:
30 if prev is not None and prev > l:
31 raise ValueError('Manifest lines not in sorted order.')
31 raise ValueError('Manifest lines not in sorted order.')
32 prev = l
32 prev = l
33 f, n = l.split('\0')
33 f, n = l.split('\0')
34 if len(n) > 40:
34 if len(n) > 40:
35 self[f] = revlog.bin(n[:40]), n[40:]
35 self[f] = revlog.bin(n[:40]), n[40:]
36 else:
36 else:
37 self[f] = revlog.bin(n), ''
37 self[f] = revlog.bin(n), ''
38
38
39 def __setitem__(self, k, v):
39 def __setitem__(self, k, v):
40 node, flag = v
40 node, flag = v
41 assert node is not None
41 assert node is not None
42 if len(node) > 21:
42 if len(node) > 21:
43 node = node[:21] # match c implementation behavior
43 node = node[:21] # match c implementation behavior
44 dict.__setitem__(self, k, (node, flag))
44 dict.__setitem__(self, k, (node, flag))
45
45
46 def __iter__(self):
46 def __iter__(self):
47 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
47 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
48
48
49 def copy(self):
49 def copy(self):
50 c = _lazymanifest('')
50 c = _lazymanifest('')
51 c.update(self)
51 c.update(self)
52 return c
52 return c
53
53
54 def diff(self, m2, clean=False):
54 def diff(self, m2, clean=False):
55 '''Finds changes between the current manifest and m2.'''
55 '''Finds changes between the current manifest and m2.'''
56 diff = {}
56 diff = {}
57
57
58 for fn, e1 in self.iteritems():
58 for fn, e1 in self.iteritems():
59 if fn not in m2:
59 if fn not in m2:
60 diff[fn] = e1, (None, '')
60 diff[fn] = e1, (None, '')
61 else:
61 else:
62 e2 = m2[fn]
62 e2 = m2[fn]
63 if e1 != e2:
63 if e1 != e2:
64 diff[fn] = e1, e2
64 diff[fn] = e1, e2
65 elif clean:
65 elif clean:
66 diff[fn] = None
66 diff[fn] = None
67
67
68 for fn, e2 in m2.iteritems():
68 for fn, e2 in m2.iteritems():
69 if fn not in self:
69 if fn not in self:
70 diff[fn] = (None, ''), e2
70 diff[fn] = (None, ''), e2
71
71
72 return diff
72 return diff
73
73
74 def filtercopy(self, filterfn):
74 def filtercopy(self, filterfn):
75 c = _lazymanifest('')
75 c = _lazymanifest('')
76 for f, n, fl in self:
76 for f, n, fl in self:
77 if filterfn(f):
77 if filterfn(f):
78 c[f] = n, fl
78 c[f] = n, fl
79 return c
79 return c
80
80
81 def text(self):
81 def text(self):
82 """Get the full data of this manifest as a bytestring."""
82 """Get the full data of this manifest as a bytestring."""
83 fl = sorted(self)
83 fl = sorted(self)
84
84
85 _hex = revlog.hex
85 _hex = revlog.hex
86 # if this is changed to support newlines in filenames,
86 # if this is changed to support newlines in filenames,
87 # be sure to check the templates/ dir again (especially *-raw.tmpl)
87 # be sure to check the templates/ dir again (especially *-raw.tmpl)
88 return ''.join("%s\0%s%s\n" % (
88 return ''.join("%s\0%s%s\n" % (
89 f, _hex(n[:20]), flag) for f, n, flag in fl)
89 f, _hex(n[:20]), flag) for f, n, flag in fl)
90
90
91 try:
91 try:
92 _lazymanifest = parsers.lazymanifest
92 _lazymanifest = parsers.lazymanifest
93 except AttributeError:
93 except AttributeError:
94 pass
94 pass
95
95
96 class manifestdict(object):
96 class manifestdict(object):
97 def __init__(self, data=''):
97 def __init__(self, data=''):
98 self._lm = _lazymanifest(data)
98 self._lm = _lazymanifest(data)
99
99
100 def __getitem__(self, key):
100 def __getitem__(self, key):
101 return self._lm[key][0]
101 return self._lm[key][0]
102
102
103 def find(self, key):
104 return self._lm[key]
105
103 def __len__(self):
106 def __len__(self):
104 return len(self._lm)
107 return len(self._lm)
105
108
106 def __setitem__(self, key, node):
109 def __setitem__(self, key, node):
107 self._lm[key] = node, self.flags(key, '')
110 self._lm[key] = node, self.flags(key, '')
108
111
109 def __contains__(self, key):
112 def __contains__(self, key):
110 return key in self._lm
113 return key in self._lm
111
114
112 def __delitem__(self, key):
115 def __delitem__(self, key):
113 del self._lm[key]
116 del self._lm[key]
114
117
115 def keys(self):
118 def keys(self):
116 return [x[0] for x in self._lm]
119 return [x[0] for x in self._lm]
117
120
118 def iterkeys(self):
121 def iterkeys(self):
119 return iter(self.keys())
122 return iter(self.keys())
120
123
121 def intersectfiles(self, files):
124 def intersectfiles(self, files):
122 '''make a new lazymanifest with the intersection of self with files
125 '''make a new lazymanifest with the intersection of self with files
123
126
124 The algorithm assumes that files is much smaller than self.'''
127 The algorithm assumes that files is much smaller than self.'''
125 ret = manifestdict()
128 ret = manifestdict()
126 lm = self._lm
129 lm = self._lm
127 for fn in files:
130 for fn in files:
128 if fn in lm:
131 if fn in lm:
129 ret._lm[fn] = self._lm[fn]
132 ret._lm[fn] = self._lm[fn]
130 return ret
133 return ret
131
134
132 def filesnotin(self, m2):
135 def filesnotin(self, m2):
133 '''Set of files in this manifest that are not in the other'''
136 '''Set of files in this manifest that are not in the other'''
134 files = set(self.iterkeys())
137 files = set(self.iterkeys())
135 files.difference_update(m2.iterkeys())
138 files.difference_update(m2.iterkeys())
136 return files
139 return files
137
140
138 def matches(self, match):
141 def matches(self, match):
139 '''generate a new manifest filtered by the match argument'''
142 '''generate a new manifest filtered by the match argument'''
140 if match.always():
143 if match.always():
141 return self.copy()
144 return self.copy()
142
145
143 files = match.files()
146 files = match.files()
144 if (match.matchfn == match.exact or
147 if (match.matchfn == match.exact or
145 (not match.anypats() and util.all(fn in self for fn in files))):
148 (not match.anypats() and util.all(fn in self for fn in files))):
146 return self.intersectfiles(files)
149 return self.intersectfiles(files)
147
150
148 lm = manifestdict('')
151 lm = manifestdict('')
149 lm._lm = self._lm.filtercopy(match)
152 lm._lm = self._lm.filtercopy(match)
150 return lm
153 return lm
151
154
152 def diff(self, m2, clean=False):
155 def diff(self, m2, clean=False):
153 '''Finds changes between the current manifest and m2.
156 '''Finds changes between the current manifest and m2.
154
157
155 Args:
158 Args:
156 m2: the manifest to which this manifest should be compared.
159 m2: the manifest to which this manifest should be compared.
157 clean: if true, include files unchanged between these manifests
160 clean: if true, include files unchanged between these manifests
158 with a None value in the returned dictionary.
161 with a None value in the returned dictionary.
159
162
160 The result is returned as a dict with filename as key and
163 The result is returned as a dict with filename as key and
161 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
164 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
162 nodeid in the current/other manifest and fl1/fl2 is the flag
165 nodeid in the current/other manifest and fl1/fl2 is the flag
163 in the current/other manifest. Where the file does not exist,
166 in the current/other manifest. Where the file does not exist,
164 the nodeid will be None and the flags will be the empty
167 the nodeid will be None and the flags will be the empty
165 string.
168 string.
166 '''
169 '''
167 return self._lm.diff(m2._lm, clean)
170 return self._lm.diff(m2._lm, clean)
168
171
169 def setflag(self, key, flag):
172 def setflag(self, key, flag):
170 self._lm[key] = self[key], flag
173 self._lm[key] = self[key], flag
171
174
172 def get(self, key, default=None):
175 def get(self, key, default=None):
173 try:
176 try:
174 return self._lm[key][0]
177 return self._lm[key][0]
175 except KeyError:
178 except KeyError:
176 return default
179 return default
177
180
178 def flags(self, key, default=''):
181 def flags(self, key, default=''):
179 try:
182 try:
180 return self._lm[key][1]
183 return self._lm[key][1]
181 except KeyError:
184 except KeyError:
182 return default
185 return default
183
186
184 def __iter__(self):
187 def __iter__(self):
185 return (x[0] for x in self._lm)
188 return (x[0] for x in self._lm)
186
189
187 def copy(self):
190 def copy(self):
188 c = manifestdict('')
191 c = manifestdict('')
189 c._lm = self._lm.copy()
192 c._lm = self._lm.copy()
190 return c
193 return c
191
194
192 def iteritems(self):
195 def iteritems(self):
193 return (x[:2] for x in self._lm)
196 return (x[:2] for x in self._lm)
194
197
195 def text(self):
198 def text(self):
196 return self._lm.text()
199 return self._lm.text()
197
200
198 def fastdelta(self, base, changes):
201 def fastdelta(self, base, changes):
199 """Given a base manifest text as an array.array and a list of changes
202 """Given a base manifest text as an array.array and a list of changes
200 relative to that text, compute a delta that can be used by revlog.
203 relative to that text, compute a delta that can be used by revlog.
201 """
204 """
202 delta = []
205 delta = []
203 dstart = None
206 dstart = None
204 dend = None
207 dend = None
205 dline = [""]
208 dline = [""]
206 start = 0
209 start = 0
207 # zero copy representation of base as a buffer
210 # zero copy representation of base as a buffer
208 addbuf = util.buffer(base)
211 addbuf = util.buffer(base)
209
212
210 # start with a readonly loop that finds the offset of
213 # start with a readonly loop that finds the offset of
211 # each line and creates the deltas
214 # each line and creates the deltas
212 for f, todelete in changes:
215 for f, todelete in changes:
213 # bs will either be the index of the item or the insert point
216 # bs will either be the index of the item or the insert point
214 start, end = _msearch(addbuf, f, start)
217 start, end = _msearch(addbuf, f, start)
215 if not todelete:
218 if not todelete:
216 h, fl = self._lm[f]
219 h, fl = self._lm[f]
217 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
220 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
218 else:
221 else:
219 if start == end:
222 if start == end:
220 # item we want to delete was not found, error out
223 # item we want to delete was not found, error out
221 raise AssertionError(
224 raise AssertionError(
222 _("failed to remove %s from manifest") % f)
225 _("failed to remove %s from manifest") % f)
223 l = ""
226 l = ""
224 if dstart is not None and dstart <= start and dend >= start:
227 if dstart is not None and dstart <= start and dend >= start:
225 if dend < end:
228 if dend < end:
226 dend = end
229 dend = end
227 if l:
230 if l:
228 dline.append(l)
231 dline.append(l)
229 else:
232 else:
230 if dstart is not None:
233 if dstart is not None:
231 delta.append([dstart, dend, "".join(dline)])
234 delta.append([dstart, dend, "".join(dline)])
232 dstart = start
235 dstart = start
233 dend = end
236 dend = end
234 dline = [l]
237 dline = [l]
235
238
236 if dstart is not None:
239 if dstart is not None:
237 delta.append([dstart, dend, "".join(dline)])
240 delta.append([dstart, dend, "".join(dline)])
238 # apply the delta to the base, and get a delta for addrevision
241 # apply the delta to the base, and get a delta for addrevision
239 deltatext, arraytext = _addlistdelta(base, delta)
242 deltatext, arraytext = _addlistdelta(base, delta)
240 return arraytext, deltatext
243 return arraytext, deltatext
241
244
242 def _msearch(m, s, lo=0, hi=None):
245 def _msearch(m, s, lo=0, hi=None):
243 '''return a tuple (start, end) that says where to find s within m.
246 '''return a tuple (start, end) that says where to find s within m.
244
247
245 If the string is found m[start:end] are the line containing
248 If the string is found m[start:end] are the line containing
246 that string. If start == end the string was not found and
249 that string. If start == end the string was not found and
247 they indicate the proper sorted insertion point.
250 they indicate the proper sorted insertion point.
248
251
249 m should be a buffer or a string
252 m should be a buffer or a string
250 s is a string'''
253 s is a string'''
251 def advance(i, c):
254 def advance(i, c):
252 while i < lenm and m[i] != c:
255 while i < lenm and m[i] != c:
253 i += 1
256 i += 1
254 return i
257 return i
255 if not s:
258 if not s:
256 return (lo, lo)
259 return (lo, lo)
257 lenm = len(m)
260 lenm = len(m)
258 if not hi:
261 if not hi:
259 hi = lenm
262 hi = lenm
260 while lo < hi:
263 while lo < hi:
261 mid = (lo + hi) // 2
264 mid = (lo + hi) // 2
262 start = mid
265 start = mid
263 while start > 0 and m[start - 1] != '\n':
266 while start > 0 and m[start - 1] != '\n':
264 start -= 1
267 start -= 1
265 end = advance(start, '\0')
268 end = advance(start, '\0')
266 if m[start:end] < s:
269 if m[start:end] < s:
267 # we know that after the null there are 40 bytes of sha1
270 # we know that after the null there are 40 bytes of sha1
268 # this translates to the bisect lo = mid + 1
271 # this translates to the bisect lo = mid + 1
269 lo = advance(end + 40, '\n') + 1
272 lo = advance(end + 40, '\n') + 1
270 else:
273 else:
271 # this translates to the bisect hi = mid
274 # this translates to the bisect hi = mid
272 hi = start
275 hi = start
273 end = advance(lo, '\0')
276 end = advance(lo, '\0')
274 found = m[lo:end]
277 found = m[lo:end]
275 if s == found:
278 if s == found:
276 # we know that after the null there are 40 bytes of sha1
279 # we know that after the null there are 40 bytes of sha1
277 end = advance(end + 40, '\n')
280 end = advance(end + 40, '\n')
278 return (lo, end + 1)
281 return (lo, end + 1)
279 else:
282 else:
280 return (lo, lo)
283 return (lo, lo)
281
284
282 def _checkforbidden(l):
285 def _checkforbidden(l):
283 """Check filenames for illegal characters."""
286 """Check filenames for illegal characters."""
284 for f in l:
287 for f in l:
285 if '\n' in f or '\r' in f:
288 if '\n' in f or '\r' in f:
286 raise error.RevlogError(
289 raise error.RevlogError(
287 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
290 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
288
291
289
292
290 # apply the changes collected during the bisect loop to our addlist
293 # apply the changes collected during the bisect loop to our addlist
291 # return a delta suitable for addrevision
294 # return a delta suitable for addrevision
292 def _addlistdelta(addlist, x):
295 def _addlistdelta(addlist, x):
293 # for large addlist arrays, building a new array is cheaper
296 # for large addlist arrays, building a new array is cheaper
294 # than repeatedly modifying the existing one
297 # than repeatedly modifying the existing one
295 currentposition = 0
298 currentposition = 0
296 newaddlist = array.array('c')
299 newaddlist = array.array('c')
297
300
298 for start, end, content in x:
301 for start, end, content in x:
299 newaddlist += addlist[currentposition:start]
302 newaddlist += addlist[currentposition:start]
300 if content:
303 if content:
301 newaddlist += array.array('c', content)
304 newaddlist += array.array('c', content)
302
305
303 currentposition = end
306 currentposition = end
304
307
305 newaddlist += addlist[currentposition:]
308 newaddlist += addlist[currentposition:]
306
309
307 deltatext = "".join(struct.pack(">lll", start, end, len(content))
310 deltatext = "".join(struct.pack(">lll", start, end, len(content))
308 + content for start, end, content in x)
311 + content for start, end, content in x)
309 return deltatext, newaddlist
312 return deltatext, newaddlist
310
313
311 class manifest(revlog.revlog):
314 class manifest(revlog.revlog):
312 def __init__(self, opener):
315 def __init__(self, opener):
313 # During normal operations, we expect to deal with not more than four
316 # During normal operations, we expect to deal with not more than four
314 # revs at a time (such as during commit --amend). When rebasing large
317 # revs at a time (such as during commit --amend). When rebasing large
315 # stacks of commits, the number can go up, hence the config knob below.
318 # stacks of commits, the number can go up, hence the config knob below.
316 cachesize = 4
319 cachesize = 4
317 opts = getattr(opener, 'options', None)
320 opts = getattr(opener, 'options', None)
318 if opts is not None:
321 if opts is not None:
319 cachesize = opts.get('manifestcachesize', cachesize)
322 cachesize = opts.get('manifestcachesize', cachesize)
320 self._mancache = util.lrucachedict(cachesize)
323 self._mancache = util.lrucachedict(cachesize)
321 revlog.revlog.__init__(self, opener, "00manifest.i")
324 revlog.revlog.__init__(self, opener, "00manifest.i")
322
325
323 def readdelta(self, node):
326 def readdelta(self, node):
324 r = self.rev(node)
327 r = self.rev(node)
325 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
328 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
326 return manifestdict(d)
329 return manifestdict(d)
327
330
328 def readfast(self, node):
331 def readfast(self, node):
329 '''use the faster of readdelta or read'''
332 '''use the faster of readdelta or read'''
330 r = self.rev(node)
333 r = self.rev(node)
331 deltaparent = self.deltaparent(r)
334 deltaparent = self.deltaparent(r)
332 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
335 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
333 return self.readdelta(node)
336 return self.readdelta(node)
334 return self.read(node)
337 return self.read(node)
335
338
336 def read(self, node):
339 def read(self, node):
337 if node == revlog.nullid:
340 if node == revlog.nullid:
338 return manifestdict() # don't upset local cache
341 return manifestdict() # don't upset local cache
339 if node in self._mancache:
342 if node in self._mancache:
340 return self._mancache[node][0]
343 return self._mancache[node][0]
341 text = self.revision(node)
344 text = self.revision(node)
342 arraytext = array.array('c', text)
345 arraytext = array.array('c', text)
343 m = manifestdict(text)
346 m = manifestdict(text)
344 self._mancache[node] = (m, arraytext)
347 self._mancache[node] = (m, arraytext)
345 return m
348 return m
346
349
347 def find(self, node, f):
350 def find(self, node, f):
348 '''look up entry for a single file efficiently.
351 '''look up entry for a single file efficiently.
349 return (node, flags) pair if found, (None, None) if not.'''
352 return (node, flags) pair if found, (None, None) if not.'''
350 if node in self._mancache:
353 if node in self._mancache:
351 m = self._mancache[node][0]
354 m = self._mancache[node][0]
352 return m.get(f), m.flags(f)
355 return m.get(f), m.flags(f)
353 text = self.revision(node)
356 text = self.revision(node)
354 try:
357 try:
355 return manifestdict(text)._lm[f]
358 return manifestdict(text).find(f)
356 except KeyError:
359 except KeyError:
357 return None, None
360 return None, None
358
361
359 def add(self, m, transaction, link, p1, p2, added, removed):
362 def add(self, m, transaction, link, p1, p2, added, removed):
360 if p1 in self._mancache:
363 if p1 in self._mancache:
361 # If our first parent is in the manifest cache, we can
364 # If our first parent is in the manifest cache, we can
362 # compute a delta here using properties we know about the
365 # compute a delta here using properties we know about the
363 # manifest up-front, which may save time later for the
366 # manifest up-front, which may save time later for the
364 # revlog layer.
367 # revlog layer.
365
368
366 _checkforbidden(added)
369 _checkforbidden(added)
367 # combine the changed lists into one list for sorting
370 # combine the changed lists into one list for sorting
368 work = [(x, False) for x in added]
371 work = [(x, False) for x in added]
369 work.extend((x, True) for x in removed)
372 work.extend((x, True) for x in removed)
370 # this could use heapq.merge() (from Python 2.6+) or equivalent
373 # this could use heapq.merge() (from Python 2.6+) or equivalent
371 # since the lists are already sorted
374 # since the lists are already sorted
372 work.sort()
375 work.sort()
373
376
374 arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
377 arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
375 cachedelta = self.rev(p1), deltatext
378 cachedelta = self.rev(p1), deltatext
376 text = util.buffer(arraytext)
379 text = util.buffer(arraytext)
377 else:
380 else:
378 # The first parent manifest isn't already loaded, so we'll
381 # The first parent manifest isn't already loaded, so we'll
379 # just encode a fulltext of the manifest and pass that
382 # just encode a fulltext of the manifest and pass that
380 # through to the revlog layer, and let it handle the delta
383 # through to the revlog layer, and let it handle the delta
381 # process.
384 # process.
382 text = m.text()
385 text = m.text()
383 arraytext = array.array('c', text)
386 arraytext = array.array('c', text)
384 cachedelta = None
387 cachedelta = None
385
388
386 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
389 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
387 self._mancache[n] = (m, arraytext)
390 self._mancache[n] = (m, arraytext)
388
391
389 return n
392 return n
General Comments 0
You need to be logged in to leave comments. Login now