##// END OF EJS Templates
manifest: don't let find() look inside manifestdict...
Martin von Zweigbergk -
r24277:22d560fe default
parent child Browse files
Show More
@@ -1,389 +1,392 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import mdiff, parsers, error, revlog, util
10 10 import array, struct
11 11
12 12
13 13 class _lazymanifest(dict):
14 14 """This is the pure implementation of lazymanifest.
15 15
16 16 It has not been optimized *at all* and is not lazy.
17 17 """
18 18
19 19 def __init__(self, data):
20 20 # This init method does a little bit of excessive-looking
21 21 # precondition checking. This is so that the behavior of this
22 22 # class exactly matches its C counterpart to try and help
23 23 # prevent surprise breakage for anyone that develops against
24 24 # the pure version.
25 25 if data and data[-1] != '\n':
26 26 raise ValueError('Manifest did not end in a newline.')
27 27 dict.__init__(self)
28 28 prev = None
29 29 for l in data.splitlines():
30 30 if prev is not None and prev > l:
31 31 raise ValueError('Manifest lines not in sorted order.')
32 32 prev = l
33 33 f, n = l.split('\0')
34 34 if len(n) > 40:
35 35 self[f] = revlog.bin(n[:40]), n[40:]
36 36 else:
37 37 self[f] = revlog.bin(n), ''
38 38
39 39 def __setitem__(self, k, v):
40 40 node, flag = v
41 41 assert node is not None
42 42 if len(node) > 21:
43 43 node = node[:21] # match c implementation behavior
44 44 dict.__setitem__(self, k, (node, flag))
45 45
46 46 def __iter__(self):
47 47 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
48 48
49 49 def copy(self):
50 50 c = _lazymanifest('')
51 51 c.update(self)
52 52 return c
53 53
54 54 def diff(self, m2, clean=False):
55 55 '''Finds changes between the current manifest and m2.'''
56 56 diff = {}
57 57
58 58 for fn, e1 in self.iteritems():
59 59 if fn not in m2:
60 60 diff[fn] = e1, (None, '')
61 61 else:
62 62 e2 = m2[fn]
63 63 if e1 != e2:
64 64 diff[fn] = e1, e2
65 65 elif clean:
66 66 diff[fn] = None
67 67
68 68 for fn, e2 in m2.iteritems():
69 69 if fn not in self:
70 70 diff[fn] = (None, ''), e2
71 71
72 72 return diff
73 73
74 74 def filtercopy(self, filterfn):
75 75 c = _lazymanifest('')
76 76 for f, n, fl in self:
77 77 if filterfn(f):
78 78 c[f] = n, fl
79 79 return c
80 80
81 81 def text(self):
82 82 """Get the full data of this manifest as a bytestring."""
83 83 fl = sorted(self)
84 84
85 85 _hex = revlog.hex
86 86 # if this is changed to support newlines in filenames,
87 87 # be sure to check the templates/ dir again (especially *-raw.tmpl)
88 88 return ''.join("%s\0%s%s\n" % (
89 89 f, _hex(n[:20]), flag) for f, n, flag in fl)
90 90
91 91 try:
92 92 _lazymanifest = parsers.lazymanifest
93 93 except AttributeError:
94 94 pass
95 95
96 96 class manifestdict(object):
97 97 def __init__(self, data=''):
98 98 self._lm = _lazymanifest(data)
99 99
100 100 def __getitem__(self, key):
101 101 return self._lm[key][0]
102 102
103 def find(self, key):
104 return self._lm[key]
105
103 106 def __len__(self):
104 107 return len(self._lm)
105 108
106 109 def __setitem__(self, key, node):
107 110 self._lm[key] = node, self.flags(key, '')
108 111
109 112 def __contains__(self, key):
110 113 return key in self._lm
111 114
112 115 def __delitem__(self, key):
113 116 del self._lm[key]
114 117
115 118 def keys(self):
116 119 return [x[0] for x in self._lm]
117 120
118 121 def iterkeys(self):
119 122 return iter(self.keys())
120 123
121 124 def intersectfiles(self, files):
122 125 '''make a new lazymanifest with the intersection of self with files
123 126
124 127 The algorithm assumes that files is much smaller than self.'''
125 128 ret = manifestdict()
126 129 lm = self._lm
127 130 for fn in files:
128 131 if fn in lm:
129 132 ret._lm[fn] = self._lm[fn]
130 133 return ret
131 134
132 135 def filesnotin(self, m2):
133 136 '''Set of files in this manifest that are not in the other'''
134 137 files = set(self.iterkeys())
135 138 files.difference_update(m2.iterkeys())
136 139 return files
137 140
138 141 def matches(self, match):
139 142 '''generate a new manifest filtered by the match argument'''
140 143 if match.always():
141 144 return self.copy()
142 145
143 146 files = match.files()
144 147 if (match.matchfn == match.exact or
145 148 (not match.anypats() and util.all(fn in self for fn in files))):
146 149 return self.intersectfiles(files)
147 150
148 151 lm = manifestdict('')
149 152 lm._lm = self._lm.filtercopy(match)
150 153 return lm
151 154
152 155 def diff(self, m2, clean=False):
153 156 '''Finds changes between the current manifest and m2.
154 157
155 158 Args:
156 159 m2: the manifest to which this manifest should be compared.
157 160 clean: if true, include files unchanged between these manifests
158 161 with a None value in the returned dictionary.
159 162
160 163 The result is returned as a dict with filename as key and
161 164 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
162 165 nodeid in the current/other manifest and fl1/fl2 is the flag
163 166 in the current/other manifest. Where the file does not exist,
164 167 the nodeid will be None and the flags will be the empty
165 168 string.
166 169 '''
167 170 return self._lm.diff(m2._lm, clean)
168 171
169 172 def setflag(self, key, flag):
170 173 self._lm[key] = self[key], flag
171 174
172 175 def get(self, key, default=None):
173 176 try:
174 177 return self._lm[key][0]
175 178 except KeyError:
176 179 return default
177 180
178 181 def flags(self, key, default=''):
179 182 try:
180 183 return self._lm[key][1]
181 184 except KeyError:
182 185 return default
183 186
184 187 def __iter__(self):
185 188 return (x[0] for x in self._lm)
186 189
187 190 def copy(self):
188 191 c = manifestdict('')
189 192 c._lm = self._lm.copy()
190 193 return c
191 194
192 195 def iteritems(self):
193 196 return (x[:2] for x in self._lm)
194 197
195 198 def text(self):
196 199 return self._lm.text()
197 200
198 201 def fastdelta(self, base, changes):
199 202 """Given a base manifest text as an array.array and a list of changes
200 203 relative to that text, compute a delta that can be used by revlog.
201 204 """
202 205 delta = []
203 206 dstart = None
204 207 dend = None
205 208 dline = [""]
206 209 start = 0
207 210 # zero copy representation of base as a buffer
208 211 addbuf = util.buffer(base)
209 212
210 213 # start with a readonly loop that finds the offset of
211 214 # each line and creates the deltas
212 215 for f, todelete in changes:
213 216 # bs will either be the index of the item or the insert point
214 217 start, end = _msearch(addbuf, f, start)
215 218 if not todelete:
216 219 h, fl = self._lm[f]
217 220 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
218 221 else:
219 222 if start == end:
220 223 # item we want to delete was not found, error out
221 224 raise AssertionError(
222 225 _("failed to remove %s from manifest") % f)
223 226 l = ""
224 227 if dstart is not None and dstart <= start and dend >= start:
225 228 if dend < end:
226 229 dend = end
227 230 if l:
228 231 dline.append(l)
229 232 else:
230 233 if dstart is not None:
231 234 delta.append([dstart, dend, "".join(dline)])
232 235 dstart = start
233 236 dend = end
234 237 dline = [l]
235 238
236 239 if dstart is not None:
237 240 delta.append([dstart, dend, "".join(dline)])
238 241 # apply the delta to the base, and get a delta for addrevision
239 242 deltatext, arraytext = _addlistdelta(base, delta)
240 243 return arraytext, deltatext
241 244
242 245 def _msearch(m, s, lo=0, hi=None):
243 246 '''return a tuple (start, end) that says where to find s within m.
244 247
245 248 If the string is found m[start:end] are the line containing
246 249 that string. If start == end the string was not found and
247 250 they indicate the proper sorted insertion point.
248 251
249 252 m should be a buffer or a string
250 253 s is a string'''
251 254 def advance(i, c):
252 255 while i < lenm and m[i] != c:
253 256 i += 1
254 257 return i
255 258 if not s:
256 259 return (lo, lo)
257 260 lenm = len(m)
258 261 if not hi:
259 262 hi = lenm
260 263 while lo < hi:
261 264 mid = (lo + hi) // 2
262 265 start = mid
263 266 while start > 0 and m[start - 1] != '\n':
264 267 start -= 1
265 268 end = advance(start, '\0')
266 269 if m[start:end] < s:
267 270 # we know that after the null there are 40 bytes of sha1
268 271 # this translates to the bisect lo = mid + 1
269 272 lo = advance(end + 40, '\n') + 1
270 273 else:
271 274 # this translates to the bisect hi = mid
272 275 hi = start
273 276 end = advance(lo, '\0')
274 277 found = m[lo:end]
275 278 if s == found:
276 279 # we know that after the null there are 40 bytes of sha1
277 280 end = advance(end + 40, '\n')
278 281 return (lo, end + 1)
279 282 else:
280 283 return (lo, lo)
281 284
282 285 def _checkforbidden(l):
283 286 """Check filenames for illegal characters."""
284 287 for f in l:
285 288 if '\n' in f or '\r' in f:
286 289 raise error.RevlogError(
287 290 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
288 291
289 292
290 293 # apply the changes collected during the bisect loop to our addlist
291 294 # return a delta suitable for addrevision
292 295 def _addlistdelta(addlist, x):
293 296 # for large addlist arrays, building a new array is cheaper
294 297 # than repeatedly modifying the existing one
295 298 currentposition = 0
296 299 newaddlist = array.array('c')
297 300
298 301 for start, end, content in x:
299 302 newaddlist += addlist[currentposition:start]
300 303 if content:
301 304 newaddlist += array.array('c', content)
302 305
303 306 currentposition = end
304 307
305 308 newaddlist += addlist[currentposition:]
306 309
307 310 deltatext = "".join(struct.pack(">lll", start, end, len(content))
308 311 + content for start, end, content in x)
309 312 return deltatext, newaddlist
310 313
311 314 class manifest(revlog.revlog):
312 315 def __init__(self, opener):
313 316 # During normal operations, we expect to deal with not more than four
314 317 # revs at a time (such as during commit --amend). When rebasing large
315 318 # stacks of commits, the number can go up, hence the config knob below.
316 319 cachesize = 4
317 320 opts = getattr(opener, 'options', None)
318 321 if opts is not None:
319 322 cachesize = opts.get('manifestcachesize', cachesize)
320 323 self._mancache = util.lrucachedict(cachesize)
321 324 revlog.revlog.__init__(self, opener, "00manifest.i")
322 325
323 326 def readdelta(self, node):
324 327 r = self.rev(node)
325 328 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
326 329 return manifestdict(d)
327 330
328 331 def readfast(self, node):
329 332 '''use the faster of readdelta or read'''
330 333 r = self.rev(node)
331 334 deltaparent = self.deltaparent(r)
332 335 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
333 336 return self.readdelta(node)
334 337 return self.read(node)
335 338
336 339 def read(self, node):
337 340 if node == revlog.nullid:
338 341 return manifestdict() # don't upset local cache
339 342 if node in self._mancache:
340 343 return self._mancache[node][0]
341 344 text = self.revision(node)
342 345 arraytext = array.array('c', text)
343 346 m = manifestdict(text)
344 347 self._mancache[node] = (m, arraytext)
345 348 return m
346 349
347 350 def find(self, node, f):
348 351 '''look up entry for a single file efficiently.
349 352 return (node, flags) pair if found, (None, None) if not.'''
350 353 if node in self._mancache:
351 354 m = self._mancache[node][0]
352 355 return m.get(f), m.flags(f)
353 356 text = self.revision(node)
354 357 try:
355 return manifestdict(text)._lm[f]
358 return manifestdict(text).find(f)
356 359 except KeyError:
357 360 return None, None
358 361
359 362 def add(self, m, transaction, link, p1, p2, added, removed):
360 363 if p1 in self._mancache:
361 364 # If our first parent is in the manifest cache, we can
362 365 # compute a delta here using properties we know about the
363 366 # manifest up-front, which may save time later for the
364 367 # revlog layer.
365 368
366 369 _checkforbidden(added)
367 370 # combine the changed lists into one list for sorting
368 371 work = [(x, False) for x in added]
369 372 work.extend((x, True) for x in removed)
370 373 # this could use heapq.merge() (from Python 2.6+) or equivalent
371 374 # since the lists are already sorted
372 375 work.sort()
373 376
374 377 arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
375 378 cachedelta = self.rev(p1), deltatext
376 379 text = util.buffer(arraytext)
377 380 else:
378 381 # The first parent manifest isn't already loaded, so we'll
379 382 # just encode a fulltext of the manifest and pass that
380 383 # through to the revlog layer, and let it handle the delta
381 384 # process.
382 385 text = m.text()
383 386 arraytext = array.array('c', text)
384 387 cachedelta = None
385 388
386 389 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
387 390 self._mancache[n] = (m, arraytext)
388 391
389 392 return n
General Comments 0
You need to be logged in to leave comments. Login now