##// END OF EJS Templates
manifest: rewrite find(node, f) in terms of read(node)...
Martin von Zweigbergk -
r24292:b7add2eb default
parent child Browse files
Show More
@@ -1,392 +1,389
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import mdiff, parsers, error, revlog, util
10 10 import array, struct
11 11
12 12
13 13 class _lazymanifest(dict):
14 14 """This is the pure implementation of lazymanifest.
15 15
16 16 It has not been optimized *at all* and is not lazy.
17 17 """
18 18
19 19 def __init__(self, data):
20 20 # This init method does a little bit of excessive-looking
21 21 # precondition checking. This is so that the behavior of this
22 22 # class exactly matches its C counterpart to try and help
23 23 # prevent surprise breakage for anyone that develops against
24 24 # the pure version.
25 25 if data and data[-1] != '\n':
26 26 raise ValueError('Manifest did not end in a newline.')
27 27 dict.__init__(self)
28 28 prev = None
29 29 for l in data.splitlines():
30 30 if prev is not None and prev > l:
31 31 raise ValueError('Manifest lines not in sorted order.')
32 32 prev = l
33 33 f, n = l.split('\0')
34 34 if len(n) > 40:
35 35 self[f] = revlog.bin(n[:40]), n[40:]
36 36 else:
37 37 self[f] = revlog.bin(n), ''
38 38
39 39 def __setitem__(self, k, v):
40 40 node, flag = v
41 41 assert node is not None
42 42 if len(node) > 21:
43 43 node = node[:21] # match c implementation behavior
44 44 dict.__setitem__(self, k, (node, flag))
45 45
46 46 def __iter__(self):
47 47 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
48 48
49 49 def copy(self):
50 50 c = _lazymanifest('')
51 51 c.update(self)
52 52 return c
53 53
54 54 def diff(self, m2, clean=False):
55 55 '''Finds changes between the current manifest and m2.'''
56 56 diff = {}
57 57
58 58 for fn, e1 in self.iteritems():
59 59 if fn not in m2:
60 60 diff[fn] = e1, (None, '')
61 61 else:
62 62 e2 = m2[fn]
63 63 if e1 != e2:
64 64 diff[fn] = e1, e2
65 65 elif clean:
66 66 diff[fn] = None
67 67
68 68 for fn, e2 in m2.iteritems():
69 69 if fn not in self:
70 70 diff[fn] = (None, ''), e2
71 71
72 72 return diff
73 73
74 74 def filtercopy(self, filterfn):
75 75 c = _lazymanifest('')
76 76 for f, n, fl in self:
77 77 if filterfn(f):
78 78 c[f] = n, fl
79 79 return c
80 80
81 81 def text(self):
82 82 """Get the full data of this manifest as a bytestring."""
83 83 fl = sorted(self)
84 84
85 85 _hex = revlog.hex
86 86 # if this is changed to support newlines in filenames,
87 87 # be sure to check the templates/ dir again (especially *-raw.tmpl)
88 88 return ''.join("%s\0%s%s\n" % (
89 89 f, _hex(n[:20]), flag) for f, n, flag in fl)
90 90
91 91 try:
92 92 _lazymanifest = parsers.lazymanifest
93 93 except AttributeError:
94 94 pass
95 95
96 96 class manifestdict(object):
97 97 def __init__(self, data=''):
98 98 self._lm = _lazymanifest(data)
99 99
100 100 def __getitem__(self, key):
101 101 return self._lm[key][0]
102 102
103 103 def find(self, key):
104 104 return self._lm[key]
105 105
106 106 def __len__(self):
107 107 return len(self._lm)
108 108
109 109 def __setitem__(self, key, node):
110 110 self._lm[key] = node, self.flags(key, '')
111 111
112 112 def __contains__(self, key):
113 113 return key in self._lm
114 114
115 115 def __delitem__(self, key):
116 116 del self._lm[key]
117 117
118 118 def keys(self):
119 119 return [x[0] for x in self._lm]
120 120
121 121 def iterkeys(self):
122 122 return iter(self.keys())
123 123
124 124 def intersectfiles(self, files):
125 125 '''make a new lazymanifest with the intersection of self with files
126 126
127 127 The algorithm assumes that files is much smaller than self.'''
128 128 ret = manifestdict()
129 129 lm = self._lm
130 130 for fn in files:
131 131 if fn in lm:
132 132 ret._lm[fn] = self._lm[fn]
133 133 return ret
134 134
135 135 def filesnotin(self, m2):
136 136 '''Set of files in this manifest that are not in the other'''
137 137 files = set(self.iterkeys())
138 138 files.difference_update(m2.iterkeys())
139 139 return files
140 140
141 141 def matches(self, match):
142 142 '''generate a new manifest filtered by the match argument'''
143 143 if match.always():
144 144 return self.copy()
145 145
146 146 files = match.files()
147 147 if (match.matchfn == match.exact or
148 148 (not match.anypats() and util.all(fn in self for fn in files))):
149 149 return self.intersectfiles(files)
150 150
151 151 lm = manifestdict('')
152 152 lm._lm = self._lm.filtercopy(match)
153 153 return lm
154 154
155 155 def diff(self, m2, clean=False):
156 156 '''Finds changes between the current manifest and m2.
157 157
158 158 Args:
159 159 m2: the manifest to which this manifest should be compared.
160 160 clean: if true, include files unchanged between these manifests
161 161 with a None value in the returned dictionary.
162 162
163 163 The result is returned as a dict with filename as key and
164 164 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
165 165 nodeid in the current/other manifest and fl1/fl2 is the flag
166 166 in the current/other manifest. Where the file does not exist,
167 167 the nodeid will be None and the flags will be the empty
168 168 string.
169 169 '''
170 170 return self._lm.diff(m2._lm, clean)
171 171
172 172 def setflag(self, key, flag):
173 173 self._lm[key] = self[key], flag
174 174
175 175 def get(self, key, default=None):
176 176 try:
177 177 return self._lm[key][0]
178 178 except KeyError:
179 179 return default
180 180
181 181 def flags(self, key, default=''):
182 182 try:
183 183 return self._lm[key][1]
184 184 except KeyError:
185 185 return default
186 186
187 187 def __iter__(self):
188 188 return (x[0] for x in self._lm)
189 189
190 190 def copy(self):
191 191 c = manifestdict('')
192 192 c._lm = self._lm.copy()
193 193 return c
194 194
195 195 def iteritems(self):
196 196 return (x[:2] for x in self._lm)
197 197
198 198 def text(self):
199 199 return self._lm.text()
200 200
201 201 def fastdelta(self, base, changes):
202 202 """Given a base manifest text as an array.array and a list of changes
203 203 relative to that text, compute a delta that can be used by revlog.
204 204 """
205 205 delta = []
206 206 dstart = None
207 207 dend = None
208 208 dline = [""]
209 209 start = 0
210 210 # zero copy representation of base as a buffer
211 211 addbuf = util.buffer(base)
212 212
213 213 # start with a readonly loop that finds the offset of
214 214 # each line and creates the deltas
215 215 for f, todelete in changes:
216 216 # bs will either be the index of the item or the insert point
217 217 start, end = _msearch(addbuf, f, start)
218 218 if not todelete:
219 219 h, fl = self._lm[f]
220 220 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
221 221 else:
222 222 if start == end:
223 223 # item we want to delete was not found, error out
224 224 raise AssertionError(
225 225 _("failed to remove %s from manifest") % f)
226 226 l = ""
227 227 if dstart is not None and dstart <= start and dend >= start:
228 228 if dend < end:
229 229 dend = end
230 230 if l:
231 231 dline.append(l)
232 232 else:
233 233 if dstart is not None:
234 234 delta.append([dstart, dend, "".join(dline)])
235 235 dstart = start
236 236 dend = end
237 237 dline = [l]
238 238
239 239 if dstart is not None:
240 240 delta.append([dstart, dend, "".join(dline)])
241 241 # apply the delta to the base, and get a delta for addrevision
242 242 deltatext, arraytext = _addlistdelta(base, delta)
243 243 return arraytext, deltatext
244 244
245 245 def _msearch(m, s, lo=0, hi=None):
246 246 '''return a tuple (start, end) that says where to find s within m.
247 247
248 248 If the string is found m[start:end] are the line containing
249 249 that string. If start == end the string was not found and
250 250 they indicate the proper sorted insertion point.
251 251
252 252 m should be a buffer or a string
253 253 s is a string'''
254 254 def advance(i, c):
255 255 while i < lenm and m[i] != c:
256 256 i += 1
257 257 return i
258 258 if not s:
259 259 return (lo, lo)
260 260 lenm = len(m)
261 261 if not hi:
262 262 hi = lenm
263 263 while lo < hi:
264 264 mid = (lo + hi) // 2
265 265 start = mid
266 266 while start > 0 and m[start - 1] != '\n':
267 267 start -= 1
268 268 end = advance(start, '\0')
269 269 if m[start:end] < s:
270 270 # we know that after the null there are 40 bytes of sha1
271 271 # this translates to the bisect lo = mid + 1
272 272 lo = advance(end + 40, '\n') + 1
273 273 else:
274 274 # this translates to the bisect hi = mid
275 275 hi = start
276 276 end = advance(lo, '\0')
277 277 found = m[lo:end]
278 278 if s == found:
279 279 # we know that after the null there are 40 bytes of sha1
280 280 end = advance(end + 40, '\n')
281 281 return (lo, end + 1)
282 282 else:
283 283 return (lo, lo)
284 284
285 285 def _checkforbidden(l):
286 286 """Check filenames for illegal characters."""
287 287 for f in l:
288 288 if '\n' in f or '\r' in f:
289 289 raise error.RevlogError(
290 290 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
291 291
292 292
293 293 # apply the changes collected during the bisect loop to our addlist
294 294 # return a delta suitable for addrevision
295 295 def _addlistdelta(addlist, x):
296 296 # for large addlist arrays, building a new array is cheaper
297 297 # than repeatedly modifying the existing one
298 298 currentposition = 0
299 299 newaddlist = array.array('c')
300 300
301 301 for start, end, content in x:
302 302 newaddlist += addlist[currentposition:start]
303 303 if content:
304 304 newaddlist += array.array('c', content)
305 305
306 306 currentposition = end
307 307
308 308 newaddlist += addlist[currentposition:]
309 309
310 310 deltatext = "".join(struct.pack(">lll", start, end, len(content))
311 311 + content for start, end, content in x)
312 312 return deltatext, newaddlist
313 313
314 314 class manifest(revlog.revlog):
315 315 def __init__(self, opener):
316 316 # During normal operations, we expect to deal with not more than four
317 317 # revs at a time (such as during commit --amend). When rebasing large
318 318 # stacks of commits, the number can go up, hence the config knob below.
319 319 cachesize = 4
320 320 opts = getattr(opener, 'options', None)
321 321 if opts is not None:
322 322 cachesize = opts.get('manifestcachesize', cachesize)
323 323 self._mancache = util.lrucachedict(cachesize)
324 324 revlog.revlog.__init__(self, opener, "00manifest.i")
325 325
326 326 def readdelta(self, node):
327 327 r = self.rev(node)
328 328 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
329 329 return manifestdict(d)
330 330
331 331 def readfast(self, node):
332 332 '''use the faster of readdelta or read'''
333 333 r = self.rev(node)
334 334 deltaparent = self.deltaparent(r)
335 335 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
336 336 return self.readdelta(node)
337 337 return self.read(node)
338 338
339 339 def read(self, node):
340 340 if node == revlog.nullid:
341 341 return manifestdict() # don't upset local cache
342 342 if node in self._mancache:
343 343 return self._mancache[node][0]
344 344 text = self.revision(node)
345 345 arraytext = array.array('c', text)
346 346 m = manifestdict(text)
347 347 self._mancache[node] = (m, arraytext)
348 348 return m
349 349
350 350 def find(self, node, f):
351 351 '''look up entry for a single file efficiently.
352 352 return (node, flags) pair if found, (None, None) if not.'''
353 if node in self._mancache:
354 m = self._mancache[node][0]
355 return m.get(f), m.flags(f)
356 text = self.revision(node)
353 m = self.read(node)
357 354 try:
358 return manifestdict(text).find(f)
355 return m.find(f)
359 356 except KeyError:
360 357 return None, None
361 358
362 359 def add(self, m, transaction, link, p1, p2, added, removed):
363 360 if p1 in self._mancache:
364 361 # If our first parent is in the manifest cache, we can
365 362 # compute a delta here using properties we know about the
366 363 # manifest up-front, which may save time later for the
367 364 # revlog layer.
368 365
369 366 _checkforbidden(added)
370 367 # combine the changed lists into one list for sorting
371 368 work = [(x, False) for x in added]
372 369 work.extend((x, True) for x in removed)
373 370 # this could use heapq.merge() (from Python 2.6+) or equivalent
374 371 # since the lists are already sorted
375 372 work.sort()
376 373
377 374 arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
378 375 cachedelta = self.rev(p1), deltatext
379 376 text = util.buffer(arraytext)
380 377 else:
381 378 # The first parent manifest isn't already loaded, so we'll
382 379 # just encode a fulltext of the manifest and pass that
383 380 # through to the revlog layer, and let it handle the delta
384 381 # process.
385 382 text = m.text()
386 383 arraytext = array.array('c', text)
387 384 cachedelta = None
388 385
389 386 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
390 387 self._mancache[n] = (m, arraytext)
391 388
392 389 return n
General Comments 0
You need to be logged in to leave comments. Login now