##// END OF EJS Templates
lazymanifest: write a more efficient, pypy friendly version of lazymanifest
Maciej Fijalkowski -
r30042:d24e03da default
parent child Browse files
Show More
@@ -104,69 +104,300 b' def _textv2(it):'
104 104 _checkforbidden(files)
105 105 return ''.join(lines)
106 106
107 class _lazymanifest(dict):
108 """This is the pure implementation of lazymanifest.
107 class lazymanifestiter(object):
108 def __init__(self, lm):
109 self.pos = 0
110 self.lm = lm
109 111
110 It has not been optimized *at all* and is not lazy.
111 """
112 def __iter__(self):
113 return self
112 114
113 def __init__(self, data):
114 dict.__init__(self)
115 for f, n, fl in _parse(data):
116 self[f] = n, fl
115 def next(self):
116 try:
117 data, pos = self.lm._get(self.pos)
118 except IndexError:
119 raise StopIteration
120 if pos == -1:
121 self.pos += 1
122 return data[0]
123 self.pos += 1
124 zeropos = data.find('\x00', pos)
125 return data[pos:zeropos]
117 126
118 def __setitem__(self, k, v):
119 node, flag = v
120 assert node is not None
121 if len(node) > 21:
122 node = node[:21] # match c implementation behavior
123 dict.__setitem__(self, k, (node, flag))
127 class lazymanifestiterentries(object):
128 def __init__(self, lm):
129 self.lm = lm
130 self.pos = 0
124 131
125 132 def __iter__(self):
126 return iter(sorted(dict.keys(self)))
133 return self
134
135 def next(self):
136 try:
137 data, pos = self.lm._get(self.pos)
138 except IndexError:
139 raise StopIteration
140 if pos == -1:
141 self.pos += 1
142 return data
143 zeropos = data.find('\x00', pos)
144 hashval = unhexlify(data, self.lm.extrainfo[self.pos],
145 zeropos + 1, 40)
146 flags = self.lm._getflags(data, self.pos, zeropos)
147 self.pos += 1
148 return (data[pos:zeropos], hashval, flags)
149
150 def unhexlify(data, extra, pos, length):
151 s = data[pos:pos + length].decode('hex')
152 if extra:
153 s += chr(extra & 0xff)
154 return s
155
156 def _cmp(a, b):
157 return (a > b) - (a < b)
158
159 class _lazymanifest(object):
160 def __init__(self, data, positions=None, extrainfo=None, extradata=None):
161 if positions is None:
162 self.positions = self.findlines(data)
163 self.extrainfo = [0] * len(self.positions)
164 self.data = data
165 self.extradata = []
166 else:
167 self.positions = positions[:]
168 self.extrainfo = extrainfo[:]
169 self.extradata = extradata[:]
170 self.data = data
171
172 def findlines(self, data):
173 if not data:
174 return []
175 pos = data.find("\n")
176 if pos == -1 or data[-1] != '\n':
177 raise ValueError("Manifest did not end in a newline.")
178 positions = [0]
179 prev = data[:data.find('\x00')]
180 while pos < len(data) - 1 and pos != -1:
181 positions.append(pos + 1)
182 nexts = data[pos + 1:data.find('\x00', pos + 1)]
183 if nexts < prev:
184 raise ValueError("Manifest lines not in sorted order.")
185 prev = nexts
186 pos = data.find("\n", pos + 1)
187 return positions
188
189 def _get(self, index):
190 # get the position encoded in pos:
191 # positive number is an index in 'data'
192 # negative number is in extrapieces
193 pos = self.positions[index]
194 if pos >= 0:
195 return self.data, pos
196 return self.extradata[-pos - 1], -1
197
198 def _getkey(self, pos):
199 if pos >= 0:
200 return self.data[pos:self.data.find('\x00', pos + 1)]
201 return self.extradata[-pos - 1][0]
202
203 def bsearch(self, key):
204 first = 0
205 last = len(self.positions) - 1
206
207 while first <= last:
208 midpoint = (first + last)//2
209 nextpos = self.positions[midpoint]
210 candidate = self._getkey(nextpos)
211 r = _cmp(key, candidate)
212 if r == 0:
213 return midpoint
214 else:
215 if r < 0:
216 last = midpoint - 1
217 else:
218 first = midpoint + 1
219 return -1
127 220
128 def iterkeys(self):
129 return iter(sorted(dict.keys(self)))
221 def bsearch2(self, key):
222 # same as the above, but will always return the position
223 # done for performance reasons
224 first = 0
225 last = len(self.positions) - 1
226
227 while first <= last:
228 midpoint = (first + last)//2
229 nextpos = self.positions[midpoint]
230 candidate = self._getkey(nextpos)
231 r = _cmp(key, candidate)
232 if r == 0:
233 return (midpoint, True)
234 else:
235 if r < 0:
236 last = midpoint - 1
237 else:
238 first = midpoint + 1
239 return (first, False)
240
241 def __contains__(self, key):
242 return self.bsearch(key) != -1
243
244 def _getflags(self, data, needle, pos):
245 start = pos + 41
246 end = data.find("\n", start)
247 if end == -1:
248 end = len(data) - 1
249 if start == end:
250 return ''
251 return self.data[start:end]
130 252
131 def iterentries(self):
132 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
253 def __getitem__(self, key):
254 if not isinstance(key, str):
255 raise TypeError("getitem: manifest keys must be a string.")
256 needle = self.bsearch(key)
257 if needle == -1:
258 raise KeyError
259 data, pos = self._get(needle)
260 if pos == -1:
261 return (data[1], data[2])
262 zeropos = data.find('\x00', pos)
263 assert 0 <= needle <= len(self.positions)
264 assert len(self.extrainfo) == len(self.positions)
265 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
266 flags = self._getflags(data, needle, zeropos)
267 return (hashval, flags)
268
269 def __delitem__(self, key):
270 needle, found = self.bsearch2(key)
271 if not found:
272 raise KeyError
273 cur = self.positions[needle]
274 self.positions = self.positions[:needle] + self.positions[needle + 1:]
275 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:]
276 if cur >= 0:
277 self.data = self.data[:cur] + '\x00' + self.data[cur + 1:]
278
279 def __setitem__(self, key, value):
280 if not isinstance(key, str):
281 raise TypeError("setitem: manifest keys must be a string.")
282 if not isinstance(value, tuple) or len(value) != 2:
283 raise TypeError("Manifest values must be a tuple of (node, flags).")
284 hashval = value[0]
285 if not isinstance(hashval, str) or not 20 <= len(hashval) <= 22:
286 raise TypeError("node must be a 20-byte string")
287 flags = value[1]
288 if len(hashval) == 22:
289 hashval = hashval[:-1]
290 if not isinstance(flags, str) or len(flags) > 1:
291 raise TypeError("flags must a 0 or 1 byte string, got %r", flags)
292 needle, found = self.bsearch2(key)
293 if found:
294 # put the item
295 pos = self.positions[needle]
296 if pos < 0:
297 self.extradata[-pos - 1] = (key, hashval, value[1])
298 else:
299 # just don't bother
300 self.extradata.append((key, hashval, value[1]))
301 self.positions[needle] = -len(self.extradata)
302 else:
303 # not found, put it in with extra positions
304 self.extradata.append((key, hashval, value[1]))
305 self.positions = (self.positions[:needle] + [-len(self.extradata)]
306 + self.positions[needle:])
307 self.extrainfo = (self.extrainfo[:needle] + [0] +
308 self.extrainfo[needle:])
133 309
134 310 def copy(self):
135 c = _lazymanifest('')
136 c.update(self)
137 return c
311 # XXX call _compact like in C?
312 return _lazymanifest(self.data, self.positions, self.extrainfo,
313 self.extradata)
314
315 def _compact(self):
316 # hopefully not called TOO often
317 if len(self.extradata) == 0:
318 return
319 l = []
320 last_cut = 0
321 i = 0
322 offset = 0
323 self.extrainfo = [0] * len(self.positions)
324 while i < len(self.positions):
325 if self.positions[i] >= 0:
326 cur = self.positions[i]
327 last_cut = cur
328 while True:
329 self.positions[i] = offset
330 i += 1
331 if i == len(self.positions) or self.positions[i] < 0:
332 break
333 offset += self.positions[i] - cur
334 cur = self.positions[i]
335 end_cut = self.data.find('\n', cur)
336 if end_cut != -1:
337 end_cut += 1
338 offset += end_cut - cur
339 l.append(self.data[last_cut:end_cut])
340 else:
341 while i < len(self.positions) and self.positions[i] < 0:
342 cur = self.positions[i]
343 t = self.extradata[-cur - 1]
344 l.append(self._pack(t))
345 self.positions[i] = offset
346 if len(t[1]) > 20:
347 self.extrainfo[i] = ord(t[1][21])
348 offset += len(l[-1])
349 i += 1
350 self.data = ''.join(l)
351 self.extradata = []
352
353 def _pack(self, d):
354 return d[0] + '\x00' + d[1][:20].encode('hex') + d[2] + '\n'
355
356 def text(self):
357 self._compact()
358 return self.data
138 359
139 360 def diff(self, m2, clean=False):
140 361 '''Finds changes between the current manifest and m2.'''
362 # XXX think whether efficiency matters here
141 363 diff = {}
142 364
143 for fn, e1 in self.iteritems():
365 for fn, e1, flags in self.iterentries():
144 366 if fn not in m2:
145 diff[fn] = e1, (None, '')
367 diff[fn] = (e1, flags), (None, '')
146 368 else:
147 369 e2 = m2[fn]
148 if e1 != e2:
149 diff[fn] = e1, e2
370 if (e1, flags) != e2:
371 diff[fn] = (e1, flags), e2
150 372 elif clean:
151 373 diff[fn] = None
152 374
153 for fn, e2 in m2.iteritems():
375 for fn, e2, flags in m2.iterentries():
154 376 if fn not in self:
155 diff[fn] = (None, ''), e2
377 diff[fn] = (None, ''), (e2, flags)
156 378
157 379 return diff
158 380
381 def iterentries(self):
382 return lazymanifestiterentries(self)
383
384 def iterkeys(self):
385 return lazymanifestiter(self)
386
387 def __iter__(self):
388 return lazymanifestiter(self)
389
390 def __len__(self):
391 return len(self.positions)
392
159 393 def filtercopy(self, filterfn):
394 # XXX should be optimized
160 395 c = _lazymanifest('')
161 396 for f, n, fl in self.iterentries():
162 397 if filterfn(f):
163 398 c[f] = n, fl
164 399 return c
165 400
166 def text(self):
167 """Get the full data of this manifest as a bytestring."""
168 return _textv1(self.iterentries())
169
170 401 try:
171 402 _lazymanifest = parsers.lazymanifest
172 403 except AttributeError:
@@ -111,8 +111,8 b' if modulepolicy not in policynocffi:'
111 111 def blocks(sa, sb):
112 112 a = ffi.new("struct bdiff_line**")
113 113 b = ffi.new("struct bdiff_line**")
114 ac = ffi.new("char[]", sa)
115 bc = ffi.new("char[]", sb)
114 ac = ffi.new("char[]", str(sa))
115 bc = ffi.new("char[]", str(sb))
116 116 l = ffi.new("struct bdiff_hunk*")
117 117 try:
118 118 an = lib.bdiff_splitlines(ac, len(sa), a)
@@ -138,8 +138,8 b' if modulepolicy not in policynocffi:'
138 138 def bdiff(sa, sb):
139 139 a = ffi.new("struct bdiff_line**")
140 140 b = ffi.new("struct bdiff_line**")
141 ac = ffi.new("char[]", sa)
142 bc = ffi.new("char[]", sb)
141 ac = ffi.new("char[]", str(sa))
142 bc = ffi.new("char[]", str(sb))
143 143 l = ffi.new("struct bdiff_hunk*")
144 144 try:
145 145 an = lib.bdiff_splitlines(ac, len(sa), a)
General Comments 0
You need to be logged in to leave comments. Login now