Show More
@@ -104,69 +104,300 b' def _textv2(it):' | |||
|
104 | 104 | _checkforbidden(files) |
|
105 | 105 | return ''.join(lines) |
|
106 | 106 | |
|
107 |
class |
|
|
108 | """This is the pure implementation of lazymanifest. | |
|
107 | class lazymanifestiter(object): | |
|
108 | def __init__(self, lm): | |
|
109 | self.pos = 0 | |
|
110 | self.lm = lm | |
|
109 | 111 | |
|
110 | It has not been optimized *at all* and is not lazy. | |
|
111 | """ | |
|
112 | def __iter__(self): | |
|
113 | return self | |
|
112 | 114 | |
|
113 |
def |
|
|
114 | dict.__init__(self) | |
|
115 | for f, n, fl in _parse(data): | |
|
116 | self[f] = n, fl | |
|
115 | def next(self): | |
|
116 | try: | |
|
117 | data, pos = self.lm._get(self.pos) | |
|
118 | except IndexError: | |
|
119 | raise StopIteration | |
|
120 | if pos == -1: | |
|
121 | self.pos += 1 | |
|
122 | return data[0] | |
|
123 | self.pos += 1 | |
|
124 | zeropos = data.find('\x00', pos) | |
|
125 | return data[pos:zeropos] | |
|
117 | 126 | |
|
118 | def __setitem__(self, k, v): | |
|
119 | node, flag = v | |
|
120 | assert node is not None | |
|
121 | if len(node) > 21: | |
|
122 | node = node[:21] # match c implementation behavior | |
|
123 | dict.__setitem__(self, k, (node, flag)) | |
|
127 | class lazymanifestiterentries(object): | |
|
128 | def __init__(self, lm): | |
|
129 | self.lm = lm | |
|
130 | self.pos = 0 | |
|
124 | 131 | |
|
125 | 132 | def __iter__(self): |
|
126 |
return |
|
|
133 | return self | |
|
134 | ||
|
135 | def next(self): | |
|
136 | try: | |
|
137 | data, pos = self.lm._get(self.pos) | |
|
138 | except IndexError: | |
|
139 | raise StopIteration | |
|
140 | if pos == -1: | |
|
141 | self.pos += 1 | |
|
142 | return data | |
|
143 | zeropos = data.find('\x00', pos) | |
|
144 | hashval = unhexlify(data, self.lm.extrainfo[self.pos], | |
|
145 | zeropos + 1, 40) | |
|
146 | flags = self.lm._getflags(data, self.pos, zeropos) | |
|
147 | self.pos += 1 | |
|
148 | return (data[pos:zeropos], hashval, flags) | |
|
149 | ||
|
150 | def unhexlify(data, extra, pos, length): | |
|
151 | s = data[pos:pos + length].decode('hex') | |
|
152 | if extra: | |
|
153 | s += chr(extra & 0xff) | |
|
154 | return s | |
|
155 | ||
|
156 | def _cmp(a, b): | |
|
157 | return (a > b) - (a < b) | |
|
158 | ||
|
159 | class _lazymanifest(object): | |
|
160 | def __init__(self, data, positions=None, extrainfo=None, extradata=None): | |
|
161 | if positions is None: | |
|
162 | self.positions = self.findlines(data) | |
|
163 | self.extrainfo = [0] * len(self.positions) | |
|
164 | self.data = data | |
|
165 | self.extradata = [] | |
|
166 | else: | |
|
167 | self.positions = positions[:] | |
|
168 | self.extrainfo = extrainfo[:] | |
|
169 | self.extradata = extradata[:] | |
|
170 | self.data = data | |
|
171 | ||
|
172 | def findlines(self, data): | |
|
173 | if not data: | |
|
174 | return [] | |
|
175 | pos = data.find("\n") | |
|
176 | if pos == -1 or data[-1] != '\n': | |
|
177 | raise ValueError("Manifest did not end in a newline.") | |
|
178 | positions = [0] | |
|
179 | prev = data[:data.find('\x00')] | |
|
180 | while pos < len(data) - 1 and pos != -1: | |
|
181 | positions.append(pos + 1) | |
|
182 | nexts = data[pos + 1:data.find('\x00', pos + 1)] | |
|
183 | if nexts < prev: | |
|
184 | raise ValueError("Manifest lines not in sorted order.") | |
|
185 | prev = nexts | |
|
186 | pos = data.find("\n", pos + 1) | |
|
187 | return positions | |
|
188 | ||
|
189 | def _get(self, index): | |
|
190 | # get the position encoded in pos: | |
|
191 | # positive number is an index in 'data' | |
|
192 | # negative number is in extrapieces | |
|
193 | pos = self.positions[index] | |
|
194 | if pos >= 0: | |
|
195 | return self.data, pos | |
|
196 | return self.extradata[-pos - 1], -1 | |
|
197 | ||
|
198 | def _getkey(self, pos): | |
|
199 | if pos >= 0: | |
|
200 | return self.data[pos:self.data.find('\x00', pos + 1)] | |
|
201 | return self.extradata[-pos - 1][0] | |
|
202 | ||
|
203 | def bsearch(self, key): | |
|
204 | first = 0 | |
|
205 | last = len(self.positions) - 1 | |
|
206 | ||
|
207 | while first <= last: | |
|
208 | midpoint = (first + last)//2 | |
|
209 | nextpos = self.positions[midpoint] | |
|
210 | candidate = self._getkey(nextpos) | |
|
211 | r = _cmp(key, candidate) | |
|
212 | if r == 0: | |
|
213 | return midpoint | |
|
214 | else: | |
|
215 | if r < 0: | |
|
216 | last = midpoint - 1 | |
|
217 | else: | |
|
218 | first = midpoint + 1 | |
|
219 | return -1 | |
|
127 | 220 | |
|
128 |
def |
|
|
129 | return iter(sorted(dict.keys(self))) | |
|
221 | def bsearch2(self, key): | |
|
222 | # same as the above, but will always return the position | |
|
223 | # done for performance reasons | |
|
224 | first = 0 | |
|
225 | last = len(self.positions) - 1 | |
|
226 | ||
|
227 | while first <= last: | |
|
228 | midpoint = (first + last)//2 | |
|
229 | nextpos = self.positions[midpoint] | |
|
230 | candidate = self._getkey(nextpos) | |
|
231 | r = _cmp(key, candidate) | |
|
232 | if r == 0: | |
|
233 | return (midpoint, True) | |
|
234 | else: | |
|
235 | if r < 0: | |
|
236 | last = midpoint - 1 | |
|
237 | else: | |
|
238 | first = midpoint + 1 | |
|
239 | return (first, False) | |
|
240 | ||
|
241 | def __contains__(self, key): | |
|
242 | return self.bsearch(key) != -1 | |
|
243 | ||
|
244 | def _getflags(self, data, needle, pos): | |
|
245 | start = pos + 41 | |
|
246 | end = data.find("\n", start) | |
|
247 | if end == -1: | |
|
248 | end = len(data) - 1 | |
|
249 | if start == end: | |
|
250 | return '' | |
|
251 | return self.data[start:end] | |
|
130 | 252 | |
|
131 |
def |
|
|
132 | return ((f, e[0], e[1]) for f, e in sorted(self.iteritems())) | |
|
253 | def __getitem__(self, key): | |
|
254 | if not isinstance(key, str): | |
|
255 | raise TypeError("getitem: manifest keys must be a string.") | |
|
256 | needle = self.bsearch(key) | |
|
257 | if needle == -1: | |
|
258 | raise KeyError | |
|
259 | data, pos = self._get(needle) | |
|
260 | if pos == -1: | |
|
261 | return (data[1], data[2]) | |
|
262 | zeropos = data.find('\x00', pos) | |
|
263 | assert 0 <= needle <= len(self.positions) | |
|
264 | assert len(self.extrainfo) == len(self.positions) | |
|
265 | hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40) | |
|
266 | flags = self._getflags(data, needle, zeropos) | |
|
267 | return (hashval, flags) | |
|
268 | ||
|
269 | def __delitem__(self, key): | |
|
270 | needle, found = self.bsearch2(key) | |
|
271 | if not found: | |
|
272 | raise KeyError | |
|
273 | cur = self.positions[needle] | |
|
274 | self.positions = self.positions[:needle] + self.positions[needle + 1:] | |
|
275 | self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:] | |
|
276 | if cur >= 0: | |
|
277 | self.data = self.data[:cur] + '\x00' + self.data[cur + 1:] | |
|
278 | ||
|
279 | def __setitem__(self, key, value): | |
|
280 | if not isinstance(key, str): | |
|
281 | raise TypeError("setitem: manifest keys must be a string.") | |
|
282 | if not isinstance(value, tuple) or len(value) != 2: | |
|
283 | raise TypeError("Manifest values must be a tuple of (node, flags).") | |
|
284 | hashval = value[0] | |
|
285 | if not isinstance(hashval, str) or not 20 <= len(hashval) <= 22: | |
|
286 | raise TypeError("node must be a 20-byte string") | |
|
287 | flags = value[1] | |
|
288 | if len(hashval) == 22: | |
|
289 | hashval = hashval[:-1] | |
|
290 | if not isinstance(flags, str) or len(flags) > 1: | |
|
291 | raise TypeError("flags must a 0 or 1 byte string, got %r", flags) | |
|
292 | needle, found = self.bsearch2(key) | |
|
293 | if found: | |
|
294 | # put the item | |
|
295 | pos = self.positions[needle] | |
|
296 | if pos < 0: | |
|
297 | self.extradata[-pos - 1] = (key, hashval, value[1]) | |
|
298 | else: | |
|
299 | # just don't bother | |
|
300 | self.extradata.append((key, hashval, value[1])) | |
|
301 | self.positions[needle] = -len(self.extradata) | |
|
302 | else: | |
|
303 | # not found, put it in with extra positions | |
|
304 | self.extradata.append((key, hashval, value[1])) | |
|
305 | self.positions = (self.positions[:needle] + [-len(self.extradata)] | |
|
306 | + self.positions[needle:]) | |
|
307 | self.extrainfo = (self.extrainfo[:needle] + [0] + | |
|
308 | self.extrainfo[needle:]) | |
|
133 | 309 | |
|
134 | 310 | def copy(self): |
|
135 | c = _lazymanifest('') | |
|
136 | c.update(self) | |
|
137 | return c | |
|
311 | # XXX call _compact like in C? | |
|
312 | return _lazymanifest(self.data, self.positions, self.extrainfo, | |
|
313 | self.extradata) | |
|
314 | ||
|
315 | def _compact(self): | |
|
316 | # hopefully not called TOO often | |
|
317 | if len(self.extradata) == 0: | |
|
318 | return | |
|
319 | l = [] | |
|
320 | last_cut = 0 | |
|
321 | i = 0 | |
|
322 | offset = 0 | |
|
323 | self.extrainfo = [0] * len(self.positions) | |
|
324 | while i < len(self.positions): | |
|
325 | if self.positions[i] >= 0: | |
|
326 | cur = self.positions[i] | |
|
327 | last_cut = cur | |
|
328 | while True: | |
|
329 | self.positions[i] = offset | |
|
330 | i += 1 | |
|
331 | if i == len(self.positions) or self.positions[i] < 0: | |
|
332 | break | |
|
333 | offset += self.positions[i] - cur | |
|
334 | cur = self.positions[i] | |
|
335 | end_cut = self.data.find('\n', cur) | |
|
336 | if end_cut != -1: | |
|
337 | end_cut += 1 | |
|
338 | offset += end_cut - cur | |
|
339 | l.append(self.data[last_cut:end_cut]) | |
|
340 | else: | |
|
341 | while i < len(self.positions) and self.positions[i] < 0: | |
|
342 | cur = self.positions[i] | |
|
343 | t = self.extradata[-cur - 1] | |
|
344 | l.append(self._pack(t)) | |
|
345 | self.positions[i] = offset | |
|
346 | if len(t[1]) > 20: | |
|
347 | self.extrainfo[i] = ord(t[1][21]) | |
|
348 | offset += len(l[-1]) | |
|
349 | i += 1 | |
|
350 | self.data = ''.join(l) | |
|
351 | self.extradata = [] | |
|
352 | ||
|
353 | def _pack(self, d): | |
|
354 | return d[0] + '\x00' + d[1][:20].encode('hex') + d[2] + '\n' | |
|
355 | ||
|
356 | def text(self): | |
|
357 | self._compact() | |
|
358 | return self.data | |
|
138 | 359 | |
|
139 | 360 | def diff(self, m2, clean=False): |
|
140 | 361 | '''Finds changes between the current manifest and m2.''' |
|
362 | # XXX think whether efficiency matters here | |
|
141 | 363 | diff = {} |
|
142 | 364 | |
|
143 |
for fn, e1 in self.iter |
|
|
365 | for fn, e1, flags in self.iterentries(): | |
|
144 | 366 | if fn not in m2: |
|
145 | diff[fn] = e1, (None, '') | |
|
367 | diff[fn] = (e1, flags), (None, '') | |
|
146 | 368 | else: |
|
147 | 369 | e2 = m2[fn] |
|
148 | if e1 != e2: | |
|
149 | diff[fn] = e1, e2 | |
|
370 | if (e1, flags) != e2: | |
|
371 | diff[fn] = (e1, flags), e2 | |
|
150 | 372 | elif clean: |
|
151 | 373 | diff[fn] = None |
|
152 | 374 | |
|
153 |
for fn, e2 in m2.iter |
|
|
375 | for fn, e2, flags in m2.iterentries(): | |
|
154 | 376 | if fn not in self: |
|
155 | diff[fn] = (None, ''), e2 | |
|
377 | diff[fn] = (None, ''), (e2, flags) | |
|
156 | 378 | |
|
157 | 379 | return diff |
|
158 | 380 | |
|
381 | def iterentries(self): | |
|
382 | return lazymanifestiterentries(self) | |
|
383 | ||
|
384 | def iterkeys(self): | |
|
385 | return lazymanifestiter(self) | |
|
386 | ||
|
387 | def __iter__(self): | |
|
388 | return lazymanifestiter(self) | |
|
389 | ||
|
390 | def __len__(self): | |
|
391 | return len(self.positions) | |
|
392 | ||
|
159 | 393 | def filtercopy(self, filterfn): |
|
394 | # XXX should be optimized | |
|
160 | 395 | c = _lazymanifest('') |
|
161 | 396 | for f, n, fl in self.iterentries(): |
|
162 | 397 | if filterfn(f): |
|
163 | 398 | c[f] = n, fl |
|
164 | 399 | return c |
|
165 | 400 | |
|
166 | def text(self): | |
|
167 | """Get the full data of this manifest as a bytestring.""" | |
|
168 | return _textv1(self.iterentries()) | |
|
169 | ||
|
170 | 401 | try: |
|
171 | 402 | _lazymanifest = parsers.lazymanifest |
|
172 | 403 | except AttributeError: |
@@ -111,8 +111,8 b' if modulepolicy not in policynocffi:' | |||
|
111 | 111 | def blocks(sa, sb): |
|
112 | 112 | a = ffi.new("struct bdiff_line**") |
|
113 | 113 | b = ffi.new("struct bdiff_line**") |
|
114 | ac = ffi.new("char[]", sa) | |
|
115 | bc = ffi.new("char[]", sb) | |
|
114 | ac = ffi.new("char[]", str(sa)) | |
|
115 | bc = ffi.new("char[]", str(sb)) | |
|
116 | 116 | l = ffi.new("struct bdiff_hunk*") |
|
117 | 117 | try: |
|
118 | 118 | an = lib.bdiff_splitlines(ac, len(sa), a) |
@@ -138,8 +138,8 b' if modulepolicy not in policynocffi:' | |||
|
138 | 138 | def bdiff(sa, sb): |
|
139 | 139 | a = ffi.new("struct bdiff_line**") |
|
140 | 140 | b = ffi.new("struct bdiff_line**") |
|
141 | ac = ffi.new("char[]", sa) | |
|
142 | bc = ffi.new("char[]", sb) | |
|
141 | ac = ffi.new("char[]", str(sa)) | |
|
142 | bc = ffi.new("char[]", str(sb)) | |
|
143 | 143 | l = ffi.new("struct bdiff_hunk*") |
|
144 | 144 | try: |
|
145 | 145 | an = lib.bdiff_splitlines(ac, len(sa), a) |
General Comments 0
You need to be logged in to leave comments.
Login now