Show More
@@ -35,6 +35,9 b' from .utils import (' | |||||
35 | parsers = policy.importmod(r'parsers') |
|
35 | parsers = policy.importmod(r'parsers') | |
36 | propertycache = util.propertycache |
|
36 | propertycache = util.propertycache | |
37 |
|
37 | |||
|
38 | # Allow tests to more easily test the alternate path in manifestdict.fastdelta() | |||
|
39 | FASTDELTA_TEXTDIFF_THRESHOLD = 1000 | |||
|
40 | ||||
38 | def _parse(data): |
|
41 | def _parse(data): | |
39 | # This method does a little bit of excessive-looking |
|
42 | # This method does a little bit of excessive-looking | |
40 | # precondition checking. This is so that the behavior of this |
|
43 | # precondition checking. This is so that the behavior of this | |
@@ -123,17 +126,36 b' def _cmp(a, b):' | |||||
123 | return (a > b) - (a < b) |
|
126 | return (a > b) - (a < b) | |
124 |
|
127 | |||
125 | class _lazymanifest(object): |
|
128 | class _lazymanifest(object): | |
126 | def __init__(self, data, positions=None, extrainfo=None, extradata=None): |
|
129 | """A pure python manifest backed by a byte string. It is supplemented with | |
|
130 | internal lists as it is modified, until it is compacted back to a pure byte | |||
|
131 | string. | |||
|
132 | ||||
|
133 | ``data`` is the initial manifest data. | |||
|
134 | ||||
|
135 | ``positions`` is a list of offsets, one per manifest entry. Positive | |||
|
136 | values are offsets into ``data``, negative values are offsets into the | |||
|
137 | ``extradata`` list. When an entry is removed, its entry is dropped from | |||
|
138 | ``positions``. The values are encoded such that when walking the list and | |||
|
139 | indexing into ``data`` or ``extradata`` as appropriate, the entries are | |||
|
140 | sorted by filename. | |||
|
141 | ||||
|
142 | ``extradata`` is a list of (key, hash, flags) for entries that were added or | |||
|
143 | modified since the manifest was created or compacted. | |||
|
144 | """ | |||
|
145 | def __init__(self, data, positions=None, extrainfo=None, extradata=None, | |||
|
146 | hasremovals=False): | |||
127 | if positions is None: |
|
147 | if positions is None: | |
128 | self.positions = self.findlines(data) |
|
148 | self.positions = self.findlines(data) | |
129 | self.extrainfo = [0] * len(self.positions) |
|
149 | self.extrainfo = [0] * len(self.positions) | |
130 | self.data = data |
|
150 | self.data = data | |
131 | self.extradata = [] |
|
151 | self.extradata = [] | |
|
152 | self.hasremovals = False | |||
132 | else: |
|
153 | else: | |
133 | self.positions = positions[:] |
|
154 | self.positions = positions[:] | |
134 | self.extrainfo = extrainfo[:] |
|
155 | self.extrainfo = extrainfo[:] | |
135 | self.extradata = extradata[:] |
|
156 | self.extradata = extradata[:] | |
136 | self.data = data |
|
157 | self.data = data | |
|
158 | self.hasremovals = hasremovals | |||
137 |
|
159 | |||
138 | def findlines(self, data): |
|
160 | def findlines(self, data): | |
139 | if not data: |
|
161 | if not data: | |
@@ -240,7 +262,10 b' class _lazymanifest(object):' | |||||
240 | self.positions = self.positions[:needle] + self.positions[needle + 1:] |
|
262 | self.positions = self.positions[:needle] + self.positions[needle + 1:] | |
241 | self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:] |
|
263 | self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:] | |
242 | if cur >= 0: |
|
264 | if cur >= 0: | |
|
265 | # This does NOT unsort the list as far as the search functions are | |||
|
266 | # concerned, as they only examine lines mapped by self.positions. | |||
243 | self.data = self.data[:cur] + '\x00' + self.data[cur + 1:] |
|
267 | self.data = self.data[:cur] + '\x00' + self.data[cur + 1:] | |
|
268 | self.hasremovals = True | |||
244 |
|
269 | |||
245 | def __setitem__(self, key, value): |
|
270 | def __setitem__(self, key, value): | |
246 | if not isinstance(key, bytes): |
|
271 | if not isinstance(key, bytes): | |
@@ -276,11 +301,11 b' class _lazymanifest(object):' | |||||
276 | def copy(self): |
|
301 | def copy(self): | |
277 | # XXX call _compact like in C? |
|
302 | # XXX call _compact like in C? | |
278 | return _lazymanifest(self.data, self.positions, self.extrainfo, |
|
303 | return _lazymanifest(self.data, self.positions, self.extrainfo, | |
279 | self.extradata) |
|
304 | self.extradata, self.hasremovals) | |
280 |
|
305 | |||
281 | def _compact(self): |
|
306 | def _compact(self): | |
282 | # hopefully not called TOO often |
|
307 | # hopefully not called TOO often | |
283 | if len(self.extradata) == 0: |
|
308 | if len(self.extradata) == 0 and not self.hasremovals: | |
284 | return |
|
309 | return | |
285 | l = [] |
|
310 | l = [] | |
286 | i = 0 |
|
311 | i = 0 | |
@@ -290,11 +315,25 b' class _lazymanifest(object):' | |||||
290 | if self.positions[i] >= 0: |
|
315 | if self.positions[i] >= 0: | |
291 | cur = self.positions[i] |
|
316 | cur = self.positions[i] | |
292 | last_cut = cur |
|
317 | last_cut = cur | |
|
318 | ||||
|
319 | # Collect all contiguous entries in the buffer at the current | |||
|
320 | # offset, breaking out only for added/modified items held in | |||
|
321 | # extradata, or a deleted line prior to the next position. | |||
293 | while True: |
|
322 | while True: | |
294 | self.positions[i] = offset |
|
323 | self.positions[i] = offset | |
295 | i += 1 |
|
324 | i += 1 | |
296 | if i == len(self.positions) or self.positions[i] < 0: |
|
325 | if i == len(self.positions) or self.positions[i] < 0: | |
297 | break |
|
326 | break | |
|
327 | ||||
|
328 | # A removed file has no positions[] entry, but does have an | |||
|
329 | # overwritten first byte. Break out and find the end of the | |||
|
330 | # current good entry/entries if there is a removed file | |||
|
331 | # before the next position. | |||
|
332 | if (self.hasremovals | |||
|
333 | and self.data.find('\n\x00', cur, | |||
|
334 | self.positions[i]) != -1): | |||
|
335 | break | |||
|
336 | ||||
298 | offset += self.positions[i] - cur |
|
337 | offset += self.positions[i] - cur | |
299 | cur = self.positions[i] |
|
338 | cur = self.positions[i] | |
300 | end_cut = self.data.find('\n', cur) |
|
339 | end_cut = self.data.find('\n', cur) | |
@@ -313,6 +352,7 b' class _lazymanifest(object):' | |||||
313 | offset += len(l[-1]) |
|
352 | offset += len(l[-1]) | |
314 | i += 1 |
|
353 | i += 1 | |
315 | self.data = ''.join(l) |
|
354 | self.data = ''.join(l) | |
|
355 | self.hasremovals = False | |||
316 | self.extradata = [] |
|
356 | self.extradata = [] | |
317 |
|
357 | |||
318 | def _pack(self, d): |
|
358 | def _pack(self, d): | |
@@ -558,7 +598,7 b' class manifestdict(object):' | |||||
558 | addbuf = util.buffer(base) |
|
598 | addbuf = util.buffer(base) | |
559 |
|
599 | |||
560 | changes = list(changes) |
|
600 | changes = list(changes) | |
561 | if len(changes) < 1000: |
|
601 | if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD: | |
562 | # start with a readonly loop that finds the offset of |
|
602 | # start with a readonly loop that finds the offset of | |
563 | # each line and creates the deltas |
|
603 | # each line and creates the deltas | |
564 | for f, todelete in changes: |
|
604 | for f, todelete in changes: |
@@ -201,3 +201,78 b' hg update should warm the cache too' | |||||
201 | total cache data size 425 bytes, on-disk 425 bytes |
|
201 | total cache data size 425 bytes, on-disk 425 bytes | |
202 | $ hg log -r '0' --debug | grep 'manifest:' |
|
202 | $ hg log -r '0' --debug | grep 'manifest:' | |
203 | manifest: 0:fce2a30dedad1eef4da95ca1dc0004157aa527cf |
|
203 | manifest: 0:fce2a30dedad1eef4da95ca1dc0004157aa527cf | |
|
204 | ||||
|
205 | Test file removal (especially with pure). The tests are crafted such that there | |||
|
206 | will be contiguous spans of existing entries to ensure that is handled properly. | |||
|
207 | (In this case, a.txt, aa.txt and c.txt, cc.txt, and ccc.txt) | |||
|
208 | ||||
|
209 | $ cat > $TESTTMP/manifest.py <<EOF | |||
|
210 | > from mercurial import ( | |||
|
211 | > extensions, | |||
|
212 | > manifest, | |||
|
213 | > ) | |||
|
214 | > def extsetup(ui): | |||
|
215 | > manifest.FASTDELTA_TEXTDIFF_THRESHOLD = 0 | |||
|
216 | > EOF | |||
|
217 | $ cat >> $HGRCPATH <<EOF | |||
|
218 | > [extensions] | |||
|
219 | > manifest = $TESTTMP/manifest.py | |||
|
220 | > EOF | |||
|
221 | ||||
|
222 | Pure removes should actually remove all dropped entries | |||
|
223 | ||||
|
224 | $ hg init repo | |||
|
225 | $ cd repo | |||
|
226 | $ echo a > a.txt | |||
|
227 | $ echo aa > aa.txt | |||
|
228 | $ echo b > b.txt | |||
|
229 | $ echo c > c.txt | |||
|
230 | $ echo c > cc.txt | |||
|
231 | $ echo c > ccc.txt | |||
|
232 | $ echo b > d.txt | |||
|
233 | $ echo c > e.txt | |||
|
234 | $ hg ci -Aqm 'a-e' | |||
|
235 | ||||
|
236 | $ hg rm b.txt d.txt | |||
|
237 | $ hg ci -m 'remove b and d' | |||
|
238 | ||||
|
239 | $ hg debugdata -m 1 | |||
|
240 | a.txt\x00b789fdd96dc2f3bd229c1dd8eedf0fc60e2b68e3 (esc) | |||
|
241 | aa.txt\x00a4bdc161c8fbb523c9a60409603f8710ff49a571 (esc) | |||
|
242 | c.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc) | |||
|
243 | cc.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc) | |||
|
244 | ccc.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc) | |||
|
245 | e.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc) | |||
|
246 | ||||
|
247 | $ hg up -qC . | |||
|
248 | ||||
|
249 | $ hg verify | |||
|
250 | checking changesets | |||
|
251 | checking manifests | |||
|
252 | crosschecking files in changesets and manifests | |||
|
253 | checking files | |||
|
254 | checked 2 changesets with 8 changes to 8 files | |||
|
255 | ||||
|
256 | $ hg rollback -q --config ui.rollback=True | |||
|
257 | $ hg rm b.txt d.txt | |||
|
258 | $ echo bb > bb.txt | |||
|
259 | ||||
|
260 | A mix of adds and removes should remove all dropped entries. | |||
|
261 | ||||
|
262 | $ hg ci -Aqm 'remove b and d; add bb' | |||
|
263 | ||||
|
264 | $ hg debugdata -m 1 | |||
|
265 | a.txt\x00b789fdd96dc2f3bd229c1dd8eedf0fc60e2b68e3 (esc) | |||
|
266 | aa.txt\x00a4bdc161c8fbb523c9a60409603f8710ff49a571 (esc) | |||
|
267 | bb.txt\x0004c6faf8a9fdd848a5304dfc1704749a374dff44 (esc) | |||
|
268 | c.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc) | |||
|
269 | cc.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc) | |||
|
270 | ccc.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc) | |||
|
271 | e.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc) | |||
|
272 | ||||
|
273 | $ hg verify | |||
|
274 | checking changesets | |||
|
275 | checking manifests | |||
|
276 | crosschecking files in changesets and manifests | |||
|
277 | checking files | |||
|
278 | checked 2 changesets with 9 changes to 9 files |
General Comments 0
You need to be logged in to leave comments.
Login now