##// END OF EJS Templates
manifest: add dirs() to manifestdict...
Drew Gottlieb -
r24322:f263814c default
parent child Browse files
Show More
@@ -1,395 +1,403 b''
1 # manifest.py - manifest revision class for mercurial
1 # manifest.py - manifest revision class for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from i18n import _
8 from i18n import _
9 import mdiff, parsers, error, revlog, util
9 import mdiff, parsers, error, revlog, util, scmutil
10 import array, struct
10 import array, struct
11
11
12 propertycache = util.propertycache
12
13
13 class _lazymanifest(dict):
14 class _lazymanifest(dict):
14 """This is the pure implementation of lazymanifest.
15 """This is the pure implementation of lazymanifest.
15
16
16 It has not been optimized *at all* and is not lazy.
17 It has not been optimized *at all* and is not lazy.
17 """
18 """
18
19
19 def __init__(self, data):
20 def __init__(self, data):
20 # This init method does a little bit of excessive-looking
21 # This init method does a little bit of excessive-looking
21 # precondition checking. This is so that the behavior of this
22 # precondition checking. This is so that the behavior of this
22 # class exactly matches its C counterpart to try and help
23 # class exactly matches its C counterpart to try and help
23 # prevent surprise breakage for anyone that develops against
24 # prevent surprise breakage for anyone that develops against
24 # the pure version.
25 # the pure version.
25 if data and data[-1] != '\n':
26 if data and data[-1] != '\n':
26 raise ValueError('Manifest did not end in a newline.')
27 raise ValueError('Manifest did not end in a newline.')
27 dict.__init__(self)
28 dict.__init__(self)
28 prev = None
29 prev = None
29 for l in data.splitlines():
30 for l in data.splitlines():
30 if prev is not None and prev > l:
31 if prev is not None and prev > l:
31 raise ValueError('Manifest lines not in sorted order.')
32 raise ValueError('Manifest lines not in sorted order.')
32 prev = l
33 prev = l
33 f, n = l.split('\0')
34 f, n = l.split('\0')
34 if len(n) > 40:
35 if len(n) > 40:
35 self[f] = revlog.bin(n[:40]), n[40:]
36 self[f] = revlog.bin(n[:40]), n[40:]
36 else:
37 else:
37 self[f] = revlog.bin(n), ''
38 self[f] = revlog.bin(n), ''
38
39
39 def __setitem__(self, k, v):
40 def __setitem__(self, k, v):
40 node, flag = v
41 node, flag = v
41 assert node is not None
42 assert node is not None
42 if len(node) > 21:
43 if len(node) > 21:
43 node = node[:21] # match c implementation behavior
44 node = node[:21] # match c implementation behavior
44 dict.__setitem__(self, k, (node, flag))
45 dict.__setitem__(self, k, (node, flag))
45
46
46 def __iter__(self):
47 def __iter__(self):
47 return iter(sorted(dict.keys(self)))
48 return iter(sorted(dict.keys(self)))
48
49
49 def iterkeys(self):
50 def iterkeys(self):
50 return iter(sorted(dict.keys(self)))
51 return iter(sorted(dict.keys(self)))
51
52
52 def iterentries(self):
53 def iterentries(self):
53 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
54 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
54
55
55 def copy(self):
56 def copy(self):
56 c = _lazymanifest('')
57 c = _lazymanifest('')
57 c.update(self)
58 c.update(self)
58 return c
59 return c
59
60
60 def diff(self, m2, clean=False):
61 def diff(self, m2, clean=False):
61 '''Finds changes between the current manifest and m2.'''
62 '''Finds changes between the current manifest and m2.'''
62 diff = {}
63 diff = {}
63
64
64 for fn, e1 in self.iteritems():
65 for fn, e1 in self.iteritems():
65 if fn not in m2:
66 if fn not in m2:
66 diff[fn] = e1, (None, '')
67 diff[fn] = e1, (None, '')
67 else:
68 else:
68 e2 = m2[fn]
69 e2 = m2[fn]
69 if e1 != e2:
70 if e1 != e2:
70 diff[fn] = e1, e2
71 diff[fn] = e1, e2
71 elif clean:
72 elif clean:
72 diff[fn] = None
73 diff[fn] = None
73
74
74 for fn, e2 in m2.iteritems():
75 for fn, e2 in m2.iteritems():
75 if fn not in self:
76 if fn not in self:
76 diff[fn] = (None, ''), e2
77 diff[fn] = (None, ''), e2
77
78
78 return diff
79 return diff
79
80
80 def filtercopy(self, filterfn):
81 def filtercopy(self, filterfn):
81 c = _lazymanifest('')
82 c = _lazymanifest('')
82 for f, n, fl in self.iterentries():
83 for f, n, fl in self.iterentries():
83 if filterfn(f):
84 if filterfn(f):
84 c[f] = n, fl
85 c[f] = n, fl
85 return c
86 return c
86
87
87 def text(self):
88 def text(self):
88 """Get the full data of this manifest as a bytestring."""
89 """Get the full data of this manifest as a bytestring."""
89 fl = sorted(self.iterentries())
90 fl = sorted(self.iterentries())
90
91
91 _hex = revlog.hex
92 _hex = revlog.hex
92 # if this is changed to support newlines in filenames,
93 # if this is changed to support newlines in filenames,
93 # be sure to check the templates/ dir again (especially *-raw.tmpl)
94 # be sure to check the templates/ dir again (especially *-raw.tmpl)
94 return ''.join("%s\0%s%s\n" % (
95 return ''.join("%s\0%s%s\n" % (
95 f, _hex(n[:20]), flag) for f, n, flag in fl)
96 f, _hex(n[:20]), flag) for f, n, flag in fl)
96
97
97 try:
98 try:
98 _lazymanifest = parsers.lazymanifest
99 _lazymanifest = parsers.lazymanifest
99 except AttributeError:
100 except AttributeError:
100 pass
101 pass
101
102
102 class manifestdict(object):
103 class manifestdict(object):
103 def __init__(self, data=''):
104 def __init__(self, data=''):
104 self._lm = _lazymanifest(data)
105 self._lm = _lazymanifest(data)
105
106
106 def __getitem__(self, key):
107 def __getitem__(self, key):
107 return self._lm[key][0]
108 return self._lm[key][0]
108
109
109 def find(self, key):
110 def find(self, key):
110 return self._lm[key]
111 return self._lm[key]
111
112
112 def __len__(self):
113 def __len__(self):
113 return len(self._lm)
114 return len(self._lm)
114
115
115 def __setitem__(self, key, node):
116 def __setitem__(self, key, node):
116 self._lm[key] = node, self.flags(key, '')
117 self._lm[key] = node, self.flags(key, '')
117
118
118 def __contains__(self, key):
119 def __contains__(self, key):
119 return key in self._lm
120 return key in self._lm
120
121
121 def __delitem__(self, key):
122 def __delitem__(self, key):
122 del self._lm[key]
123 del self._lm[key]
123
124
124 def __iter__(self):
125 def __iter__(self):
125 return self._lm.__iter__()
126 return self._lm.__iter__()
126
127
127 def iterkeys(self):
128 def iterkeys(self):
128 return self._lm.iterkeys()
129 return self._lm.iterkeys()
129
130
130 def keys(self):
131 def keys(self):
131 return list(self.iterkeys())
132 return list(self.iterkeys())
132
133
133 def intersectfiles(self, files):
134 def intersectfiles(self, files):
134 '''make a new lazymanifest with the intersection of self with files
135 '''make a new lazymanifest with the intersection of self with files
135
136
136 The algorithm assumes that files is much smaller than self.'''
137 The algorithm assumes that files is much smaller than self.'''
137 ret = manifestdict()
138 ret = manifestdict()
138 lm = self._lm
139 lm = self._lm
139 for fn in files:
140 for fn in files:
140 if fn in lm:
141 if fn in lm:
141 ret._lm[fn] = self._lm[fn]
142 ret._lm[fn] = self._lm[fn]
142 return ret
143 return ret
143
144
144 def filesnotin(self, m2):
145 def filesnotin(self, m2):
145 '''Set of files in this manifest that are not in the other'''
146 '''Set of files in this manifest that are not in the other'''
146 files = set(self)
147 files = set(self)
147 files.difference_update(m2)
148 files.difference_update(m2)
148 return files
149 return files
149
150
151 @propertycache
152 def _dirs(self):
153 return scmutil.dirs(self)
154
155 def dirs(self):
156 return self._dirs
157
150 def matches(self, match):
158 def matches(self, match):
151 '''generate a new manifest filtered by the match argument'''
159 '''generate a new manifest filtered by the match argument'''
152 if match.always():
160 if match.always():
153 return self.copy()
161 return self.copy()
154
162
155 files = match.files()
163 files = match.files()
156 if (match.matchfn == match.exact or
164 if (match.matchfn == match.exact or
157 (not match.anypats() and util.all(fn in self for fn in files))):
165 (not match.anypats() and util.all(fn in self for fn in files))):
158 return self.intersectfiles(files)
166 return self.intersectfiles(files)
159
167
160 lm = manifestdict('')
168 lm = manifestdict('')
161 lm._lm = self._lm.filtercopy(match)
169 lm._lm = self._lm.filtercopy(match)
162 return lm
170 return lm
163
171
164 def diff(self, m2, clean=False):
172 def diff(self, m2, clean=False):
165 '''Finds changes between the current manifest and m2.
173 '''Finds changes between the current manifest and m2.
166
174
167 Args:
175 Args:
168 m2: the manifest to which this manifest should be compared.
176 m2: the manifest to which this manifest should be compared.
169 clean: if true, include files unchanged between these manifests
177 clean: if true, include files unchanged between these manifests
170 with a None value in the returned dictionary.
178 with a None value in the returned dictionary.
171
179
172 The result is returned as a dict with filename as key and
180 The result is returned as a dict with filename as key and
173 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
181 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
174 nodeid in the current/other manifest and fl1/fl2 is the flag
182 nodeid in the current/other manifest and fl1/fl2 is the flag
175 in the current/other manifest. Where the file does not exist,
183 in the current/other manifest. Where the file does not exist,
176 the nodeid will be None and the flags will be the empty
184 the nodeid will be None and the flags will be the empty
177 string.
185 string.
178 '''
186 '''
179 return self._lm.diff(m2._lm, clean)
187 return self._lm.diff(m2._lm, clean)
180
188
181 def setflag(self, key, flag):
189 def setflag(self, key, flag):
182 self._lm[key] = self[key], flag
190 self._lm[key] = self[key], flag
183
191
184 def get(self, key, default=None):
192 def get(self, key, default=None):
185 try:
193 try:
186 return self._lm[key][0]
194 return self._lm[key][0]
187 except KeyError:
195 except KeyError:
188 return default
196 return default
189
197
190 def flags(self, key, default=''):
198 def flags(self, key, default=''):
191 try:
199 try:
192 return self._lm[key][1]
200 return self._lm[key][1]
193 except KeyError:
201 except KeyError:
194 return default
202 return default
195
203
196 def copy(self):
204 def copy(self):
197 c = manifestdict('')
205 c = manifestdict('')
198 c._lm = self._lm.copy()
206 c._lm = self._lm.copy()
199 return c
207 return c
200
208
201 def iteritems(self):
209 def iteritems(self):
202 return (x[:2] for x in self._lm.iterentries())
210 return (x[:2] for x in self._lm.iterentries())
203
211
204 def text(self):
212 def text(self):
205 return self._lm.text()
213 return self._lm.text()
206
214
207 def fastdelta(self, base, changes):
215 def fastdelta(self, base, changes):
208 """Given a base manifest text as an array.array and a list of changes
216 """Given a base manifest text as an array.array and a list of changes
209 relative to that text, compute a delta that can be used by revlog.
217 relative to that text, compute a delta that can be used by revlog.
210 """
218 """
211 delta = []
219 delta = []
212 dstart = None
220 dstart = None
213 dend = None
221 dend = None
214 dline = [""]
222 dline = [""]
215 start = 0
223 start = 0
216 # zero copy representation of base as a buffer
224 # zero copy representation of base as a buffer
217 addbuf = util.buffer(base)
225 addbuf = util.buffer(base)
218
226
219 # start with a readonly loop that finds the offset of
227 # start with a readonly loop that finds the offset of
220 # each line and creates the deltas
228 # each line and creates the deltas
221 for f, todelete in changes:
229 for f, todelete in changes:
222 # bs will either be the index of the item or the insert point
230 # bs will either be the index of the item or the insert point
223 start, end = _msearch(addbuf, f, start)
231 start, end = _msearch(addbuf, f, start)
224 if not todelete:
232 if not todelete:
225 h, fl = self._lm[f]
233 h, fl = self._lm[f]
226 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
234 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
227 else:
235 else:
228 if start == end:
236 if start == end:
229 # item we want to delete was not found, error out
237 # item we want to delete was not found, error out
230 raise AssertionError(
238 raise AssertionError(
231 _("failed to remove %s from manifest") % f)
239 _("failed to remove %s from manifest") % f)
232 l = ""
240 l = ""
233 if dstart is not None and dstart <= start and dend >= start:
241 if dstart is not None and dstart <= start and dend >= start:
234 if dend < end:
242 if dend < end:
235 dend = end
243 dend = end
236 if l:
244 if l:
237 dline.append(l)
245 dline.append(l)
238 else:
246 else:
239 if dstart is not None:
247 if dstart is not None:
240 delta.append([dstart, dend, "".join(dline)])
248 delta.append([dstart, dend, "".join(dline)])
241 dstart = start
249 dstart = start
242 dend = end
250 dend = end
243 dline = [l]
251 dline = [l]
244
252
245 if dstart is not None:
253 if dstart is not None:
246 delta.append([dstart, dend, "".join(dline)])
254 delta.append([dstart, dend, "".join(dline)])
247 # apply the delta to the base, and get a delta for addrevision
255 # apply the delta to the base, and get a delta for addrevision
248 deltatext, arraytext = _addlistdelta(base, delta)
256 deltatext, arraytext = _addlistdelta(base, delta)
249 return arraytext, deltatext
257 return arraytext, deltatext
250
258
251 def _msearch(m, s, lo=0, hi=None):
259 def _msearch(m, s, lo=0, hi=None):
252 '''return a tuple (start, end) that says where to find s within m.
260 '''return a tuple (start, end) that says where to find s within m.
253
261
254 If the string is found m[start:end] are the line containing
262 If the string is found m[start:end] are the line containing
255 that string. If start == end the string was not found and
263 that string. If start == end the string was not found and
256 they indicate the proper sorted insertion point.
264 they indicate the proper sorted insertion point.
257
265
258 m should be a buffer or a string
266 m should be a buffer or a string
259 s is a string'''
267 s is a string'''
260 def advance(i, c):
268 def advance(i, c):
261 while i < lenm and m[i] != c:
269 while i < lenm and m[i] != c:
262 i += 1
270 i += 1
263 return i
271 return i
264 if not s:
272 if not s:
265 return (lo, lo)
273 return (lo, lo)
266 lenm = len(m)
274 lenm = len(m)
267 if not hi:
275 if not hi:
268 hi = lenm
276 hi = lenm
269 while lo < hi:
277 while lo < hi:
270 mid = (lo + hi) // 2
278 mid = (lo + hi) // 2
271 start = mid
279 start = mid
272 while start > 0 and m[start - 1] != '\n':
280 while start > 0 and m[start - 1] != '\n':
273 start -= 1
281 start -= 1
274 end = advance(start, '\0')
282 end = advance(start, '\0')
275 if m[start:end] < s:
283 if m[start:end] < s:
276 # we know that after the null there are 40 bytes of sha1
284 # we know that after the null there are 40 bytes of sha1
277 # this translates to the bisect lo = mid + 1
285 # this translates to the bisect lo = mid + 1
278 lo = advance(end + 40, '\n') + 1
286 lo = advance(end + 40, '\n') + 1
279 else:
287 else:
280 # this translates to the bisect hi = mid
288 # this translates to the bisect hi = mid
281 hi = start
289 hi = start
282 end = advance(lo, '\0')
290 end = advance(lo, '\0')
283 found = m[lo:end]
291 found = m[lo:end]
284 if s == found:
292 if s == found:
285 # we know that after the null there are 40 bytes of sha1
293 # we know that after the null there are 40 bytes of sha1
286 end = advance(end + 40, '\n')
294 end = advance(end + 40, '\n')
287 return (lo, end + 1)
295 return (lo, end + 1)
288 else:
296 else:
289 return (lo, lo)
297 return (lo, lo)
290
298
291 def _checkforbidden(l):
299 def _checkforbidden(l):
292 """Check filenames for illegal characters."""
300 """Check filenames for illegal characters."""
293 for f in l:
301 for f in l:
294 if '\n' in f or '\r' in f:
302 if '\n' in f or '\r' in f:
295 raise error.RevlogError(
303 raise error.RevlogError(
296 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
304 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
297
305
298
306
299 # apply the changes collected during the bisect loop to our addlist
307 # apply the changes collected during the bisect loop to our addlist
300 # return a delta suitable for addrevision
308 # return a delta suitable for addrevision
301 def _addlistdelta(addlist, x):
309 def _addlistdelta(addlist, x):
302 # for large addlist arrays, building a new array is cheaper
310 # for large addlist arrays, building a new array is cheaper
303 # than repeatedly modifying the existing one
311 # than repeatedly modifying the existing one
304 currentposition = 0
312 currentposition = 0
305 newaddlist = array.array('c')
313 newaddlist = array.array('c')
306
314
307 for start, end, content in x:
315 for start, end, content in x:
308 newaddlist += addlist[currentposition:start]
316 newaddlist += addlist[currentposition:start]
309 if content:
317 if content:
310 newaddlist += array.array('c', content)
318 newaddlist += array.array('c', content)
311
319
312 currentposition = end
320 currentposition = end
313
321
314 newaddlist += addlist[currentposition:]
322 newaddlist += addlist[currentposition:]
315
323
316 deltatext = "".join(struct.pack(">lll", start, end, len(content))
324 deltatext = "".join(struct.pack(">lll", start, end, len(content))
317 + content for start, end, content in x)
325 + content for start, end, content in x)
318 return deltatext, newaddlist
326 return deltatext, newaddlist
319
327
320 class manifest(revlog.revlog):
328 class manifest(revlog.revlog):
321 def __init__(self, opener):
329 def __init__(self, opener):
322 # During normal operations, we expect to deal with not more than four
330 # During normal operations, we expect to deal with not more than four
323 # revs at a time (such as during commit --amend). When rebasing large
331 # revs at a time (such as during commit --amend). When rebasing large
324 # stacks of commits, the number can go up, hence the config knob below.
332 # stacks of commits, the number can go up, hence the config knob below.
325 cachesize = 4
333 cachesize = 4
326 opts = getattr(opener, 'options', None)
334 opts = getattr(opener, 'options', None)
327 if opts is not None:
335 if opts is not None:
328 cachesize = opts.get('manifestcachesize', cachesize)
336 cachesize = opts.get('manifestcachesize', cachesize)
329 self._mancache = util.lrucachedict(cachesize)
337 self._mancache = util.lrucachedict(cachesize)
330 revlog.revlog.__init__(self, opener, "00manifest.i")
338 revlog.revlog.__init__(self, opener, "00manifest.i")
331
339
332 def readdelta(self, node):
340 def readdelta(self, node):
333 r = self.rev(node)
341 r = self.rev(node)
334 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
342 d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
335 return manifestdict(d)
343 return manifestdict(d)
336
344
337 def readfast(self, node):
345 def readfast(self, node):
338 '''use the faster of readdelta or read'''
346 '''use the faster of readdelta or read'''
339 r = self.rev(node)
347 r = self.rev(node)
340 deltaparent = self.deltaparent(r)
348 deltaparent = self.deltaparent(r)
341 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
349 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
342 return self.readdelta(node)
350 return self.readdelta(node)
343 return self.read(node)
351 return self.read(node)
344
352
345 def read(self, node):
353 def read(self, node):
346 if node == revlog.nullid:
354 if node == revlog.nullid:
347 return manifestdict() # don't upset local cache
355 return manifestdict() # don't upset local cache
348 if node in self._mancache:
356 if node in self._mancache:
349 return self._mancache[node][0]
357 return self._mancache[node][0]
350 text = self.revision(node)
358 text = self.revision(node)
351 arraytext = array.array('c', text)
359 arraytext = array.array('c', text)
352 m = manifestdict(text)
360 m = manifestdict(text)
353 self._mancache[node] = (m, arraytext)
361 self._mancache[node] = (m, arraytext)
354 return m
362 return m
355
363
356 def find(self, node, f):
364 def find(self, node, f):
357 '''look up entry for a single file efficiently.
365 '''look up entry for a single file efficiently.
358 return (node, flags) pair if found, (None, None) if not.'''
366 return (node, flags) pair if found, (None, None) if not.'''
359 m = self.read(node)
367 m = self.read(node)
360 try:
368 try:
361 return m.find(f)
369 return m.find(f)
362 except KeyError:
370 except KeyError:
363 return None, None
371 return None, None
364
372
365 def add(self, m, transaction, link, p1, p2, added, removed):
373 def add(self, m, transaction, link, p1, p2, added, removed):
366 if p1 in self._mancache:
374 if p1 in self._mancache:
367 # If our first parent is in the manifest cache, we can
375 # If our first parent is in the manifest cache, we can
368 # compute a delta here using properties we know about the
376 # compute a delta here using properties we know about the
369 # manifest up-front, which may save time later for the
377 # manifest up-front, which may save time later for the
370 # revlog layer.
378 # revlog layer.
371
379
372 _checkforbidden(added)
380 _checkforbidden(added)
373 # combine the changed lists into one list for sorting
381 # combine the changed lists into one list for sorting
374 work = [(x, False) for x in added]
382 work = [(x, False) for x in added]
375 work.extend((x, True) for x in removed)
383 work.extend((x, True) for x in removed)
376 # this could use heapq.merge() (from Python 2.6+) or equivalent
384 # this could use heapq.merge() (from Python 2.6+) or equivalent
377 # since the lists are already sorted
385 # since the lists are already sorted
378 work.sort()
386 work.sort()
379
387
380 arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
388 arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
381 cachedelta = self.rev(p1), deltatext
389 cachedelta = self.rev(p1), deltatext
382 text = util.buffer(arraytext)
390 text = util.buffer(arraytext)
383 else:
391 else:
384 # The first parent manifest isn't already loaded, so we'll
392 # The first parent manifest isn't already loaded, so we'll
385 # just encode a fulltext of the manifest and pass that
393 # just encode a fulltext of the manifest and pass that
386 # through to the revlog layer, and let it handle the delta
394 # through to the revlog layer, and let it handle the delta
387 # process.
395 # process.
388 text = m.text()
396 text = m.text()
389 arraytext = array.array('c', text)
397 arraytext = array.array('c', text)
390 cachedelta = None
398 cachedelta = None
391
399
392 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
400 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
393 self._mancache[n] = (m, arraytext)
401 self._mancache[n] = (m, arraytext)
394
402
395 return n
403 return n
General Comments 0
You need to be logged in to leave comments. Login now