##// END OF EJS Templates
manifest: move parsing functions up in file...
Augie Fackler -
r24223:b4df0d0c default
parent child Browse files
Show More
@@ -1,318 +1,318
1 # manifest.py - manifest revision class for mercurial
1 # manifest.py - manifest revision class for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from i18n import _
8 from i18n import _
9 import mdiff, parsers, error, revlog, util
9 import mdiff, parsers, error, revlog, util
10 import array, struct
10 import array, struct
11
11
12 # Pure Python fallback
13 def _parsemanifest(mfdict, fdict, lines):
14 bin = revlog.bin
15 for l in lines.splitlines():
16 f, n = l.split('\0')
17 if len(n) > 40:
18 fdict[f] = n[40:]
19 mfdict[f] = bin(n[:40])
20 else:
21 mfdict[f] = bin(n)
22
23 def _parse(lines):
24 mfdict = manifestdict()
25 try:
26 parsers.parse_manifest(mfdict, mfdict._flags, lines)
27 except AttributeError:
28 _parsemanifest(mfdict, mfdict._flags, lines)
29 return mfdict
30
12 class manifestdict(dict):
31 class manifestdict(dict):
13 def __init__(self):
32 def __init__(self):
14 self._flags = {}
33 self._flags = {}
15 def __setitem__(self, k, v):
34 def __setitem__(self, k, v):
16 assert v is not None
35 assert v is not None
17 dict.__setitem__(self, k, v)
36 dict.__setitem__(self, k, v)
18 def flags(self, f):
37 def flags(self, f):
19 return self._flags.get(f, "")
38 return self._flags.get(f, "")
20 def setflag(self, f, flags):
39 def setflag(self, f, flags):
21 """Set the flags (symlink, executable) for path f."""
40 """Set the flags (symlink, executable) for path f."""
22 self._flags[f] = flags
41 self._flags[f] = flags
23 def copy(self):
42 def copy(self):
24 copy = manifestdict()
43 copy = manifestdict()
25 dict.__init__(copy, self)
44 dict.__init__(copy, self)
26 copy._flags = dict.copy(self._flags)
45 copy._flags = dict.copy(self._flags)
27 return copy
46 return copy
28 def intersectfiles(self, files):
47 def intersectfiles(self, files):
29 '''make a new manifestdict with the intersection of self with files
48 '''make a new manifestdict with the intersection of self with files
30
49
31 The algorithm assumes that files is much smaller than self.'''
50 The algorithm assumes that files is much smaller than self.'''
32 ret = manifestdict()
51 ret = manifestdict()
33 for fn in files:
52 for fn in files:
34 if fn in self:
53 if fn in self:
35 ret[fn] = self[fn]
54 ret[fn] = self[fn]
36 flags = self._flags.get(fn, None)
55 flags = self._flags.get(fn, None)
37 if flags:
56 if flags:
38 ret._flags[fn] = flags
57 ret._flags[fn] = flags
39 return ret
58 return ret
40
59
41 def filesnotin(self, m2):
60 def filesnotin(self, m2):
42 '''Set of files in this manifest that are not in the other'''
61 '''Set of files in this manifest that are not in the other'''
43 files = set(self.iterkeys())
62 files = set(self.iterkeys())
44 files.difference_update(m2.iterkeys())
63 files.difference_update(m2.iterkeys())
45 return files
64 return files
46
65
47 def matches(self, match):
66 def matches(self, match):
48 '''generate a new manifest filtered by the match argument'''
67 '''generate a new manifest filtered by the match argument'''
49 if match.always():
68 if match.always():
50 return self.copy()
69 return self.copy()
51
70
52 files = match.files()
71 files = match.files()
53 if (match.matchfn == match.exact or
72 if (match.matchfn == match.exact or
54 (not match.anypats() and util.all(fn in self for fn in files))):
73 (not match.anypats() and util.all(fn in self for fn in files))):
55 return self.intersectfiles(files)
74 return self.intersectfiles(files)
56
75
57 m = self.copy()
76 m = self.copy()
58 for fn in m.keys():
77 for fn in m.keys():
59 if not match(fn):
78 if not match(fn):
60 del m[fn]
79 del m[fn]
61 return m
80 return m
62
81
63 def diff(self, m2, clean=False):
82 def diff(self, m2, clean=False):
64 '''Finds changes between the current manifest and m2.
83 '''Finds changes between the current manifest and m2.
65
84
66 Args:
85 Args:
67 m2: the manifest to which this manifest should be compared.
86 m2: the manifest to which this manifest should be compared.
68 clean: if true, include files unchanged between these manifests
87 clean: if true, include files unchanged between these manifests
69 with a None value in the returned dictionary.
88 with a None value in the returned dictionary.
70
89
71 The result is returned as a dict with filename as key and
90 The result is returned as a dict with filename as key and
72 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
91 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
73 nodeid in the current/other manifest and fl1/fl2 is the flag
92 nodeid in the current/other manifest and fl1/fl2 is the flag
74 in the current/other manifest. Where the file does not exist,
93 in the current/other manifest. Where the file does not exist,
75 the nodeid will be None and the flags will be the empty
94 the nodeid will be None and the flags will be the empty
76 string.
95 string.
77 '''
96 '''
78 diff = {}
97 diff = {}
79
98
80 for fn, n1 in self.iteritems():
99 for fn, n1 in self.iteritems():
81 fl1 = self._flags.get(fn, '')
100 fl1 = self._flags.get(fn, '')
82 n2 = m2.get(fn, None)
101 n2 = m2.get(fn, None)
83 fl2 = m2._flags.get(fn, '')
102 fl2 = m2._flags.get(fn, '')
84 if n2 is None:
103 if n2 is None:
85 fl2 = ''
104 fl2 = ''
86 if n1 != n2 or fl1 != fl2:
105 if n1 != n2 or fl1 != fl2:
87 diff[fn] = ((n1, fl1), (n2, fl2))
106 diff[fn] = ((n1, fl1), (n2, fl2))
88 elif clean:
107 elif clean:
89 diff[fn] = None
108 diff[fn] = None
90
109
91 for fn, n2 in m2.iteritems():
110 for fn, n2 in m2.iteritems():
92 if fn not in self:
111 if fn not in self:
93 fl2 = m2._flags.get(fn, '')
112 fl2 = m2._flags.get(fn, '')
94 diff[fn] = ((None, ''), (n2, fl2))
113 diff[fn] = ((None, ''), (n2, fl2))
95
114
96 return diff
115 return diff
97
116
98 def text(self):
117 def text(self):
99 """Get the full data of this manifest as a bytestring."""
118 """Get the full data of this manifest as a bytestring."""
100 fl = sorted(self)
119 fl = sorted(self)
101 _checkforbidden(fl)
120 _checkforbidden(fl)
102
121
103 hex, flags = revlog.hex, self.flags
122 hex, flags = revlog.hex, self.flags
104 # if this is changed to support newlines in filenames,
123 # if this is changed to support newlines in filenames,
105 # be sure to check the templates/ dir again (especially *-raw.tmpl)
124 # be sure to check the templates/ dir again (especially *-raw.tmpl)
106 return ''.join("%s\0%s%s\n" % (f, hex(self[f]), flags(f)) for f in fl)
125 return ''.join("%s\0%s%s\n" % (f, hex(self[f]), flags(f)) for f in fl)
107
126
108 def fastdelta(self, base, changes):
127 def fastdelta(self, base, changes):
109 """Given a base manifest text as an array.array and a list of changes
128 """Given a base manifest text as an array.array and a list of changes
110 relative to that text, compute a delta that can be used by revlog.
129 relative to that text, compute a delta that can be used by revlog.
111 """
130 """
112 delta = []
131 delta = []
113 dstart = None
132 dstart = None
114 dend = None
133 dend = None
115 dline = [""]
134 dline = [""]
116 start = 0
135 start = 0
117 # zero copy representation of base as a buffer
136 # zero copy representation of base as a buffer
118 addbuf = util.buffer(base)
137 addbuf = util.buffer(base)
119
138
120 # start with a readonly loop that finds the offset of
139 # start with a readonly loop that finds the offset of
121 # each line and creates the deltas
140 # each line and creates the deltas
122 for f, todelete in changes:
141 for f, todelete in changes:
123 # bs will either be the index of the item or the insert point
142 # bs will either be the index of the item or the insert point
124 start, end = _msearch(addbuf, f, start)
143 start, end = _msearch(addbuf, f, start)
125 if not todelete:
144 if not todelete:
126 l = "%s\0%s%s\n" % (f, revlog.hex(self[f]), self.flags(f))
145 l = "%s\0%s%s\n" % (f, revlog.hex(self[f]), self.flags(f))
127 else:
146 else:
128 if start == end:
147 if start == end:
129 # item we want to delete was not found, error out
148 # item we want to delete was not found, error out
130 raise AssertionError(
149 raise AssertionError(
131 _("failed to remove %s from manifest") % f)
150 _("failed to remove %s from manifest") % f)
132 l = ""
151 l = ""
133 if dstart is not None and dstart <= start and dend >= start:
152 if dstart is not None and dstart <= start and dend >= start:
134 if dend < end:
153 if dend < end:
135 dend = end
154 dend = end
136 if l:
155 if l:
137 dline.append(l)
156 dline.append(l)
138 else:
157 else:
139 if dstart is not None:
158 if dstart is not None:
140 delta.append([dstart, dend, "".join(dline)])
159 delta.append([dstart, dend, "".join(dline)])
141 dstart = start
160 dstart = start
142 dend = end
161 dend = end
143 dline = [l]
162 dline = [l]
144
163
145 if dstart is not None:
164 if dstart is not None:
146 delta.append([dstart, dend, "".join(dline)])
165 delta.append([dstart, dend, "".join(dline)])
147 # apply the delta to the base, and get a delta for addrevision
166 # apply the delta to the base, and get a delta for addrevision
148 deltatext, arraytext = _addlistdelta(base, delta)
167 deltatext, arraytext = _addlistdelta(base, delta)
149 return arraytext, deltatext
168 return arraytext, deltatext
150
169
151 def _msearch(m, s, lo=0, hi=None):
170 def _msearch(m, s, lo=0, hi=None):
152 '''return a tuple (start, end) that says where to find s within m.
171 '''return a tuple (start, end) that says where to find s within m.
153
172
154 If the string is found m[start:end] are the line containing
173 If the string is found m[start:end] are the line containing
155 that string. If start == end the string was not found and
174 that string. If start == end the string was not found and
156 they indicate the proper sorted insertion point.
175 they indicate the proper sorted insertion point.
157
176
158 m should be a buffer or a string
177 m should be a buffer or a string
159 s is a string'''
178 s is a string'''
160 def advance(i, c):
179 def advance(i, c):
161 while i < lenm and m[i] != c:
180 while i < lenm and m[i] != c:
162 i += 1
181 i += 1
163 return i
182 return i
164 if not s:
183 if not s:
165 return (lo, lo)
184 return (lo, lo)
166 lenm = len(m)
185 lenm = len(m)
167 if not hi:
186 if not hi:
168 hi = lenm
187 hi = lenm
169 while lo < hi:
188 while lo < hi:
170 mid = (lo + hi) // 2
189 mid = (lo + hi) // 2
171 start = mid
190 start = mid
172 while start > 0 and m[start - 1] != '\n':
191 while start > 0 and m[start - 1] != '\n':
173 start -= 1
192 start -= 1
174 end = advance(start, '\0')
193 end = advance(start, '\0')
175 if m[start:end] < s:
194 if m[start:end] < s:
176 # we know that after the null there are 40 bytes of sha1
195 # we know that after the null there are 40 bytes of sha1
177 # this translates to the bisect lo = mid + 1
196 # this translates to the bisect lo = mid + 1
178 lo = advance(end + 40, '\n') + 1
197 lo = advance(end + 40, '\n') + 1
179 else:
198 else:
180 # this translates to the bisect hi = mid
199 # this translates to the bisect hi = mid
181 hi = start
200 hi = start
182 end = advance(lo, '\0')
201 end = advance(lo, '\0')
183 found = m[lo:end]
202 found = m[lo:end]
184 if s == found:
203 if s == found:
185 # we know that after the null there are 40 bytes of sha1
204 # we know that after the null there are 40 bytes of sha1
186 end = advance(end + 40, '\n')
205 end = advance(end + 40, '\n')
187 return (lo, end + 1)
206 return (lo, end + 1)
188 else:
207 else:
189 return (lo, lo)
208 return (lo, lo)
190
209
191 def _checkforbidden(l):
210 def _checkforbidden(l):
192 """Check filenames for illegal characters."""
211 """Check filenames for illegal characters."""
193 for f in l:
212 for f in l:
194 if '\n' in f or '\r' in f:
213 if '\n' in f or '\r' in f:
195 raise error.RevlogError(
214 raise error.RevlogError(
196 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
215 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
197
216
198
217
199 # apply the changes collected during the bisect loop to our addlist
218 # apply the changes collected during the bisect loop to our addlist
200 # return a delta suitable for addrevision
219 # return a delta suitable for addrevision
201 def _addlistdelta(addlist, x):
220 def _addlistdelta(addlist, x):
202 # for large addlist arrays, building a new array is cheaper
221 # for large addlist arrays, building a new array is cheaper
203 # than repeatedly modifying the existing one
222 # than repeatedly modifying the existing one
204 currentposition = 0
223 currentposition = 0
205 newaddlist = array.array('c')
224 newaddlist = array.array('c')
206
225
207 for start, end, content in x:
226 for start, end, content in x:
208 newaddlist += addlist[currentposition:start]
227 newaddlist += addlist[currentposition:start]
209 if content:
228 if content:
210 newaddlist += array.array('c', content)
229 newaddlist += array.array('c', content)
211
230
212 currentposition = end
231 currentposition = end
213
232
214 newaddlist += addlist[currentposition:]
233 newaddlist += addlist[currentposition:]
215
234
216 deltatext = "".join(struct.pack(">lll", start, end, len(content))
235 deltatext = "".join(struct.pack(">lll", start, end, len(content))
217 + content for start, end, content in x)
236 + content for start, end, content in x)
218 return deltatext, newaddlist
237 return deltatext, newaddlist
219
238
220 # Pure Python fallback
221 def _parsemanifest(mfdict, fdict, lines):
222 bin = revlog.bin
223 for l in lines.splitlines():
224 f, n = l.split('\0')
225 if len(n) > 40:
226 fdict[f] = n[40:]
227 mfdict[f] = bin(n[:40])
228 else:
229 mfdict[f] = bin(n)
230
231 def _parse(lines):
232 mfdict = manifestdict()
233 try:
234 parsers.parse_manifest(mfdict, mfdict._flags, lines)
235 except AttributeError:
236 _parsemanifest(mfdict, mfdict._flags, lines)
237 return mfdict
238
239 class manifest(revlog.revlog):
239 class manifest(revlog.revlog):
240 def __init__(self, opener):
240 def __init__(self, opener):
241 # During normal operations, we expect to deal with not more than four
241 # During normal operations, we expect to deal with not more than four
242 # revs at a time (such as during commit --amend). When rebasing large
242 # revs at a time (such as during commit --amend). When rebasing large
243 # stacks of commits, the number can go up, hence the config knob below.
243 # stacks of commits, the number can go up, hence the config knob below.
244 cachesize = 4
244 cachesize = 4
245 opts = getattr(opener, 'options', None)
245 opts = getattr(opener, 'options', None)
246 if opts is not None:
246 if opts is not None:
247 cachesize = opts.get('manifestcachesize', cachesize)
247 cachesize = opts.get('manifestcachesize', cachesize)
248 self._mancache = util.lrucachedict(cachesize)
248 self._mancache = util.lrucachedict(cachesize)
249 revlog.revlog.__init__(self, opener, "00manifest.i")
249 revlog.revlog.__init__(self, opener, "00manifest.i")
250
250
251 def readdelta(self, node):
251 def readdelta(self, node):
252 r = self.rev(node)
252 r = self.rev(node)
253 return _parse(mdiff.patchtext(self.revdiff(self.deltaparent(r), r)))
253 return _parse(mdiff.patchtext(self.revdiff(self.deltaparent(r), r)))
254
254
255 def readfast(self, node):
255 def readfast(self, node):
256 '''use the faster of readdelta or read'''
256 '''use the faster of readdelta or read'''
257 r = self.rev(node)
257 r = self.rev(node)
258 deltaparent = self.deltaparent(r)
258 deltaparent = self.deltaparent(r)
259 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
259 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
260 return self.readdelta(node)
260 return self.readdelta(node)
261 return self.read(node)
261 return self.read(node)
262
262
263 def read(self, node):
263 def read(self, node):
264 if node == revlog.nullid:
264 if node == revlog.nullid:
265 return manifestdict() # don't upset local cache
265 return manifestdict() # don't upset local cache
266 if node in self._mancache:
266 if node in self._mancache:
267 return self._mancache[node][0]
267 return self._mancache[node][0]
268 text = self.revision(node)
268 text = self.revision(node)
269 arraytext = array.array('c', text)
269 arraytext = array.array('c', text)
270 m = _parse(text)
270 m = _parse(text)
271 self._mancache[node] = (m, arraytext)
271 self._mancache[node] = (m, arraytext)
272 return m
272 return m
273
273
274 def find(self, node, f):
274 def find(self, node, f):
275 '''look up entry for a single file efficiently.
275 '''look up entry for a single file efficiently.
276 return (node, flags) pair if found, (None, None) if not.'''
276 return (node, flags) pair if found, (None, None) if not.'''
277 if node in self._mancache:
277 if node in self._mancache:
278 m = self._mancache[node][0]
278 m = self._mancache[node][0]
279 return m.get(f), m.flags(f)
279 return m.get(f), m.flags(f)
280 text = self.revision(node)
280 text = self.revision(node)
281 start, end = _msearch(text, f)
281 start, end = _msearch(text, f)
282 if start == end:
282 if start == end:
283 return None, None
283 return None, None
284 l = text[start:end]
284 l = text[start:end]
285 f, n = l.split('\0')
285 f, n = l.split('\0')
286 return revlog.bin(n[:40]), n[40:-1]
286 return revlog.bin(n[:40]), n[40:-1]
287
287
288 def add(self, m, transaction, link, p1, p2, added, removed):
288 def add(self, m, transaction, link, p1, p2, added, removed):
289 if p1 in self._mancache:
289 if p1 in self._mancache:
290 # If our first parent is in the manifest cache, we can
290 # If our first parent is in the manifest cache, we can
291 # compute a delta here using properties we know about the
291 # compute a delta here using properties we know about the
292 # manifest up-front, which may save time later for the
292 # manifest up-front, which may save time later for the
293 # revlog layer.
293 # revlog layer.
294
294
295 _checkforbidden(added)
295 _checkforbidden(added)
296 # combine the changed lists into one list for sorting
296 # combine the changed lists into one list for sorting
297 work = [(x, False) for x in added]
297 work = [(x, False) for x in added]
298 work.extend((x, True) for x in removed)
298 work.extend((x, True) for x in removed)
299 # this could use heapq.merge() (from Python 2.6+) or equivalent
299 # this could use heapq.merge() (from Python 2.6+) or equivalent
300 # since the lists are already sorted
300 # since the lists are already sorted
301 work.sort()
301 work.sort()
302
302
303 arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
303 arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
304 cachedelta = self.rev(p1), deltatext
304 cachedelta = self.rev(p1), deltatext
305 text = util.buffer(arraytext)
305 text = util.buffer(arraytext)
306 else:
306 else:
307 # The first parent manifest isn't already loaded, so we'll
307 # The first parent manifest isn't already loaded, so we'll
308 # just encode a fulltext of the manifest and pass that
308 # just encode a fulltext of the manifest and pass that
309 # through to the revlog layer, and let it handle the delta
309 # through to the revlog layer, and let it handle the delta
310 # process.
310 # process.
311 text = m.text()
311 text = m.text()
312 arraytext = array.array('c', text)
312 arraytext = array.array('c', text)
313 cachedelta = None
313 cachedelta = None
314
314
315 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
315 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
316 self._mancache[n] = (m, arraytext)
316 self._mancache[n] = (m, arraytext)
317
317
318 return n
318 return n
General Comments 0
You need to be logged in to leave comments. Login now