##// END OF EJS Templates
manifest: move parsing functions up in file...
Augie Fackler -
r24223:b4df0d0c default
parent child Browse files
Show More
@@ -1,318 +1,318
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import mdiff, parsers, error, revlog, util
10 10 import array, struct
11 11
12 # Pure Python fallback
13 def _parsemanifest(mfdict, fdict, lines):
14 bin = revlog.bin
15 for l in lines.splitlines():
16 f, n = l.split('\0')
17 if len(n) > 40:
18 fdict[f] = n[40:]
19 mfdict[f] = bin(n[:40])
20 else:
21 mfdict[f] = bin(n)
22
23 def _parse(lines):
24 mfdict = manifestdict()
25 try:
26 parsers.parse_manifest(mfdict, mfdict._flags, lines)
27 except AttributeError:
28 _parsemanifest(mfdict, mfdict._flags, lines)
29 return mfdict
30
12 31 class manifestdict(dict):
13 32 def __init__(self):
14 33 self._flags = {}
15 34 def __setitem__(self, k, v):
16 35 assert v is not None
17 36 dict.__setitem__(self, k, v)
18 37 def flags(self, f):
19 38 return self._flags.get(f, "")
20 39 def setflag(self, f, flags):
21 40 """Set the flags (symlink, executable) for path f."""
22 41 self._flags[f] = flags
23 42 def copy(self):
24 43 copy = manifestdict()
25 44 dict.__init__(copy, self)
26 45 copy._flags = dict.copy(self._flags)
27 46 return copy
28 47 def intersectfiles(self, files):
29 48 '''make a new manifestdict with the intersection of self with files
30 49
31 50 The algorithm assumes that files is much smaller than self.'''
32 51 ret = manifestdict()
33 52 for fn in files:
34 53 if fn in self:
35 54 ret[fn] = self[fn]
36 55 flags = self._flags.get(fn, None)
37 56 if flags:
38 57 ret._flags[fn] = flags
39 58 return ret
40 59
41 60 def filesnotin(self, m2):
42 61 '''Set of files in this manifest that are not in the other'''
43 62 files = set(self.iterkeys())
44 63 files.difference_update(m2.iterkeys())
45 64 return files
46 65
47 66 def matches(self, match):
48 67 '''generate a new manifest filtered by the match argument'''
49 68 if match.always():
50 69 return self.copy()
51 70
52 71 files = match.files()
53 72 if (match.matchfn == match.exact or
54 73 (not match.anypats() and util.all(fn in self for fn in files))):
55 74 return self.intersectfiles(files)
56 75
57 76 m = self.copy()
58 77 for fn in m.keys():
59 78 if not match(fn):
60 79 del m[fn]
61 80 return m
62 81
63 82 def diff(self, m2, clean=False):
64 83 '''Finds changes between the current manifest and m2.
65 84
66 85 Args:
67 86 m2: the manifest to which this manifest should be compared.
68 87 clean: if true, include files unchanged between these manifests
69 88 with a None value in the returned dictionary.
70 89
71 90 The result is returned as a dict with filename as key and
72 91 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
73 92 nodeid in the current/other manifest and fl1/fl2 is the flag
74 93 in the current/other manifest. Where the file does not exist,
75 94 the nodeid will be None and the flags will be the empty
76 95 string.
77 96 '''
78 97 diff = {}
79 98
80 99 for fn, n1 in self.iteritems():
81 100 fl1 = self._flags.get(fn, '')
82 101 n2 = m2.get(fn, None)
83 102 fl2 = m2._flags.get(fn, '')
84 103 if n2 is None:
85 104 fl2 = ''
86 105 if n1 != n2 or fl1 != fl2:
87 106 diff[fn] = ((n1, fl1), (n2, fl2))
88 107 elif clean:
89 108 diff[fn] = None
90 109
91 110 for fn, n2 in m2.iteritems():
92 111 if fn not in self:
93 112 fl2 = m2._flags.get(fn, '')
94 113 diff[fn] = ((None, ''), (n2, fl2))
95 114
96 115 return diff
97 116
98 117 def text(self):
99 118 """Get the full data of this manifest as a bytestring."""
100 119 fl = sorted(self)
101 120 _checkforbidden(fl)
102 121
103 122 hex, flags = revlog.hex, self.flags
104 123 # if this is changed to support newlines in filenames,
105 124 # be sure to check the templates/ dir again (especially *-raw.tmpl)
106 125 return ''.join("%s\0%s%s\n" % (f, hex(self[f]), flags(f)) for f in fl)
107 126
108 127 def fastdelta(self, base, changes):
109 128 """Given a base manifest text as an array.array and a list of changes
110 129 relative to that text, compute a delta that can be used by revlog.
111 130 """
112 131 delta = []
113 132 dstart = None
114 133 dend = None
115 134 dline = [""]
116 135 start = 0
117 136 # zero copy representation of base as a buffer
118 137 addbuf = util.buffer(base)
119 138
120 139 # start with a readonly loop that finds the offset of
121 140 # each line and creates the deltas
122 141 for f, todelete in changes:
123 142 # bs will either be the index of the item or the insert point
124 143 start, end = _msearch(addbuf, f, start)
125 144 if not todelete:
126 145 l = "%s\0%s%s\n" % (f, revlog.hex(self[f]), self.flags(f))
127 146 else:
128 147 if start == end:
129 148 # item we want to delete was not found, error out
130 149 raise AssertionError(
131 150 _("failed to remove %s from manifest") % f)
132 151 l = ""
133 152 if dstart is not None and dstart <= start and dend >= start:
134 153 if dend < end:
135 154 dend = end
136 155 if l:
137 156 dline.append(l)
138 157 else:
139 158 if dstart is not None:
140 159 delta.append([dstart, dend, "".join(dline)])
141 160 dstart = start
142 161 dend = end
143 162 dline = [l]
144 163
145 164 if dstart is not None:
146 165 delta.append([dstart, dend, "".join(dline)])
147 166 # apply the delta to the base, and get a delta for addrevision
148 167 deltatext, arraytext = _addlistdelta(base, delta)
149 168 return arraytext, deltatext
150 169
151 170 def _msearch(m, s, lo=0, hi=None):
152 171 '''return a tuple (start, end) that says where to find s within m.
153 172
154 173 If the string is found m[start:end] are the line containing
155 174 that string. If start == end the string was not found and
156 175 they indicate the proper sorted insertion point.
157 176
158 177 m should be a buffer or a string
159 178 s is a string'''
160 179 def advance(i, c):
161 180 while i < lenm and m[i] != c:
162 181 i += 1
163 182 return i
164 183 if not s:
165 184 return (lo, lo)
166 185 lenm = len(m)
167 186 if not hi:
168 187 hi = lenm
169 188 while lo < hi:
170 189 mid = (lo + hi) // 2
171 190 start = mid
172 191 while start > 0 and m[start - 1] != '\n':
173 192 start -= 1
174 193 end = advance(start, '\0')
175 194 if m[start:end] < s:
176 195 # we know that after the null there are 40 bytes of sha1
177 196 # this translates to the bisect lo = mid + 1
178 197 lo = advance(end + 40, '\n') + 1
179 198 else:
180 199 # this translates to the bisect hi = mid
181 200 hi = start
182 201 end = advance(lo, '\0')
183 202 found = m[lo:end]
184 203 if s == found:
185 204 # we know that after the null there are 40 bytes of sha1
186 205 end = advance(end + 40, '\n')
187 206 return (lo, end + 1)
188 207 else:
189 208 return (lo, lo)
190 209
191 210 def _checkforbidden(l):
192 211 """Check filenames for illegal characters."""
193 212 for f in l:
194 213 if '\n' in f or '\r' in f:
195 214 raise error.RevlogError(
196 215 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
197 216
198 217
199 218 # apply the changes collected during the bisect loop to our addlist
200 219 # return a delta suitable for addrevision
201 220 def _addlistdelta(addlist, x):
202 221 # for large addlist arrays, building a new array is cheaper
203 222 # than repeatedly modifying the existing one
204 223 currentposition = 0
205 224 newaddlist = array.array('c')
206 225
207 226 for start, end, content in x:
208 227 newaddlist += addlist[currentposition:start]
209 228 if content:
210 229 newaddlist += array.array('c', content)
211 230
212 231 currentposition = end
213 232
214 233 newaddlist += addlist[currentposition:]
215 234
216 235 deltatext = "".join(struct.pack(">lll", start, end, len(content))
217 236 + content for start, end, content in x)
218 237 return deltatext, newaddlist
219 238
220 # Pure Python fallback
221 def _parsemanifest(mfdict, fdict, lines):
222 bin = revlog.bin
223 for l in lines.splitlines():
224 f, n = l.split('\0')
225 if len(n) > 40:
226 fdict[f] = n[40:]
227 mfdict[f] = bin(n[:40])
228 else:
229 mfdict[f] = bin(n)
230
231 def _parse(lines):
232 mfdict = manifestdict()
233 try:
234 parsers.parse_manifest(mfdict, mfdict._flags, lines)
235 except AttributeError:
236 _parsemanifest(mfdict, mfdict._flags, lines)
237 return mfdict
238
239 239 class manifest(revlog.revlog):
240 240 def __init__(self, opener):
241 241 # During normal operations, we expect to deal with not more than four
242 242 # revs at a time (such as during commit --amend). When rebasing large
243 243 # stacks of commits, the number can go up, hence the config knob below.
244 244 cachesize = 4
245 245 opts = getattr(opener, 'options', None)
246 246 if opts is not None:
247 247 cachesize = opts.get('manifestcachesize', cachesize)
248 248 self._mancache = util.lrucachedict(cachesize)
249 249 revlog.revlog.__init__(self, opener, "00manifest.i")
250 250
251 251 def readdelta(self, node):
252 252 r = self.rev(node)
253 253 return _parse(mdiff.patchtext(self.revdiff(self.deltaparent(r), r)))
254 254
255 255 def readfast(self, node):
256 256 '''use the faster of readdelta or read'''
257 257 r = self.rev(node)
258 258 deltaparent = self.deltaparent(r)
259 259 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
260 260 return self.readdelta(node)
261 261 return self.read(node)
262 262
263 263 def read(self, node):
264 264 if node == revlog.nullid:
265 265 return manifestdict() # don't upset local cache
266 266 if node in self._mancache:
267 267 return self._mancache[node][0]
268 268 text = self.revision(node)
269 269 arraytext = array.array('c', text)
270 270 m = _parse(text)
271 271 self._mancache[node] = (m, arraytext)
272 272 return m
273 273
274 274 def find(self, node, f):
275 275 '''look up entry for a single file efficiently.
276 276 return (node, flags) pair if found, (None, None) if not.'''
277 277 if node in self._mancache:
278 278 m = self._mancache[node][0]
279 279 return m.get(f), m.flags(f)
280 280 text = self.revision(node)
281 281 start, end = _msearch(text, f)
282 282 if start == end:
283 283 return None, None
284 284 l = text[start:end]
285 285 f, n = l.split('\0')
286 286 return revlog.bin(n[:40]), n[40:-1]
287 287
288 288 def add(self, m, transaction, link, p1, p2, added, removed):
289 289 if p1 in self._mancache:
290 290 # If our first parent is in the manifest cache, we can
291 291 # compute a delta here using properties we know about the
292 292 # manifest up-front, which may save time later for the
293 293 # revlog layer.
294 294
295 295 _checkforbidden(added)
296 296 # combine the changed lists into one list for sorting
297 297 work = [(x, False) for x in added]
298 298 work.extend((x, True) for x in removed)
299 299 # this could use heapq.merge() (from Python 2.6+) or equivalent
300 300 # since the lists are already sorted
301 301 work.sort()
302 302
303 303 arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
304 304 cachedelta = self.rev(p1), deltatext
305 305 text = util.buffer(arraytext)
306 306 else:
307 307 # The first parent manifest isn't already loaded, so we'll
308 308 # just encode a fulltext of the manifest and pass that
309 309 # through to the revlog layer, and let it handle the delta
310 310 # process.
311 311 text = m.text()
312 312 arraytext = array.array('c', text)
313 313 cachedelta = None
314 314
315 315 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
316 316 self._mancache[n] = (m, arraytext)
317 317
318 318 return n
General Comments 0
You need to be logged in to leave comments. Login now