##// END OF EJS Templates
manifest: move pure parsing code out of pure...
Matt Mackall -
r24215:feddc528 default
parent child Browse files
Show More
@@ -1,304 +1,318 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import mdiff, parsers, error, revlog, util
10 10 import array, struct
11 11
12 12 class manifestdict(dict):
13 13 def __init__(self):
14 14 self._flags = {}
15 15 def __setitem__(self, k, v):
16 16 assert v is not None
17 17 dict.__setitem__(self, k, v)
18 18 def flags(self, f):
19 19 return self._flags.get(f, "")
20 20 def setflag(self, f, flags):
21 21 """Set the flags (symlink, executable) for path f."""
22 22 self._flags[f] = flags
23 23 def copy(self):
24 24 copy = manifestdict()
25 25 dict.__init__(copy, self)
26 26 copy._flags = dict.copy(self._flags)
27 27 return copy
28 28 def intersectfiles(self, files):
29 29 '''make a new manifestdict with the intersection of self with files
30 30
31 31 The algorithm assumes that files is much smaller than self.'''
32 32 ret = manifestdict()
33 33 for fn in files:
34 34 if fn in self:
35 35 ret[fn] = self[fn]
36 36 flags = self._flags.get(fn, None)
37 37 if flags:
38 38 ret._flags[fn] = flags
39 39 return ret
40 40
41 41 def filesnotin(self, m2):
42 42 '''Set of files in this manifest that are not in the other'''
43 43 files = set(self.iterkeys())
44 44 files.difference_update(m2.iterkeys())
45 45 return files
46 46
47 47 def matches(self, match):
48 48 '''generate a new manifest filtered by the match argument'''
49 49 if match.always():
50 50 return self.copy()
51 51
52 52 files = match.files()
53 53 if (match.matchfn == match.exact or
54 54 (not match.anypats() and util.all(fn in self for fn in files))):
55 55 return self.intersectfiles(files)
56 56
57 57 m = self.copy()
58 58 for fn in m.keys():
59 59 if not match(fn):
60 60 del m[fn]
61 61 return m
62 62
63 63 def diff(self, m2, clean=False):
64 64 '''Finds changes between the current manifest and m2.
65 65
66 66 Args:
67 67 m2: the manifest to which this manifest should be compared.
68 68 clean: if true, include files unchanged between these manifests
69 69 with a None value in the returned dictionary.
70 70
71 71 The result is returned as a dict with filename as key and
72 72 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
73 73 nodeid in the current/other manifest and fl1/fl2 is the flag
74 74 in the current/other manifest. Where the file does not exist,
75 75 the nodeid will be None and the flags will be the empty
76 76 string.
77 77 '''
78 78 diff = {}
79 79
80 80 for fn, n1 in self.iteritems():
81 81 fl1 = self._flags.get(fn, '')
82 82 n2 = m2.get(fn, None)
83 83 fl2 = m2._flags.get(fn, '')
84 84 if n2 is None:
85 85 fl2 = ''
86 86 if n1 != n2 or fl1 != fl2:
87 87 diff[fn] = ((n1, fl1), (n2, fl2))
88 88 elif clean:
89 89 diff[fn] = None
90 90
91 91 for fn, n2 in m2.iteritems():
92 92 if fn not in self:
93 93 fl2 = m2._flags.get(fn, '')
94 94 diff[fn] = ((None, ''), (n2, fl2))
95 95
96 96 return diff
97 97
98 98 def text(self):
99 99 """Get the full data of this manifest as a bytestring."""
100 100 fl = sorted(self)
101 101 _checkforbidden(fl)
102 102
103 103 hex, flags = revlog.hex, self.flags
104 104 # if this is changed to support newlines in filenames,
105 105 # be sure to check the templates/ dir again (especially *-raw.tmpl)
106 106 return ''.join("%s\0%s%s\n" % (f, hex(self[f]), flags(f)) for f in fl)
107 107
108 108 def fastdelta(self, base, changes):
109 109 """Given a base manifest text as an array.array and a list of changes
110 110 relative to that text, compute a delta that can be used by revlog.
111 111 """
112 112 delta = []
113 113 dstart = None
114 114 dend = None
115 115 dline = [""]
116 116 start = 0
117 117 # zero copy representation of base as a buffer
118 118 addbuf = util.buffer(base)
119 119
120 120 # start with a readonly loop that finds the offset of
121 121 # each line and creates the deltas
122 122 for f, todelete in changes:
123 123 # bs will either be the index of the item or the insert point
124 124 start, end = _msearch(addbuf, f, start)
125 125 if not todelete:
126 126 l = "%s\0%s%s\n" % (f, revlog.hex(self[f]), self.flags(f))
127 127 else:
128 128 if start == end:
129 129 # item we want to delete was not found, error out
130 130 raise AssertionError(
131 131 _("failed to remove %s from manifest") % f)
132 132 l = ""
133 133 if dstart is not None and dstart <= start and dend >= start:
134 134 if dend < end:
135 135 dend = end
136 136 if l:
137 137 dline.append(l)
138 138 else:
139 139 if dstart is not None:
140 140 delta.append([dstart, dend, "".join(dline)])
141 141 dstart = start
142 142 dend = end
143 143 dline = [l]
144 144
145 145 if dstart is not None:
146 146 delta.append([dstart, dend, "".join(dline)])
147 147 # apply the delta to the base, and get a delta for addrevision
148 148 deltatext, arraytext = _addlistdelta(base, delta)
149 149 return arraytext, deltatext
150 150
151 151 def _msearch(m, s, lo=0, hi=None):
152 152 '''return a tuple (start, end) that says where to find s within m.
153 153
154 154 If the string is found m[start:end] are the line containing
155 155 that string. If start == end the string was not found and
156 156 they indicate the proper sorted insertion point.
157 157
158 158 m should be a buffer or a string
159 159 s is a string'''
160 160 def advance(i, c):
161 161 while i < lenm and m[i] != c:
162 162 i += 1
163 163 return i
164 164 if not s:
165 165 return (lo, lo)
166 166 lenm = len(m)
167 167 if not hi:
168 168 hi = lenm
169 169 while lo < hi:
170 170 mid = (lo + hi) // 2
171 171 start = mid
172 172 while start > 0 and m[start - 1] != '\n':
173 173 start -= 1
174 174 end = advance(start, '\0')
175 175 if m[start:end] < s:
176 176 # we know that after the null there are 40 bytes of sha1
177 177 # this translates to the bisect lo = mid + 1
178 178 lo = advance(end + 40, '\n') + 1
179 179 else:
180 180 # this translates to the bisect hi = mid
181 181 hi = start
182 182 end = advance(lo, '\0')
183 183 found = m[lo:end]
184 184 if s == found:
185 185 # we know that after the null there are 40 bytes of sha1
186 186 end = advance(end + 40, '\n')
187 187 return (lo, end + 1)
188 188 else:
189 189 return (lo, lo)
190 190
191 191 def _checkforbidden(l):
192 192 """Check filenames for illegal characters."""
193 193 for f in l:
194 194 if '\n' in f or '\r' in f:
195 195 raise error.RevlogError(
196 196 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
197 197
198 198
199 199 # apply the changes collected during the bisect loop to our addlist
200 200 # return a delta suitable for addrevision
201 201 def _addlistdelta(addlist, x):
202 202 # for large addlist arrays, building a new array is cheaper
203 203 # than repeatedly modifying the existing one
204 204 currentposition = 0
205 205 newaddlist = array.array('c')
206 206
207 207 for start, end, content in x:
208 208 newaddlist += addlist[currentposition:start]
209 209 if content:
210 210 newaddlist += array.array('c', content)
211 211
212 212 currentposition = end
213 213
214 214 newaddlist += addlist[currentposition:]
215 215
216 216 deltatext = "".join(struct.pack(">lll", start, end, len(content))
217 217 + content for start, end, content in x)
218 218 return deltatext, newaddlist
219 219
220 # Pure Python fallback
221 def _parsemanifest(mfdict, fdict, lines):
222 bin = revlog.bin
223 for l in lines.splitlines():
224 f, n = l.split('\0')
225 if len(n) > 40:
226 fdict[f] = n[40:]
227 mfdict[f] = bin(n[:40])
228 else:
229 mfdict[f] = bin(n)
230
220 231 def _parse(lines):
221 232 mfdict = manifestdict()
222 parsers.parse_manifest(mfdict, mfdict._flags, lines)
233 try:
234 parsers.parse_manifest(mfdict, mfdict._flags, lines)
235 except AttributeError:
236 _parsemanifest(mfdict, mfdict._flags, lines)
223 237 return mfdict
224 238
225 239 class manifest(revlog.revlog):
226 240 def __init__(self, opener):
227 241 # During normal operations, we expect to deal with not more than four
228 242 # revs at a time (such as during commit --amend). When rebasing large
229 243 # stacks of commits, the number can go up, hence the config knob below.
230 244 cachesize = 4
231 245 opts = getattr(opener, 'options', None)
232 246 if opts is not None:
233 247 cachesize = opts.get('manifestcachesize', cachesize)
234 248 self._mancache = util.lrucachedict(cachesize)
235 249 revlog.revlog.__init__(self, opener, "00manifest.i")
236 250
237 251 def readdelta(self, node):
238 252 r = self.rev(node)
239 253 return _parse(mdiff.patchtext(self.revdiff(self.deltaparent(r), r)))
240 254
241 255 def readfast(self, node):
242 256 '''use the faster of readdelta or read'''
243 257 r = self.rev(node)
244 258 deltaparent = self.deltaparent(r)
245 259 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
246 260 return self.readdelta(node)
247 261 return self.read(node)
248 262
249 263 def read(self, node):
250 264 if node == revlog.nullid:
251 265 return manifestdict() # don't upset local cache
252 266 if node in self._mancache:
253 267 return self._mancache[node][0]
254 268 text = self.revision(node)
255 269 arraytext = array.array('c', text)
256 270 m = _parse(text)
257 271 self._mancache[node] = (m, arraytext)
258 272 return m
259 273
260 274 def find(self, node, f):
261 275 '''look up entry for a single file efficiently.
262 276 return (node, flags) pair if found, (None, None) if not.'''
263 277 if node in self._mancache:
264 278 m = self._mancache[node][0]
265 279 return m.get(f), m.flags(f)
266 280 text = self.revision(node)
267 281 start, end = _msearch(text, f)
268 282 if start == end:
269 283 return None, None
270 284 l = text[start:end]
271 285 f, n = l.split('\0')
272 286 return revlog.bin(n[:40]), n[40:-1]
273 287
274 288 def add(self, m, transaction, link, p1, p2, added, removed):
275 289 if p1 in self._mancache:
276 290 # If our first parent is in the manifest cache, we can
277 291 # compute a delta here using properties we know about the
278 292 # manifest up-front, which may save time later for the
279 293 # revlog layer.
280 294
281 295 _checkforbidden(added)
282 296 # combine the changed lists into one list for sorting
283 297 work = [(x, False) for x in added]
284 298 work.extend((x, True) for x in removed)
285 299 # this could use heapq.merge() (from Python 2.6+) or equivalent
286 300 # since the lists are already sorted
287 301 work.sort()
288 302
289 303 arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
290 304 cachedelta = self.rev(p1), deltatext
291 305 text = util.buffer(arraytext)
292 306 else:
293 307 # The first parent manifest isn't already loaded, so we'll
294 308 # just encode a fulltext of the manifest and pass that
295 309 # through to the revlog layer, and let it handle the delta
296 310 # process.
297 311 text = m.text()
298 312 arraytext = array.array('c', text)
299 313 cachedelta = None
300 314
301 315 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
302 316 self._mancache[n] = (m, arraytext)
303 317
304 318 return n
@@ -1,121 +1,112 b''
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 from mercurial.node import bin, nullid
8 from mercurial.node import nullid
9 9 from mercurial import util
10 10 import struct, zlib, cStringIO
11 11
12 12 _pack = struct.pack
13 13 _unpack = struct.unpack
14 14 _compress = zlib.compress
15 15 _decompress = zlib.decompress
16 16 _sha = util.sha1
17 17
18 18 # Some code below makes tuples directly because it's more convenient. However,
19 19 # code outside this module should always use dirstatetuple.
20 20 def dirstatetuple(*x):
21 21 # x is a tuple
22 22 return x
23 23
24 def parse_manifest(mfdict, fdict, lines):
25 for l in lines.splitlines():
26 f, n = l.split('\0')
27 if len(n) > 40:
28 fdict[f] = n[40:]
29 mfdict[f] = bin(n[:40])
30 else:
31 mfdict[f] = bin(n)
32
33 24 def parse_index2(data, inline):
34 25 def gettype(q):
35 26 return int(q & 0xFFFF)
36 27
37 28 def offset_type(offset, type):
38 29 return long(long(offset) << 16 | type)
39 30
40 31 indexformatng = ">Qiiiiii20s12x"
41 32
42 33 s = struct.calcsize(indexformatng)
43 34 index = []
44 35 cache = None
45 36 off = 0
46 37
47 38 l = len(data) - s
48 39 append = index.append
49 40 if inline:
50 41 cache = (0, data)
51 42 while off <= l:
52 43 e = _unpack(indexformatng, data[off:off + s])
53 44 append(e)
54 45 if e[1] < 0:
55 46 break
56 47 off += e[1] + s
57 48 else:
58 49 while off <= l:
59 50 e = _unpack(indexformatng, data[off:off + s])
60 51 append(e)
61 52 off += s
62 53
63 54 if off != len(data):
64 55 raise ValueError('corrupt index file')
65 56
66 57 if index:
67 58 e = list(index[0])
68 59 type = gettype(e[0])
69 60 e[0] = offset_type(0, type)
70 61 index[0] = tuple(e)
71 62
72 63 # add the magic null revision at -1
73 64 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
74 65
75 66 return index, cache
76 67
77 68 def parse_dirstate(dmap, copymap, st):
78 69 parents = [st[:20], st[20: 40]]
79 70 # dereference fields so they will be local in loop
80 71 format = ">cllll"
81 72 e_size = struct.calcsize(format)
82 73 pos1 = 40
83 74 l = len(st)
84 75
85 76 # the inner loop
86 77 while pos1 < l:
87 78 pos2 = pos1 + e_size
88 79 e = _unpack(">cllll", st[pos1:pos2]) # a literal here is faster
89 80 pos1 = pos2 + e[4]
90 81 f = st[pos2:pos1]
91 82 if '\0' in f:
92 83 f, c = f.split('\0')
93 84 copymap[f] = c
94 85 dmap[f] = e[:4]
95 86 return parents
96 87
97 88 def pack_dirstate(dmap, copymap, pl, now):
98 89 now = int(now)
99 90 cs = cStringIO.StringIO()
100 91 write = cs.write
101 92 write("".join(pl))
102 93 for f, e in dmap.iteritems():
103 94 if e[0] == 'n' and e[3] == now:
104 95 # The file was last modified "simultaneously" with the current
105 96 # write to dirstate (i.e. within the same second for file-
106 97 # systems with a granularity of 1 sec). This commonly happens
107 98 # for at least a couple of files on 'update'.
108 99 # The user could change the file without changing its size
109 100 # within the same second. Invalidate the file's mtime in
110 101 # dirstate, forcing future 'status' calls to compare the
111 102 # contents of the file if the size is the same. This prevents
112 103 # mistakenly treating such files as clean.
113 104 e = dirstatetuple(e[0], e[1], e[2], -1)
114 105 dmap[f] = e
115 106
116 107 if f in copymap:
117 108 f = "%s\0%s" % (f, copymap[f])
118 109 e = _pack(">cllll", e[0], e[1], e[2], e[3], len(f))
119 110 write(e)
120 111 write(f)
121 112 return cs.getvalue()
General Comments 0
You need to be logged in to leave comments. Login now