##// END OF EJS Templates
manifest: add fastdelta method to manifestdict...
Augie Fackler -
r22931:48c0b101 default
parent child Browse files
Show More
@@ -1,238 +1,243 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import mdiff, parsers, error, revlog, util, dicthelpers
10 10 import array, struct
11 11
12 12 class manifestdict(dict):
13 13 def __init__(self, mapping=None, flags=None):
14 14 if mapping is None:
15 15 mapping = {}
16 16 if flags is None:
17 17 flags = {}
18 18 dict.__init__(self, mapping)
19 19 self._flags = flags
20 20 def flags(self, f):
21 21 return self._flags.get(f, "")
22 22 def withflags(self):
23 23 return set(self._flags.keys())
24 24 def set(self, f, flags):
25 25 self._flags[f] = flags
26 26 def copy(self):
27 27 return manifestdict(self, dict.copy(self._flags))
28 28 def intersectfiles(self, files):
29 29 '''make a new manifestdict with the intersection of self with files
30 30
31 31 The algorithm assumes that files is much smaller than self.'''
32 32 ret = manifestdict()
33 33 for fn in files:
34 34 if fn in self:
35 35 ret[fn] = self[fn]
36 36 flags = self._flags.get(fn, None)
37 37 if flags:
38 38 ret._flags[fn] = flags
39 39 return ret
40 40 def flagsdiff(self, d2):
41 41 return dicthelpers.diff(self._flags, d2._flags, "")
42 42
43 43 def text(self):
44 44 fl = sorted(self)
45 45 _checkforbidden(fl)
46 46
47 47 hex, flags = revlog.hex, self.flags
48 48 # if this is changed to support newlines in filenames,
49 49 # be sure to check the templates/ dir again (especially *-raw.tmpl)
50 50 return ''.join("%s\0%s%s\n" % (f, hex(self[f]), flags(f)) for f in fl)
51 51
52 def fastdelta(self, base, changes):
53 """Given a base manifest text as an array.array and a list of changes
54 relative to that text, compute a delta that can be used by revlog.
55 """
56 delta = []
57 dstart = None
58 dend = None
59 dline = [""]
60 start = 0
61 # zero copy representation of base as a buffer
62 addbuf = util.buffer(base)
63
64 # start with a readonly loop that finds the offset of
65 # each line and creates the deltas
66 for f, todelete in changes:
67 # bs will either be the index of the item or the insert point
68 start, end = _msearch(addbuf, f, start)
69 if not todelete:
70 l = "%s\0%s%s\n" % (f, revlog.hex(self[f]), self.flags(f))
71 else:
72 if start == end:
73 # item we want to delete was not found, error out
74 raise AssertionError(
75 _("failed to remove %s from manifest") % f)
76 l = ""
77 if dstart is not None and dstart <= start and dend >= start:
78 if dend < end:
79 dend = end
80 if l:
81 dline.append(l)
82 else:
83 if dstart is not None:
84 delta.append([dstart, dend, "".join(dline)])
85 dstart = start
86 dend = end
87 dline = [l]
88
89 if dstart is not None:
90 delta.append([dstart, dend, "".join(dline)])
91 # apply the delta to the base, and get a delta for addrevision
92 deltatext, arraytext = _addlistdelta(base, delta)
93 return arraytext, deltatext
94
52 95 def _msearch(m, s, lo=0, hi=None):
53 96 '''return a tuple (start, end) that says where to find s within m.
54 97
55 98 If the string is found m[start:end] are the line containing
56 99 that string. If start == end the string was not found and
57 100 they indicate the proper sorted insertion point.
58 101
59 102 m should be a buffer or a string
60 103 s is a string'''
61 104 def advance(i, c):
62 105 while i < lenm and m[i] != c:
63 106 i += 1
64 107 return i
65 108 if not s:
66 109 return (lo, lo)
67 110 lenm = len(m)
68 111 if not hi:
69 112 hi = lenm
70 113 while lo < hi:
71 114 mid = (lo + hi) // 2
72 115 start = mid
73 116 while start > 0 and m[start - 1] != '\n':
74 117 start -= 1
75 118 end = advance(start, '\0')
76 119 if m[start:end] < s:
77 120 # we know that after the null there are 40 bytes of sha1
78 121 # this translates to the bisect lo = mid + 1
79 122 lo = advance(end + 40, '\n') + 1
80 123 else:
81 124 # this translates to the bisect hi = mid
82 125 hi = start
83 126 end = advance(lo, '\0')
84 127 found = m[lo:end]
85 128 if s == found:
86 129 # we know that after the null there are 40 bytes of sha1
87 130 end = advance(end + 40, '\n')
88 131 return (lo, end + 1)
89 132 else:
90 133 return (lo, lo)
91 134
92 135 def _checkforbidden(l):
93 136 """Check filenames for illegal characters."""
94 137 for f in l:
95 138 if '\n' in f or '\r' in f:
96 139 raise error.RevlogError(
97 140 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
98 141
99 142
100 143 # apply the changes collected during the bisect loop to our addlist
101 144 # return a delta suitable for addrevision
102 145 def _addlistdelta(addlist, x):
103 146 # for large addlist arrays, building a new array is cheaper
104 147 # than repeatedly modifying the existing one
105 148 currentposition = 0
106 149 newaddlist = array.array('c')
107 150
108 151 for start, end, content in x:
109 152 newaddlist += addlist[currentposition:start]
110 153 if content:
111 154 newaddlist += array.array('c', content)
112 155
113 156 currentposition = end
114 157
115 158 newaddlist += addlist[currentposition:]
116 159
117 160 deltatext = "".join(struct.pack(">lll", start, end, len(content))
118 161 + content for start, end, content in x)
119 162 return deltatext, newaddlist
120 163
121 164 def _parse(lines):
122 165 mfdict = manifestdict()
123 166 parsers.parse_manifest(mfdict, mfdict._flags, lines)
124 167 return mfdict
125 168
126 169 class manifest(revlog.revlog):
127 170 def __init__(self, opener):
128 171 # we expect to deal with not more than four revs at a time,
129 172 # during a commit --amend
130 173 self._mancache = util.lrucachedict(4)
131 174 revlog.revlog.__init__(self, opener, "00manifest.i")
132 175
133 176 def readdelta(self, node):
134 177 r = self.rev(node)
135 178 return _parse(mdiff.patchtext(self.revdiff(self.deltaparent(r), r)))
136 179
137 180 def readfast(self, node):
138 181 '''use the faster of readdelta or read'''
139 182 r = self.rev(node)
140 183 deltaparent = self.deltaparent(r)
141 184 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
142 185 return self.readdelta(node)
143 186 return self.read(node)
144 187
145 188 def read(self, node):
146 189 if node == revlog.nullid:
147 190 return manifestdict() # don't upset local cache
148 191 if node in self._mancache:
149 192 return self._mancache[node][0]
150 193 text = self.revision(node)
151 194 arraytext = array.array('c', text)
152 195 mapping = _parse(text)
153 196 self._mancache[node] = (mapping, arraytext)
154 197 return mapping
155 198
156 199 def find(self, node, f):
157 200 '''look up entry for a single file efficiently.
158 201 return (node, flags) pair if found, (None, None) if not.'''
159 202 if node in self._mancache:
160 203 mapping = self._mancache[node][0]
161 204 return mapping.get(f), mapping.flags(f)
162 205 text = self.revision(node)
163 206 start, end = _msearch(text, f)
164 207 if start == end:
165 208 return None, None
166 209 l = text[start:end]
167 210 f, n = l.split('\0')
168 211 return revlog.bin(n[:40]), n[40:-1]
169 212
170 213 def add(self, map, transaction, link, p1, p2, added, removed):
171 214 if p1 in self._mancache:
172 215 # If our first parent is in the manifest cache, we can
173 216 # compute a delta here using properties we know about the
174 217 # manifest up-front, which may save time later for the
175 218 # revlog layer.
176 addlist = self._mancache[p1][1]
177 219
178 220 _checkforbidden(added)
179 221 # combine the changed lists into one list for sorting
180 222 work = [(x, False) for x in added]
181 223 work.extend((x, True) for x in removed)
182 224 # this could use heapq.merge() (from Python 2.6+) or equivalent
183 225 # since the lists are already sorted
184 226 work.sort()
185 227
186 delta = []
187 dstart = None
188 dend = None
189 dline = [""]
190 start = 0
191 # zero copy representation of addlist as a buffer
192 addbuf = util.buffer(addlist)
193
194 # start with a readonly loop that finds the offset of
195 # each line and creates the deltas
196 for f, todelete in work:
197 # bs will either be the index of the item or the insert point
198 start, end = _msearch(addbuf, f, start)
199 if not todelete:
200 l = "%s\0%s%s\n" % (f, revlog.hex(map[f]), map.flags(f))
201 else:
202 if start == end:
203 # item we want to delete was not found, error out
204 raise AssertionError(
205 _("failed to remove %s from manifest") % f)
206 l = ""
207 if dstart is not None and dstart <= start and dend >= start:
208 if dend < end:
209 dend = end
210 if l:
211 dline.append(l)
212 else:
213 if dstart is not None:
214 delta.append([dstart, dend, "".join(dline)])
215 dstart = start
216 dend = end
217 dline = [l]
218
219 if dstart is not None:
220 delta.append([dstart, dend, "".join(dline)])
221 # apply the delta to the addlist, and get a delta for addrevision
222 deltatext, addlist = _addlistdelta(addlist, delta)
223 cachedelta = (self.rev(p1), deltatext)
224 arraytext = addlist
228 arraytext, deltatext = map.fastdelta(self._mancache[p1][1], work)
229 cachedelta = self.rev(p1), deltatext
225 230 text = util.buffer(arraytext)
226 231 else:
227 232 # The first parent manifest isn't already loaded, so we'll
228 233 # just encode a fulltext of the manifest and pass that
229 234 # through to the revlog layer, and let it handle the delta
230 235 # process.
231 236 text = map.text()
232 237 arraytext = array.array('c', text)
233 238 cachedelta = None
234 239
235 240 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
236 241 self._mancache[n] = (map, arraytext)
237 242
238 243 return n
General Comments 0
You need to be logged in to leave comments. Login now