##// END OF EJS Templates
manifest: add fastdelta method to manifestdict...
Augie Fackler -
r22931:48c0b101 default
parent child Browse files
Show More
@@ -1,238 +1,243 b''
1 # manifest.py - manifest revision class for mercurial
1 # manifest.py - manifest revision class for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from i18n import _
8 from i18n import _
9 import mdiff, parsers, error, revlog, util, dicthelpers
9 import mdiff, parsers, error, revlog, util, dicthelpers
10 import array, struct
10 import array, struct
11
11
12 class manifestdict(dict):
12 class manifestdict(dict):
13 def __init__(self, mapping=None, flags=None):
13 def __init__(self, mapping=None, flags=None):
14 if mapping is None:
14 if mapping is None:
15 mapping = {}
15 mapping = {}
16 if flags is None:
16 if flags is None:
17 flags = {}
17 flags = {}
18 dict.__init__(self, mapping)
18 dict.__init__(self, mapping)
19 self._flags = flags
19 self._flags = flags
20 def flags(self, f):
20 def flags(self, f):
21 return self._flags.get(f, "")
21 return self._flags.get(f, "")
22 def withflags(self):
22 def withflags(self):
23 return set(self._flags.keys())
23 return set(self._flags.keys())
24 def set(self, f, flags):
24 def set(self, f, flags):
25 self._flags[f] = flags
25 self._flags[f] = flags
26 def copy(self):
26 def copy(self):
27 return manifestdict(self, dict.copy(self._flags))
27 return manifestdict(self, dict.copy(self._flags))
28 def intersectfiles(self, files):
28 def intersectfiles(self, files):
29 '''make a new manifestdict with the intersection of self with files
29 '''make a new manifestdict with the intersection of self with files
30
30
31 The algorithm assumes that files is much smaller than self.'''
31 The algorithm assumes that files is much smaller than self.'''
32 ret = manifestdict()
32 ret = manifestdict()
33 for fn in files:
33 for fn in files:
34 if fn in self:
34 if fn in self:
35 ret[fn] = self[fn]
35 ret[fn] = self[fn]
36 flags = self._flags.get(fn, None)
36 flags = self._flags.get(fn, None)
37 if flags:
37 if flags:
38 ret._flags[fn] = flags
38 ret._flags[fn] = flags
39 return ret
39 return ret
40 def flagsdiff(self, d2):
40 def flagsdiff(self, d2):
41 return dicthelpers.diff(self._flags, d2._flags, "")
41 return dicthelpers.diff(self._flags, d2._flags, "")
42
42
43 def text(self):
43 def text(self):
44 fl = sorted(self)
44 fl = sorted(self)
45 _checkforbidden(fl)
45 _checkforbidden(fl)
46
46
47 hex, flags = revlog.hex, self.flags
47 hex, flags = revlog.hex, self.flags
48 # if this is changed to support newlines in filenames,
48 # if this is changed to support newlines in filenames,
49 # be sure to check the templates/ dir again (especially *-raw.tmpl)
49 # be sure to check the templates/ dir again (especially *-raw.tmpl)
50 return ''.join("%s\0%s%s\n" % (f, hex(self[f]), flags(f)) for f in fl)
50 return ''.join("%s\0%s%s\n" % (f, hex(self[f]), flags(f)) for f in fl)
51
51
52 def fastdelta(self, base, changes):
53 """Given a base manifest text as an array.array and a list of changes
54 relative to that text, compute a delta that can be used by revlog.
55 """
56 delta = []
57 dstart = None
58 dend = None
59 dline = [""]
60 start = 0
61 # zero copy representation of base as a buffer
62 addbuf = util.buffer(base)
63
64 # start with a readonly loop that finds the offset of
65 # each line and creates the deltas
66 for f, todelete in changes:
67 # bs will either be the index of the item or the insert point
68 start, end = _msearch(addbuf, f, start)
69 if not todelete:
70 l = "%s\0%s%s\n" % (f, revlog.hex(self[f]), self.flags(f))
71 else:
72 if start == end:
73 # item we want to delete was not found, error out
74 raise AssertionError(
75 _("failed to remove %s from manifest") % f)
76 l = ""
77 if dstart is not None and dstart <= start and dend >= start:
78 if dend < end:
79 dend = end
80 if l:
81 dline.append(l)
82 else:
83 if dstart is not None:
84 delta.append([dstart, dend, "".join(dline)])
85 dstart = start
86 dend = end
87 dline = [l]
88
89 if dstart is not None:
90 delta.append([dstart, dend, "".join(dline)])
91 # apply the delta to the base, and get a delta for addrevision
92 deltatext, arraytext = _addlistdelta(base, delta)
93 return arraytext, deltatext
94
52 def _msearch(m, s, lo=0, hi=None):
95 def _msearch(m, s, lo=0, hi=None):
53 '''return a tuple (start, end) that says where to find s within m.
96 '''return a tuple (start, end) that says where to find s within m.
54
97
55 If the string is found m[start:end] are the line containing
98 If the string is found m[start:end] are the line containing
56 that string. If start == end the string was not found and
99 that string. If start == end the string was not found and
57 they indicate the proper sorted insertion point.
100 they indicate the proper sorted insertion point.
58
101
59 m should be a buffer or a string
102 m should be a buffer or a string
60 s is a string'''
103 s is a string'''
61 def advance(i, c):
104 def advance(i, c):
62 while i < lenm and m[i] != c:
105 while i < lenm and m[i] != c:
63 i += 1
106 i += 1
64 return i
107 return i
65 if not s:
108 if not s:
66 return (lo, lo)
109 return (lo, lo)
67 lenm = len(m)
110 lenm = len(m)
68 if not hi:
111 if not hi:
69 hi = lenm
112 hi = lenm
70 while lo < hi:
113 while lo < hi:
71 mid = (lo + hi) // 2
114 mid = (lo + hi) // 2
72 start = mid
115 start = mid
73 while start > 0 and m[start - 1] != '\n':
116 while start > 0 and m[start - 1] != '\n':
74 start -= 1
117 start -= 1
75 end = advance(start, '\0')
118 end = advance(start, '\0')
76 if m[start:end] < s:
119 if m[start:end] < s:
77 # we know that after the null there are 40 bytes of sha1
120 # we know that after the null there are 40 bytes of sha1
78 # this translates to the bisect lo = mid + 1
121 # this translates to the bisect lo = mid + 1
79 lo = advance(end + 40, '\n') + 1
122 lo = advance(end + 40, '\n') + 1
80 else:
123 else:
81 # this translates to the bisect hi = mid
124 # this translates to the bisect hi = mid
82 hi = start
125 hi = start
83 end = advance(lo, '\0')
126 end = advance(lo, '\0')
84 found = m[lo:end]
127 found = m[lo:end]
85 if s == found:
128 if s == found:
86 # we know that after the null there are 40 bytes of sha1
129 # we know that after the null there are 40 bytes of sha1
87 end = advance(end + 40, '\n')
130 end = advance(end + 40, '\n')
88 return (lo, end + 1)
131 return (lo, end + 1)
89 else:
132 else:
90 return (lo, lo)
133 return (lo, lo)
91
134
92 def _checkforbidden(l):
135 def _checkforbidden(l):
93 """Check filenames for illegal characters."""
136 """Check filenames for illegal characters."""
94 for f in l:
137 for f in l:
95 if '\n' in f or '\r' in f:
138 if '\n' in f or '\r' in f:
96 raise error.RevlogError(
139 raise error.RevlogError(
97 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
140 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
98
141
99
142
100 # apply the changes collected during the bisect loop to our addlist
143 # apply the changes collected during the bisect loop to our addlist
101 # return a delta suitable for addrevision
144 # return a delta suitable for addrevision
102 def _addlistdelta(addlist, x):
145 def _addlistdelta(addlist, x):
103 # for large addlist arrays, building a new array is cheaper
146 # for large addlist arrays, building a new array is cheaper
104 # than repeatedly modifying the existing one
147 # than repeatedly modifying the existing one
105 currentposition = 0
148 currentposition = 0
106 newaddlist = array.array('c')
149 newaddlist = array.array('c')
107
150
108 for start, end, content in x:
151 for start, end, content in x:
109 newaddlist += addlist[currentposition:start]
152 newaddlist += addlist[currentposition:start]
110 if content:
153 if content:
111 newaddlist += array.array('c', content)
154 newaddlist += array.array('c', content)
112
155
113 currentposition = end
156 currentposition = end
114
157
115 newaddlist += addlist[currentposition:]
158 newaddlist += addlist[currentposition:]
116
159
117 deltatext = "".join(struct.pack(">lll", start, end, len(content))
160 deltatext = "".join(struct.pack(">lll", start, end, len(content))
118 + content for start, end, content in x)
161 + content for start, end, content in x)
119 return deltatext, newaddlist
162 return deltatext, newaddlist
120
163
121 def _parse(lines):
164 def _parse(lines):
122 mfdict = manifestdict()
165 mfdict = manifestdict()
123 parsers.parse_manifest(mfdict, mfdict._flags, lines)
166 parsers.parse_manifest(mfdict, mfdict._flags, lines)
124 return mfdict
167 return mfdict
125
168
126 class manifest(revlog.revlog):
169 class manifest(revlog.revlog):
127 def __init__(self, opener):
170 def __init__(self, opener):
128 # we expect to deal with not more than four revs at a time,
171 # we expect to deal with not more than four revs at a time,
129 # during a commit --amend
172 # during a commit --amend
130 self._mancache = util.lrucachedict(4)
173 self._mancache = util.lrucachedict(4)
131 revlog.revlog.__init__(self, opener, "00manifest.i")
174 revlog.revlog.__init__(self, opener, "00manifest.i")
132
175
133 def readdelta(self, node):
176 def readdelta(self, node):
134 r = self.rev(node)
177 r = self.rev(node)
135 return _parse(mdiff.patchtext(self.revdiff(self.deltaparent(r), r)))
178 return _parse(mdiff.patchtext(self.revdiff(self.deltaparent(r), r)))
136
179
137 def readfast(self, node):
180 def readfast(self, node):
138 '''use the faster of readdelta or read'''
181 '''use the faster of readdelta or read'''
139 r = self.rev(node)
182 r = self.rev(node)
140 deltaparent = self.deltaparent(r)
183 deltaparent = self.deltaparent(r)
141 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
184 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
142 return self.readdelta(node)
185 return self.readdelta(node)
143 return self.read(node)
186 return self.read(node)
144
187
145 def read(self, node):
188 def read(self, node):
146 if node == revlog.nullid:
189 if node == revlog.nullid:
147 return manifestdict() # don't upset local cache
190 return manifestdict() # don't upset local cache
148 if node in self._mancache:
191 if node in self._mancache:
149 return self._mancache[node][0]
192 return self._mancache[node][0]
150 text = self.revision(node)
193 text = self.revision(node)
151 arraytext = array.array('c', text)
194 arraytext = array.array('c', text)
152 mapping = _parse(text)
195 mapping = _parse(text)
153 self._mancache[node] = (mapping, arraytext)
196 self._mancache[node] = (mapping, arraytext)
154 return mapping
197 return mapping
155
198
156 def find(self, node, f):
199 def find(self, node, f):
157 '''look up entry for a single file efficiently.
200 '''look up entry for a single file efficiently.
158 return (node, flags) pair if found, (None, None) if not.'''
201 return (node, flags) pair if found, (None, None) if not.'''
159 if node in self._mancache:
202 if node in self._mancache:
160 mapping = self._mancache[node][0]
203 mapping = self._mancache[node][0]
161 return mapping.get(f), mapping.flags(f)
204 return mapping.get(f), mapping.flags(f)
162 text = self.revision(node)
205 text = self.revision(node)
163 start, end = _msearch(text, f)
206 start, end = _msearch(text, f)
164 if start == end:
207 if start == end:
165 return None, None
208 return None, None
166 l = text[start:end]
209 l = text[start:end]
167 f, n = l.split('\0')
210 f, n = l.split('\0')
168 return revlog.bin(n[:40]), n[40:-1]
211 return revlog.bin(n[:40]), n[40:-1]
169
212
170 def add(self, map, transaction, link, p1, p2, added, removed):
213 def add(self, map, transaction, link, p1, p2, added, removed):
171 if p1 in self._mancache:
214 if p1 in self._mancache:
172 # If our first parent is in the manifest cache, we can
215 # If our first parent is in the manifest cache, we can
173 # compute a delta here using properties we know about the
216 # compute a delta here using properties we know about the
174 # manifest up-front, which may save time later for the
217 # manifest up-front, which may save time later for the
175 # revlog layer.
218 # revlog layer.
176 addlist = self._mancache[p1][1]
177
219
178 _checkforbidden(added)
220 _checkforbidden(added)
179 # combine the changed lists into one list for sorting
221 # combine the changed lists into one list for sorting
180 work = [(x, False) for x in added]
222 work = [(x, False) for x in added]
181 work.extend((x, True) for x in removed)
223 work.extend((x, True) for x in removed)
182 # this could use heapq.merge() (from Python 2.6+) or equivalent
224 # this could use heapq.merge() (from Python 2.6+) or equivalent
183 # since the lists are already sorted
225 # since the lists are already sorted
184 work.sort()
226 work.sort()
185
227
186 delta = []
228 arraytext, deltatext = map.fastdelta(self._mancache[p1][1], work)
187 dstart = None
229 cachedelta = self.rev(p1), deltatext
188 dend = None
189 dline = [""]
190 start = 0
191 # zero copy representation of addlist as a buffer
192 addbuf = util.buffer(addlist)
193
194 # start with a readonly loop that finds the offset of
195 # each line and creates the deltas
196 for f, todelete in work:
197 # bs will either be the index of the item or the insert point
198 start, end = _msearch(addbuf, f, start)
199 if not todelete:
200 l = "%s\0%s%s\n" % (f, revlog.hex(map[f]), map.flags(f))
201 else:
202 if start == end:
203 # item we want to delete was not found, error out
204 raise AssertionError(
205 _("failed to remove %s from manifest") % f)
206 l = ""
207 if dstart is not None and dstart <= start and dend >= start:
208 if dend < end:
209 dend = end
210 if l:
211 dline.append(l)
212 else:
213 if dstart is not None:
214 delta.append([dstart, dend, "".join(dline)])
215 dstart = start
216 dend = end
217 dline = [l]
218
219 if dstart is not None:
220 delta.append([dstart, dend, "".join(dline)])
221 # apply the delta to the addlist, and get a delta for addrevision
222 deltatext, addlist = _addlistdelta(addlist, delta)
223 cachedelta = (self.rev(p1), deltatext)
224 arraytext = addlist
225 text = util.buffer(arraytext)
230 text = util.buffer(arraytext)
226 else:
231 else:
227 # The first parent manifest isn't already loaded, so we'll
232 # The first parent manifest isn't already loaded, so we'll
228 # just encode a fulltext of the manifest and pass that
233 # just encode a fulltext of the manifest and pass that
229 # through to the revlog layer, and let it handle the delta
234 # through to the revlog layer, and let it handle the delta
230 # process.
235 # process.
231 text = map.text()
236 text = map.text()
232 arraytext = array.array('c', text)
237 arraytext = array.array('c', text)
233 cachedelta = None
238 cachedelta = None
234
239
235 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
240 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
236 self._mancache[n] = (map, arraytext)
241 self._mancache[n] = (map, arraytext)
237
242
238 return n
243 return n
General Comments 0
You need to be logged in to leave comments. Login now