##// END OF EJS Templates
manifest: move _search to module level and rename to _msearch...
Augie Fackler -
r22930:1acb81d1 default
parent child Browse files
Show More
@@ -1,238 +1,238
1 # manifest.py - manifest revision class for mercurial
1 # manifest.py - manifest revision class for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from i18n import _
8 from i18n import _
9 import mdiff, parsers, error, revlog, util, dicthelpers
9 import mdiff, parsers, error, revlog, util, dicthelpers
10 import array, struct
10 import array, struct
11
11
12 class manifestdict(dict):
12 class manifestdict(dict):
13 def __init__(self, mapping=None, flags=None):
13 def __init__(self, mapping=None, flags=None):
14 if mapping is None:
14 if mapping is None:
15 mapping = {}
15 mapping = {}
16 if flags is None:
16 if flags is None:
17 flags = {}
17 flags = {}
18 dict.__init__(self, mapping)
18 dict.__init__(self, mapping)
19 self._flags = flags
19 self._flags = flags
20 def flags(self, f):
20 def flags(self, f):
21 return self._flags.get(f, "")
21 return self._flags.get(f, "")
22 def withflags(self):
22 def withflags(self):
23 return set(self._flags.keys())
23 return set(self._flags.keys())
24 def set(self, f, flags):
24 def set(self, f, flags):
25 self._flags[f] = flags
25 self._flags[f] = flags
26 def copy(self):
26 def copy(self):
27 return manifestdict(self, dict.copy(self._flags))
27 return manifestdict(self, dict.copy(self._flags))
28 def intersectfiles(self, files):
28 def intersectfiles(self, files):
29 '''make a new manifestdict with the intersection of self with files
29 '''make a new manifestdict with the intersection of self with files
30
30
31 The algorithm assumes that files is much smaller than self.'''
31 The algorithm assumes that files is much smaller than self.'''
32 ret = manifestdict()
32 ret = manifestdict()
33 for fn in files:
33 for fn in files:
34 if fn in self:
34 if fn in self:
35 ret[fn] = self[fn]
35 ret[fn] = self[fn]
36 flags = self._flags.get(fn, None)
36 flags = self._flags.get(fn, None)
37 if flags:
37 if flags:
38 ret._flags[fn] = flags
38 ret._flags[fn] = flags
39 return ret
39 return ret
40 def flagsdiff(self, d2):
40 def flagsdiff(self, d2):
41 return dicthelpers.diff(self._flags, d2._flags, "")
41 return dicthelpers.diff(self._flags, d2._flags, "")
42
42
43 def text(self):
43 def text(self):
44 fl = sorted(self)
44 fl = sorted(self)
45 _checkforbidden(fl)
45 _checkforbidden(fl)
46
46
47 hex, flags = revlog.hex, self.flags
47 hex, flags = revlog.hex, self.flags
48 # if this is changed to support newlines in filenames,
48 # if this is changed to support newlines in filenames,
49 # be sure to check the templates/ dir again (especially *-raw.tmpl)
49 # be sure to check the templates/ dir again (especially *-raw.tmpl)
50 return ''.join("%s\0%s%s\n" % (f, hex(self[f]), flags(f)) for f in fl)
50 return ''.join("%s\0%s%s\n" % (f, hex(self[f]), flags(f)) for f in fl)
51
51
52 def _msearch(m, s, lo=0, hi=None):
53 '''return a tuple (start, end) that says where to find s within m.
54
55 If the string is found m[start:end] are the line containing
56 that string. If start == end the string was not found and
57 they indicate the proper sorted insertion point.
58
59 m should be a buffer or a string
60 s is a string'''
61 def advance(i, c):
62 while i < lenm and m[i] != c:
63 i += 1
64 return i
65 if not s:
66 return (lo, lo)
67 lenm = len(m)
68 if not hi:
69 hi = lenm
70 while lo < hi:
71 mid = (lo + hi) // 2
72 start = mid
73 while start > 0 and m[start - 1] != '\n':
74 start -= 1
75 end = advance(start, '\0')
76 if m[start:end] < s:
77 # we know that after the null there are 40 bytes of sha1
78 # this translates to the bisect lo = mid + 1
79 lo = advance(end + 40, '\n') + 1
80 else:
81 # this translates to the bisect hi = mid
82 hi = start
83 end = advance(lo, '\0')
84 found = m[lo:end]
85 if s == found:
86 # we know that after the null there are 40 bytes of sha1
87 end = advance(end + 40, '\n')
88 return (lo, end + 1)
89 else:
90 return (lo, lo)
91
52 def _checkforbidden(l):
92 def _checkforbidden(l):
53 """Check filenames for illegal characters."""
93 """Check filenames for illegal characters."""
54 for f in l:
94 for f in l:
55 if '\n' in f or '\r' in f:
95 if '\n' in f or '\r' in f:
56 raise error.RevlogError(
96 raise error.RevlogError(
57 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
97 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
58
98
59
99
60 # apply the changes collected during the bisect loop to our addlist
100 # apply the changes collected during the bisect loop to our addlist
61 # return a delta suitable for addrevision
101 # return a delta suitable for addrevision
62 def _addlistdelta(addlist, x):
102 def _addlistdelta(addlist, x):
63 # for large addlist arrays, building a new array is cheaper
103 # for large addlist arrays, building a new array is cheaper
64 # than repeatedly modifying the existing one
104 # than repeatedly modifying the existing one
65 currentposition = 0
105 currentposition = 0
66 newaddlist = array.array('c')
106 newaddlist = array.array('c')
67
107
68 for start, end, content in x:
108 for start, end, content in x:
69 newaddlist += addlist[currentposition:start]
109 newaddlist += addlist[currentposition:start]
70 if content:
110 if content:
71 newaddlist += array.array('c', content)
111 newaddlist += array.array('c', content)
72
112
73 currentposition = end
113 currentposition = end
74
114
75 newaddlist += addlist[currentposition:]
115 newaddlist += addlist[currentposition:]
76
116
77 deltatext = "".join(struct.pack(">lll", start, end, len(content))
117 deltatext = "".join(struct.pack(">lll", start, end, len(content))
78 + content for start, end, content in x)
118 + content for start, end, content in x)
79 return deltatext, newaddlist
119 return deltatext, newaddlist
80
120
81 def _parse(lines):
121 def _parse(lines):
82 mfdict = manifestdict()
122 mfdict = manifestdict()
83 parsers.parse_manifest(mfdict, mfdict._flags, lines)
123 parsers.parse_manifest(mfdict, mfdict._flags, lines)
84 return mfdict
124 return mfdict
85
125
86 class manifest(revlog.revlog):
126 class manifest(revlog.revlog):
87 def __init__(self, opener):
127 def __init__(self, opener):
88 # we expect to deal with not more than four revs at a time,
128 # we expect to deal with not more than four revs at a time,
89 # during a commit --amend
129 # during a commit --amend
90 self._mancache = util.lrucachedict(4)
130 self._mancache = util.lrucachedict(4)
91 revlog.revlog.__init__(self, opener, "00manifest.i")
131 revlog.revlog.__init__(self, opener, "00manifest.i")
92
132
93 def readdelta(self, node):
133 def readdelta(self, node):
94 r = self.rev(node)
134 r = self.rev(node)
95 return _parse(mdiff.patchtext(self.revdiff(self.deltaparent(r), r)))
135 return _parse(mdiff.patchtext(self.revdiff(self.deltaparent(r), r)))
96
136
97 def readfast(self, node):
137 def readfast(self, node):
98 '''use the faster of readdelta or read'''
138 '''use the faster of readdelta or read'''
99 r = self.rev(node)
139 r = self.rev(node)
100 deltaparent = self.deltaparent(r)
140 deltaparent = self.deltaparent(r)
101 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
141 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
102 return self.readdelta(node)
142 return self.readdelta(node)
103 return self.read(node)
143 return self.read(node)
104
144
105 def read(self, node):
145 def read(self, node):
106 if node == revlog.nullid:
146 if node == revlog.nullid:
107 return manifestdict() # don't upset local cache
147 return manifestdict() # don't upset local cache
108 if node in self._mancache:
148 if node in self._mancache:
109 return self._mancache[node][0]
149 return self._mancache[node][0]
110 text = self.revision(node)
150 text = self.revision(node)
111 arraytext = array.array('c', text)
151 arraytext = array.array('c', text)
112 mapping = _parse(text)
152 mapping = _parse(text)
113 self._mancache[node] = (mapping, arraytext)
153 self._mancache[node] = (mapping, arraytext)
114 return mapping
154 return mapping
115
155
116 def _search(self, m, s, lo=0, hi=None):
117 '''return a tuple (start, end) that says where to find s within m.
118
119 If the string is found m[start:end] are the line containing
120 that string. If start == end the string was not found and
121 they indicate the proper sorted insertion point.
122
123 m should be a buffer or a string
124 s is a string'''
125 def advance(i, c):
126 while i < lenm and m[i] != c:
127 i += 1
128 return i
129 if not s:
130 return (lo, lo)
131 lenm = len(m)
132 if not hi:
133 hi = lenm
134 while lo < hi:
135 mid = (lo + hi) // 2
136 start = mid
137 while start > 0 and m[start - 1] != '\n':
138 start -= 1
139 end = advance(start, '\0')
140 if m[start:end] < s:
141 # we know that after the null there are 40 bytes of sha1
142 # this translates to the bisect lo = mid + 1
143 lo = advance(end + 40, '\n') + 1
144 else:
145 # this translates to the bisect hi = mid
146 hi = start
147 end = advance(lo, '\0')
148 found = m[lo:end]
149 if s == found:
150 # we know that after the null there are 40 bytes of sha1
151 end = advance(end + 40, '\n')
152 return (lo, end + 1)
153 else:
154 return (lo, lo)
155
156 def find(self, node, f):
156 def find(self, node, f):
157 '''look up entry for a single file efficiently.
157 '''look up entry for a single file efficiently.
158 return (node, flags) pair if found, (None, None) if not.'''
158 return (node, flags) pair if found, (None, None) if not.'''
159 if node in self._mancache:
159 if node in self._mancache:
160 mapping = self._mancache[node][0]
160 mapping = self._mancache[node][0]
161 return mapping.get(f), mapping.flags(f)
161 return mapping.get(f), mapping.flags(f)
162 text = self.revision(node)
162 text = self.revision(node)
163 start, end = self._search(text, f)
163 start, end = _msearch(text, f)
164 if start == end:
164 if start == end:
165 return None, None
165 return None, None
166 l = text[start:end]
166 l = text[start:end]
167 f, n = l.split('\0')
167 f, n = l.split('\0')
168 return revlog.bin(n[:40]), n[40:-1]
168 return revlog.bin(n[:40]), n[40:-1]
169
169
170 def add(self, map, transaction, link, p1, p2, added, removed):
170 def add(self, map, transaction, link, p1, p2, added, removed):
171 if p1 in self._mancache:
171 if p1 in self._mancache:
172 # If our first parent is in the manifest cache, we can
172 # If our first parent is in the manifest cache, we can
173 # compute a delta here using properties we know about the
173 # compute a delta here using properties we know about the
174 # manifest up-front, which may save time later for the
174 # manifest up-front, which may save time later for the
175 # revlog layer.
175 # revlog layer.
176 addlist = self._mancache[p1][1]
176 addlist = self._mancache[p1][1]
177
177
178 _checkforbidden(added)
178 _checkforbidden(added)
179 # combine the changed lists into one list for sorting
179 # combine the changed lists into one list for sorting
180 work = [(x, False) for x in added]
180 work = [(x, False) for x in added]
181 work.extend((x, True) for x in removed)
181 work.extend((x, True) for x in removed)
182 # this could use heapq.merge() (from Python 2.6+) or equivalent
182 # this could use heapq.merge() (from Python 2.6+) or equivalent
183 # since the lists are already sorted
183 # since the lists are already sorted
184 work.sort()
184 work.sort()
185
185
186 delta = []
186 delta = []
187 dstart = None
187 dstart = None
188 dend = None
188 dend = None
189 dline = [""]
189 dline = [""]
190 start = 0
190 start = 0
191 # zero copy representation of addlist as a buffer
191 # zero copy representation of addlist as a buffer
192 addbuf = util.buffer(addlist)
192 addbuf = util.buffer(addlist)
193
193
194 # start with a readonly loop that finds the offset of
194 # start with a readonly loop that finds the offset of
195 # each line and creates the deltas
195 # each line and creates the deltas
196 for f, todelete in work:
196 for f, todelete in work:
197 # bs will either be the index of the item or the insert point
197 # bs will either be the index of the item or the insert point
198 start, end = self._search(addbuf, f, start)
198 start, end = _msearch(addbuf, f, start)
199 if not todelete:
199 if not todelete:
200 l = "%s\0%s%s\n" % (f, revlog.hex(map[f]), map.flags(f))
200 l = "%s\0%s%s\n" % (f, revlog.hex(map[f]), map.flags(f))
201 else:
201 else:
202 if start == end:
202 if start == end:
203 # item we want to delete was not found, error out
203 # item we want to delete was not found, error out
204 raise AssertionError(
204 raise AssertionError(
205 _("failed to remove %s from manifest") % f)
205 _("failed to remove %s from manifest") % f)
206 l = ""
206 l = ""
207 if dstart is not None and dstart <= start and dend >= start:
207 if dstart is not None and dstart <= start and dend >= start:
208 if dend < end:
208 if dend < end:
209 dend = end
209 dend = end
210 if l:
210 if l:
211 dline.append(l)
211 dline.append(l)
212 else:
212 else:
213 if dstart is not None:
213 if dstart is not None:
214 delta.append([dstart, dend, "".join(dline)])
214 delta.append([dstart, dend, "".join(dline)])
215 dstart = start
215 dstart = start
216 dend = end
216 dend = end
217 dline = [l]
217 dline = [l]
218
218
219 if dstart is not None:
219 if dstart is not None:
220 delta.append([dstart, dend, "".join(dline)])
220 delta.append([dstart, dend, "".join(dline)])
221 # apply the delta to the addlist, and get a delta for addrevision
221 # apply the delta to the addlist, and get a delta for addrevision
222 deltatext, addlist = _addlistdelta(addlist, delta)
222 deltatext, addlist = _addlistdelta(addlist, delta)
223 cachedelta = (self.rev(p1), deltatext)
223 cachedelta = (self.rev(p1), deltatext)
224 arraytext = addlist
224 arraytext = addlist
225 text = util.buffer(arraytext)
225 text = util.buffer(arraytext)
226 else:
226 else:
227 # The first parent manifest isn't already loaded, so we'll
227 # The first parent manifest isn't already loaded, so we'll
228 # just encode a fulltext of the manifest and pass that
228 # just encode a fulltext of the manifest and pass that
229 # through to the revlog layer, and let it handle the delta
229 # through to the revlog layer, and let it handle the delta
230 # process.
230 # process.
231 text = map.text()
231 text = map.text()
232 arraytext = array.array('c', text)
232 arraytext = array.array('c', text)
233 cachedelta = None
233 cachedelta = None
234
234
235 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
235 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
236 self._mancache[n] = (map, arraytext)
236 self._mancache[n] = (map, arraytext)
237
237
238 return n
238 return n
General Comments 0
You need to be logged in to leave comments. Login now