##// END OF EJS Templates
manifestdict: add a new method to intersect with a set of files...
Siddharth Agarwal -
r21879:090dcaaf default
parent child Browse files
Show More
@@ -1,218 +1,230 b''
# manifest.py - manifest revision class for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

8 from i18n import _
8 from i18n import _
9 import mdiff, parsers, error, revlog, util, dicthelpers
9 import mdiff, parsers, error, revlog, util, dicthelpers
10 import array, struct
10 import array, struct
11
11
class manifestdict(dict):
    '''dict of manifest entries plus a side table of per-file flags

    The dict itself holds the filename -> value mapping; self._flags is a
    separate dict mapping filename -> flags string for files with flags.'''
    def __init__(self, mapping=None, flags=None):
        '''build from an optional mapping and an optional flags dict

        mapping is copied into the dict; flags is stored by reference
        (not copied), so later changes to it are visible through
        self._flags.'''
        if mapping is None:
            mapping = {}
        if flags is None:
            flags = {}
        dict.__init__(self, mapping)
        self._flags = flags
    def flags(self, f):
        '''return the flags recorded for f, or "" if none are recorded'''
        return self._flags.get(f, "")
    def withflags(self):
        '''return the set of filenames that have an entry in the flags table'''
        return set(self._flags)
    def set(self, f, flags):
        '''record flags for f (does not touch the dict entry for f)'''
        self._flags[f] = flags
    def copy(self):
        '''return a new manifestdict with copies of the entries and flags'''
        return manifestdict(self, dict.copy(self._flags))
    def intersectfiles(self, files):
        '''make a new manifestdict with the intersection of self with files

        The algorithm assumes that files is much smaller than self.'''
        ret = manifestdict()
        for fn in files:
            if fn in self:
                ret[fn] = self[fn]
                # only carry over non-empty flags
                flags = self._flags.get(fn)
                if flags:
                    ret._flags[fn] = flags
        return ret
    def flagsdiff(self, d2):
        '''return dicthelpers.diff() of our flags against d2's flags

        "" is passed as the default value for missing entries.'''
        return dicthelpers.diff(self._flags, d2._flags, "")
30
42
class manifest(revlog.revlog):
    '''revlog storing manifests, with a small cache of parsed revisions

    self._mancache maps node -> (manifestdict, array of the raw text).'''
    def __init__(self, opener):
        # we expect to deal with not more than four revs at a time,
        # during a commit --amend
        self._mancache = util.lrucachedict(4)
        revlog.revlog.__init__(self, opener, "00manifest.i")

    def parse(self, lines):
        '''parse manifest text into a new manifestdict (entries and flags)'''
        mfdict = manifestdict()
        parsers.parse_manifest(mfdict, mfdict._flags, lines)
        return mfdict

    def readdelta(self, node):
        '''parse the patch text of node's delta against its delta parent'''
        r = self.rev(node)
        return self.parse(mdiff.patchtext(self.revdiff(self.deltaparent(r), r)))

    def readfast(self, node):
        '''use the faster of readdelta or read'''
        r = self.rev(node)
        deltaparent = self.deltaparent(r)
        if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
            return self.readdelta(node)
        return self.read(node)

    def read(self, node):
        '''return the parsed manifest for node, caching the result'''
        if node == revlog.nullid:
            return manifestdict() # don't upset local cache
        if node in self._mancache:
            return self._mancache[node][0]
        text = self.revision(node)
        arraytext = array.array('c', text)
        mapping = self.parse(text)
        self._mancache[node] = (mapping, arraytext)
        return mapping

    def _search(self, m, s, lo=0, hi=None):
        '''return a tuple (start, end) that says where to find s within m.

        If the string is found m[start:end] are the line containing
        that string.  If start == end the string was not found and
        they indicate the proper sorted insertion point.

        m should be a buffer or a string
        s is a string
        lo and hi bound the search; hi defaults to len(m)'''
        def advance(i, c):
            # move i forward to the next occurrence of c (or to len(m))
            while i < lenm and m[i] != c:
                i += 1
            return i
        if not s:
            return (lo, lo)
        lenm = len(m)
        # test against None so that an explicit hi=0 is honoured
        if hi is None:
            hi = lenm
        while lo < hi:
            mid = (lo + hi) // 2
            # back up to the beginning of the line containing mid
            start = mid
            while start > 0 and m[start - 1] != '\n':
                start -= 1
            end = advance(start, '\0')
            if m[start:end] < s:
                # we know that after the null there are 40 bytes of sha1
                # this translates to the bisect lo = mid + 1
                lo = advance(end + 40, '\n') + 1
            else:
                # this translates to the bisect hi = mid
                hi = start
        end = advance(lo, '\0')
        found = m[lo:end]
        if s == found:
            # we know that after the null there are 40 bytes of sha1
            end = advance(end + 40, '\n')
            return (lo, end + 1)
        else:
            return (lo, lo)

    def find(self, node, f):
        '''look up entry for a single file efficiently.
        return (node, flags) pair if found, (None, None) if not.'''
        if node in self._mancache:
            mapping = self._mancache[node][0]
            return mapping.get(f), mapping.flags(f)
        text = self.revision(node)
        start, end = self._search(text, f)
        if start == end:
            return None, None
        l = text[start:end]
        f, n = l.split('\0')
        # n is 40 hex digits, then the flags, then the trailing newline
        return revlog.bin(n[:40]), n[40:-1]

    def add(self, map, transaction, link, p1=None, p2=None,
            changed=None):
        '''add a manifest revision built from map and return its node

        map must support iteration, indexing by filename and a flags(f)
        method (as manifestdict does).  changed, if given, is an
        (added, removed) pair of filename lists; when it is truthy and p1
        is in the cache, the new text is produced by patching p1's cached
        text instead of being regenerated from scratch.

        Raises error.RevlogError for filenames containing a newline or
        carriage return, and AssertionError when a file listed as removed
        is not present in p1's cached text.'''
        # apply the changes collected during the bisect loop to our addlist
        # return a delta suitable for addrevision
        def addlistdelta(addlist, x):
            # for large addlist arrays, building a new array is cheaper
            # than repeatedly modifying the existing one
            currentposition = 0
            newaddlist = array.array('c')

            for start, end, content in x:
                newaddlist += addlist[currentposition:start]
                if content:
                    newaddlist += array.array('c', content)

                currentposition = end

            newaddlist += addlist[currentposition:]

            deltatext = "".join(struct.pack(">lll", start, end, len(content))
                                + content for start, end, content in x)
            return deltatext, newaddlist

        def checkforbidden(l):
            for f in l:
                if '\n' in f or '\r' in f:
                    raise error.RevlogError(
                        _("'\\n' and '\\r' disallowed in filenames: %r") % f)

        # if we're using the cache, make sure it is valid and
        # parented by the same node we're diffing against
        if not (changed and p1 and (p1 in self._mancache)):
            files = sorted(map)
            checkforbidden(files)

            # if this is changed to support newlines in filenames,
            # be sure to check the templates/ dir again (especially *-raw.tmpl)
            hex, flags = revlog.hex, map.flags
            text = ''.join("%s\0%s%s\n" % (f, hex(map[f]), flags(f))
                           for f in files)
            arraytext = array.array('c', text)
            cachedelta = None
        else:
            added, removed = changed
            addlist = self._mancache[p1][1]

            checkforbidden(added)
            # combine the changed lists into one list for sorting
            work = [(x, False) for x in added]
            work.extend((x, True) for x in removed)
            # this could use heapq.merge() (from Python 2.6+) or equivalent
            # since the lists are already sorted
            work.sort()

            delta = []
            dstart = None
            dend = None
            dline = [""]
            start = 0
            # zero copy representation of addlist as a buffer
            addbuf = util.buffer(addlist)

            # start with a readonly loop that finds the offset of
            # each line and creates the deltas
            for f, todelete in work:
                # start will either be the index of the item or the
                # insert point
                start, end = self._search(addbuf, f, start)
                if not todelete:
                    l = "%s\0%s%s\n" % (f, revlog.hex(map[f]), map.flags(f))
                else:
                    if start == end:
                        # item we want to delete was not found, error out
                        raise AssertionError(
                            _("failed to remove %s from manifest") % f)
                    l = ""
                if dstart is not None and dstart <= start and dend >= start:
                    # this change touches the pending delta hunk: extend it
                    if dend < end:
                        dend = end
                    if l:
                        dline.append(l)
                else:
                    # flush the pending hunk (if any) and start a new one
                    if dstart is not None:
                        delta.append([dstart, dend, "".join(dline)])
                    dstart = start
                    dend = end
                    dline = [l]

            if dstart is not None:
                delta.append([dstart, dend, "".join(dline)])
            # apply the delta to the addlist, and get a delta for addrevision
            deltatext, addlist = addlistdelta(addlist, delta)
            cachedelta = (self.rev(p1), deltatext)
            arraytext = addlist
            text = util.buffer(arraytext)

        n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
        self._mancache[n] = (map, arraytext)

        return n
General Comments 0
You need to be logged in to leave comments. Login now