##// END OF EJS Templates
manifest: use a size 3 LRU cache to store parsed manifests...
Siddharth Agarwal -
r18604:a1141f04 default
parent child Browse files
Show More
@@ -1,213 +1,215 b''
1 # manifest.py - manifest revision class for mercurial
1 # manifest.py - manifest revision class for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from i18n import _
8 from i18n import _
9 import mdiff, parsers, error, revlog, util
9 import mdiff, parsers, error, revlog, util
10 import array, struct
10 import array, struct
11
11
12 class manifestdict(dict):
12 class manifestdict(dict):
13 def __init__(self, mapping=None, flags=None):
13 def __init__(self, mapping=None, flags=None):
14 if mapping is None:
14 if mapping is None:
15 mapping = {}
15 mapping = {}
16 if flags is None:
16 if flags is None:
17 flags = {}
17 flags = {}
18 dict.__init__(self, mapping)
18 dict.__init__(self, mapping)
19 self._flags = flags
19 self._flags = flags
20 def flags(self, f):
20 def flags(self, f):
21 return self._flags.get(f, "")
21 return self._flags.get(f, "")
22 def withflags(self):
22 def withflags(self):
23 return set(self._flags.keys())
23 return set(self._flags.keys())
24 def set(self, f, flags):
24 def set(self, f, flags):
25 self._flags[f] = flags
25 self._flags[f] = flags
26 def copy(self):
26 def copy(self):
27 return manifestdict(self, dict.copy(self._flags))
27 return manifestdict(self, dict.copy(self._flags))
28
28
29 class manifest(revlog.revlog):
29 class manifest(revlog.revlog):
30 def __init__(self, opener):
30 def __init__(self, opener):
31 self._mancache = None
31 # we expect to deal with not more than three revs at a time in merge
32 self._mancache = util.lrucachedict(3)
32 revlog.revlog.__init__(self, opener, "00manifest.i")
33 revlog.revlog.__init__(self, opener, "00manifest.i")
33
34
34 def parse(self, lines):
35 def parse(self, lines):
35 mfdict = manifestdict()
36 mfdict = manifestdict()
36 parsers.parse_manifest(mfdict, mfdict._flags, lines)
37 parsers.parse_manifest(mfdict, mfdict._flags, lines)
37 return mfdict
38 return mfdict
38
39
39 def readdelta(self, node):
40 def readdelta(self, node):
40 r = self.rev(node)
41 r = self.rev(node)
41 return self.parse(mdiff.patchtext(self.revdiff(self.deltaparent(r), r)))
42 return self.parse(mdiff.patchtext(self.revdiff(self.deltaparent(r), r)))
42
43
43 def readfast(self, node):
44 def readfast(self, node):
44 '''use the faster of readdelta or read'''
45 '''use the faster of readdelta or read'''
45 r = self.rev(node)
46 r = self.rev(node)
46 deltaparent = self.deltaparent(r)
47 deltaparent = self.deltaparent(r)
47 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
48 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
48 return self.readdelta(node)
49 return self.readdelta(node)
49 return self.read(node)
50 return self.read(node)
50
51
51 def read(self, node):
52 def read(self, node):
52 if node == revlog.nullid:
53 if node == revlog.nullid:
53 return manifestdict() # don't upset local cache
54 return manifestdict() # don't upset local cache
54 if self._mancache and self._mancache[0] == node:
55 if node in self._mancache:
55 return self._mancache[1]
56 return self._mancache[node][0]
56 text = self.revision(node)
57 text = self.revision(node)
57 arraytext = array.array('c', text)
58 arraytext = array.array('c', text)
58 mapping = self.parse(text)
59 mapping = self.parse(text)
59 self._mancache = (node, mapping, arraytext)
60 self._mancache[node] = (mapping, arraytext)
60 return mapping
61 return mapping
61
62
62 def _search(self, m, s, lo=0, hi=None):
63 def _search(self, m, s, lo=0, hi=None):
63 '''return a tuple (start, end) that says where to find s within m.
64 '''return a tuple (start, end) that says where to find s within m.
64
65
65 If the string is found m[start:end] are the line containing
66 If the string is found m[start:end] are the line containing
66 that string. If start == end the string was not found and
67 that string. If start == end the string was not found and
67 they indicate the proper sorted insertion point.
68 they indicate the proper sorted insertion point.
68
69
69 m should be a buffer or a string
70 m should be a buffer or a string
70 s is a string'''
71 s is a string'''
71 def advance(i, c):
72 def advance(i, c):
72 while i < lenm and m[i] != c:
73 while i < lenm and m[i] != c:
73 i += 1
74 i += 1
74 return i
75 return i
75 if not s:
76 if not s:
76 return (lo, lo)
77 return (lo, lo)
77 lenm = len(m)
78 lenm = len(m)
78 if not hi:
79 if not hi:
79 hi = lenm
80 hi = lenm
80 while lo < hi:
81 while lo < hi:
81 mid = (lo + hi) // 2
82 mid = (lo + hi) // 2
82 start = mid
83 start = mid
83 while start > 0 and m[start - 1] != '\n':
84 while start > 0 and m[start - 1] != '\n':
84 start -= 1
85 start -= 1
85 end = advance(start, '\0')
86 end = advance(start, '\0')
86 if m[start:end] < s:
87 if m[start:end] < s:
87 # we know that after the null there are 40 bytes of sha1
88 # we know that after the null there are 40 bytes of sha1
88 # this translates to the bisect lo = mid + 1
89 # this translates to the bisect lo = mid + 1
89 lo = advance(end + 40, '\n') + 1
90 lo = advance(end + 40, '\n') + 1
90 else:
91 else:
91 # this translates to the bisect hi = mid
92 # this translates to the bisect hi = mid
92 hi = start
93 hi = start
93 end = advance(lo, '\0')
94 end = advance(lo, '\0')
94 found = m[lo:end]
95 found = m[lo:end]
95 if s == found:
96 if s == found:
96 # we know that after the null there are 40 bytes of sha1
97 # we know that after the null there are 40 bytes of sha1
97 end = advance(end + 40, '\n')
98 end = advance(end + 40, '\n')
98 return (lo, end + 1)
99 return (lo, end + 1)
99 else:
100 else:
100 return (lo, lo)
101 return (lo, lo)
101
102
102 def find(self, node, f):
103 def find(self, node, f):
103 '''look up entry for a single file efficiently.
104 '''look up entry for a single file efficiently.
104 return (node, flags) pair if found, (None, None) if not.'''
105 return (node, flags) pair if found, (None, None) if not.'''
105 if self._mancache and self._mancache[0] == node:
106 if node in self._mancache:
106 return self._mancache[1].get(f), self._mancache[1].flags(f)
107 mapping = self._mancache[node][0]
108 return mapping.get(f), mapping.flags(f)
107 text = self.revision(node)
109 text = self.revision(node)
108 start, end = self._search(text, f)
110 start, end = self._search(text, f)
109 if start == end:
111 if start == end:
110 return None, None
112 return None, None
111 l = text[start:end]
113 l = text[start:end]
112 f, n = l.split('\0')
114 f, n = l.split('\0')
113 return revlog.bin(n[:40]), n[40:-1]
115 return revlog.bin(n[:40]), n[40:-1]
114
116
115 def add(self, map, transaction, link, p1=None, p2=None,
117 def add(self, map, transaction, link, p1=None, p2=None,
116 changed=None):
118 changed=None):
117 # apply the changes collected during the bisect loop to our addlist
119 # apply the changes collected during the bisect loop to our addlist
118 # return a delta suitable for addrevision
120 # return a delta suitable for addrevision
119 def addlistdelta(addlist, x):
121 def addlistdelta(addlist, x):
120 # for large addlist arrays, building a new array is cheaper
122 # for large addlist arrays, building a new array is cheaper
121 # than repeatedly modifying the existing one
123 # than repeatedly modifying the existing one
122 currentposition = 0
124 currentposition = 0
123 newaddlist = array.array('c')
125 newaddlist = array.array('c')
124
126
125 for start, end, content in x:
127 for start, end, content in x:
126 newaddlist += addlist[currentposition:start]
128 newaddlist += addlist[currentposition:start]
127 if content:
129 if content:
128 newaddlist += array.array('c', content)
130 newaddlist += array.array('c', content)
129
131
130 currentposition = end
132 currentposition = end
131
133
132 newaddlist += addlist[currentposition:]
134 newaddlist += addlist[currentposition:]
133
135
134 deltatext = "".join(struct.pack(">lll", start, end, len(content))
136 deltatext = "".join(struct.pack(">lll", start, end, len(content))
135 + content for start, end, content in x)
137 + content for start, end, content in x)
136 return deltatext, newaddlist
138 return deltatext, newaddlist
137
139
138 def checkforbidden(l):
140 def checkforbidden(l):
139 for f in l:
141 for f in l:
140 if '\n' in f or '\r' in f:
142 if '\n' in f or '\r' in f:
141 raise error.RevlogError(
143 raise error.RevlogError(
142 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
144 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
143
145
144 # if we're using the cache, make sure it is valid and
146 # if we're using the cache, make sure it is valid and
145 # parented by the same node we're diffing against
147 # parented by the same node we're diffing against
146 if not (changed and self._mancache and p1 and self._mancache[0] == p1):
148 if not (changed and p1 and (p1 in self._mancache)):
147 files = sorted(map)
149 files = sorted(map)
148 checkforbidden(files)
150 checkforbidden(files)
149
151
150 # if this is changed to support newlines in filenames,
152 # if this is changed to support newlines in filenames,
151 # be sure to check the templates/ dir again (especially *-raw.tmpl)
153 # be sure to check the templates/ dir again (especially *-raw.tmpl)
152 hex, flags = revlog.hex, map.flags
154 hex, flags = revlog.hex, map.flags
153 text = ''.join("%s\0%s%s\n" % (f, hex(map[f]), flags(f))
155 text = ''.join("%s\0%s%s\n" % (f, hex(map[f]), flags(f))
154 for f in files)
156 for f in files)
155 arraytext = array.array('c', text)
157 arraytext = array.array('c', text)
156 cachedelta = None
158 cachedelta = None
157 else:
159 else:
158 added, removed = changed
160 added, removed = changed
159 addlist = self._mancache[2]
161 addlist = self._mancache[p1][1]
160
162
161 checkforbidden(added)
163 checkforbidden(added)
162 # combine the changed lists into one list for sorting
164 # combine the changed lists into one list for sorting
163 work = [(x, False) for x in added]
165 work = [(x, False) for x in added]
164 work.extend((x, True) for x in removed)
166 work.extend((x, True) for x in removed)
165 # this could use heapq.merge() (from Python 2.6+) or equivalent
167 # this could use heapq.merge() (from Python 2.6+) or equivalent
166 # since the lists are already sorted
168 # since the lists are already sorted
167 work.sort()
169 work.sort()
168
170
169 delta = []
171 delta = []
170 dstart = None
172 dstart = None
171 dend = None
173 dend = None
172 dline = [""]
174 dline = [""]
173 start = 0
175 start = 0
174 # zero copy representation of addlist as a buffer
176 # zero copy representation of addlist as a buffer
175 addbuf = util.buffer(addlist)
177 addbuf = util.buffer(addlist)
176
178
177 # start with a readonly loop that finds the offset of
179 # start with a readonly loop that finds the offset of
178 # each line and creates the deltas
180 # each line and creates the deltas
179 for f, todelete in work:
181 for f, todelete in work:
180 # bs will either be the index of the item or the insert point
182 # bs will either be the index of the item or the insert point
181 start, end = self._search(addbuf, f, start)
183 start, end = self._search(addbuf, f, start)
182 if not todelete:
184 if not todelete:
183 l = "%s\0%s%s\n" % (f, revlog.hex(map[f]), map.flags(f))
185 l = "%s\0%s%s\n" % (f, revlog.hex(map[f]), map.flags(f))
184 else:
186 else:
185 if start == end:
187 if start == end:
186 # item we want to delete was not found, error out
188 # item we want to delete was not found, error out
187 raise AssertionError(
189 raise AssertionError(
188 _("failed to remove %s from manifest") % f)
190 _("failed to remove %s from manifest") % f)
189 l = ""
191 l = ""
190 if dstart is not None and dstart <= start and dend >= start:
192 if dstart is not None and dstart <= start and dend >= start:
191 if dend < end:
193 if dend < end:
192 dend = end
194 dend = end
193 if l:
195 if l:
194 dline.append(l)
196 dline.append(l)
195 else:
197 else:
196 if dstart is not None:
198 if dstart is not None:
197 delta.append([dstart, dend, "".join(dline)])
199 delta.append([dstart, dend, "".join(dline)])
198 dstart = start
200 dstart = start
199 dend = end
201 dend = end
200 dline = [l]
202 dline = [l]
201
203
202 if dstart is not None:
204 if dstart is not None:
203 delta.append([dstart, dend, "".join(dline)])
205 delta.append([dstart, dend, "".join(dline)])
204 # apply the delta to the addlist, and get a delta for addrevision
206 # apply the delta to the addlist, and get a delta for addrevision
205 deltatext, addlist = addlistdelta(addlist, delta)
207 deltatext, addlist = addlistdelta(addlist, delta)
206 cachedelta = (self.rev(p1), deltatext)
208 cachedelta = (self.rev(p1), deltatext)
207 arraytext = addlist
209 arraytext = addlist
208 text = util.buffer(arraytext)
210 text = util.buffer(arraytext)
209
211
210 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
212 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
211 self._mancache = (n, map, arraytext)
213 self._mancache[n] = (map, arraytext)
212
214
213 return n
215 return n
General Comments 0
You need to be logged in to leave comments. Login now