##// END OF EJS Templates
manifest: minor performance tweak
Matt Mackall -
r5328:8d00788c default
parent child Browse files
Show More
@@ -1,212 +1,213 b''
1 # manifest.py - manifest revision class for mercurial
1 # manifest.py - manifest revision class for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
6 # of the GNU General Public License, incorporated herein by reference.
7
7
8 from revlog import *
8 from revlog import *
9 from i18n import _
9 from i18n import _
10 import array, bisect, struct, mdiff
10 import array, bisect, struct, mdiff
11
11
12 class manifestdict(dict):
12 class manifestdict(dict):
13 def __init__(self, mapping=None, flags=None):
13 def __init__(self, mapping=None, flags=None):
14 if mapping is None: mapping = {}
14 if mapping is None: mapping = {}
15 if flags is None: flags = {}
15 if flags is None: flags = {}
16 dict.__init__(self, mapping)
16 dict.__init__(self, mapping)
17 self._flags = flags
17 self._flags = flags
18 def flags(self, f):
18 def flags(self, f):
19 return self._flags.get(f, "")
19 return self._flags.get(f, "")
20 def execf(self, f):
20 def execf(self, f):
21 "test for executable in manifest flags"
21 "test for executable in manifest flags"
22 return "x" in self.flags(f)
22 return "x" in self.flags(f)
23 def linkf(self, f):
23 def linkf(self, f):
24 "test for symlink in manifest flags"
24 "test for symlink in manifest flags"
25 return "l" in self.flags(f)
25 return "l" in self.flags(f)
26 def set(self, f, execf=False, linkf=False):
26 def set(self, f, execf=False, linkf=False):
27 if linkf: self._flags[f] = "l"
27 if linkf: self._flags[f] = "l"
28 elif execf: self._flags[f] = "x"
28 elif execf: self._flags[f] = "x"
29 else: self._flags[f] = ""
29 else: self._flags[f] = ""
30 def copy(self):
30 def copy(self):
31 return manifestdict(dict.copy(self), dict.copy(self._flags))
31 return manifestdict(dict.copy(self), dict.copy(self._flags))
32
32
33 class manifest(revlog):
33 class manifest(revlog):
34 def __init__(self, opener):
34 def __init__(self, opener):
35 self.mapcache = None
35 self.mapcache = None
36 self.listcache = None
36 self.listcache = None
37 revlog.__init__(self, opener, "00manifest.i")
37 revlog.__init__(self, opener, "00manifest.i")
38
38
39 def parse(self, lines):
39 def parse(self, lines):
40 mfdict = manifestdict()
40 mfdict = manifestdict()
41 fdict = mfdict._flags
41 for l in lines.splitlines():
42 for l in lines.splitlines():
42 f, n = l.split('\0')
43 f, n = l.split('\0')
43 if len(n) > 40:
44 if len(n) > 40:
44 mfdict._flags[f] = n[40:]
45 fdict[f] = n[40:]
45 mfdict[f] = bin(n[:40])
46 mfdict[f] = bin(n[:40])
46 else:
47 else:
47 mfdict[f] = bin(n)
48 mfdict[f] = bin(n)
48 return mfdict
49 return mfdict
49
50
50 def readdelta(self, node):
51 def readdelta(self, node):
51 return self.parse(mdiff.patchtext(self.delta(node)))
52 return self.parse(mdiff.patchtext(self.delta(node)))
52
53
53 def read(self, node):
54 def read(self, node):
54 if node == nullid: return manifestdict() # don't upset local cache
55 if node == nullid: return manifestdict() # don't upset local cache
55 if self.mapcache and self.mapcache[0] == node:
56 if self.mapcache and self.mapcache[0] == node:
56 return self.mapcache[1]
57 return self.mapcache[1]
57 text = self.revision(node)
58 text = self.revision(node)
58 self.listcache = array.array('c', text)
59 self.listcache = array.array('c', text)
59 mapping = self.parse(text)
60 mapping = self.parse(text)
60 self.mapcache = (node, mapping)
61 self.mapcache = (node, mapping)
61 return mapping
62 return mapping
62
63
63 def _search(self, m, s, lo=0, hi=None):
64 def _search(self, m, s, lo=0, hi=None):
64 '''return a tuple (start, end) that says where to find s within m.
65 '''return a tuple (start, end) that says where to find s within m.
65
66
66 If the string is found m[start:end] are the line containing
67 If the string is found m[start:end] are the line containing
67 that string. If start == end the string was not found and
68 that string. If start == end the string was not found and
68 they indicate the proper sorted insertion point. This was
69 they indicate the proper sorted insertion point. This was
69 taken from bisect_left, and modified to find line start/end as
70 taken from bisect_left, and modified to find line start/end as
70 it goes along.
71 it goes along.
71
72
72 m should be a buffer or a string
73 m should be a buffer or a string
73 s is a string'''
74 s is a string'''
74 def advance(i, c):
75 def advance(i, c):
75 while i < lenm and m[i] != c:
76 while i < lenm and m[i] != c:
76 i += 1
77 i += 1
77 return i
78 return i
78 lenm = len(m)
79 lenm = len(m)
79 if not hi:
80 if not hi:
80 hi = lenm
81 hi = lenm
81 while lo < hi:
82 while lo < hi:
82 mid = (lo + hi) // 2
83 mid = (lo + hi) // 2
83 start = mid
84 start = mid
84 while start > 0 and m[start-1] != '\n':
85 while start > 0 and m[start-1] != '\n':
85 start -= 1
86 start -= 1
86 end = advance(start, '\0')
87 end = advance(start, '\0')
87 if m[start:end] < s:
88 if m[start:end] < s:
88 # we know that after the null there are 40 bytes of sha1
89 # we know that after the null there are 40 bytes of sha1
89 # this translates to the bisect lo = mid + 1
90 # this translates to the bisect lo = mid + 1
90 lo = advance(end + 40, '\n') + 1
91 lo = advance(end + 40, '\n') + 1
91 else:
92 else:
92 # this translates to the bisect hi = mid
93 # this translates to the bisect hi = mid
93 hi = start
94 hi = start
94 end = advance(lo, '\0')
95 end = advance(lo, '\0')
95 found = m[lo:end]
96 found = m[lo:end]
96 if cmp(s, found) == 0:
97 if cmp(s, found) == 0:
97 # we know that after the null there are 40 bytes of sha1
98 # we know that after the null there are 40 bytes of sha1
98 end = advance(end + 40, '\n')
99 end = advance(end + 40, '\n')
99 return (lo, end+1)
100 return (lo, end+1)
100 else:
101 else:
101 return (lo, lo)
102 return (lo, lo)
102
103
103 def find(self, node, f):
104 def find(self, node, f):
104 '''look up entry for a single file efficiently.
105 '''look up entry for a single file efficiently.
105 return (node, flags) pair if found, (None, None) if not.'''
106 return (node, flags) pair if found, (None, None) if not.'''
106 if self.mapcache and node == self.mapcache[0]:
107 if self.mapcache and node == self.mapcache[0]:
107 return self.mapcache[1].get(f), self.mapcache[1].flags(f)
108 return self.mapcache[1].get(f), self.mapcache[1].flags(f)
108 text = self.revision(node)
109 text = self.revision(node)
109 start, end = self._search(text, f)
110 start, end = self._search(text, f)
110 if start == end:
111 if start == end:
111 return None, None
112 return None, None
112 l = text[start:end]
113 l = text[start:end]
113 f, n = l.split('\0')
114 f, n = l.split('\0')
114 return bin(n[:40]), n[40:-1]
115 return bin(n[:40]), n[40:-1]
115
116
116 def add(self, map, transaction, link, p1=None, p2=None,
117 def add(self, map, transaction, link, p1=None, p2=None,
117 changed=None):
118 changed=None):
118 # apply the changes collected during the bisect loop to our addlist
119 # apply the changes collected during the bisect loop to our addlist
119 # return a delta suitable for addrevision
120 # return a delta suitable for addrevision
120 def addlistdelta(addlist, x):
121 def addlistdelta(addlist, x):
121 # start from the bottom up
122 # start from the bottom up
122 # so changes to the offsets don't mess things up.
123 # so changes to the offsets don't mess things up.
123 i = len(x)
124 i = len(x)
124 while i > 0:
125 while i > 0:
125 i -= 1
126 i -= 1
126 start = x[i][0]
127 start = x[i][0]
127 end = x[i][1]
128 end = x[i][1]
128 if x[i][2]:
129 if x[i][2]:
129 addlist[start:end] = array.array('c', x[i][2])
130 addlist[start:end] = array.array('c', x[i][2])
130 else:
131 else:
131 del addlist[start:end]
132 del addlist[start:end]
132 return "".join([struct.pack(">lll", d[0], d[1], len(d[2])) + d[2]
133 return "".join([struct.pack(">lll", d[0], d[1], len(d[2])) + d[2]
133 for d in x ])
134 for d in x ])
134
135
135 def checkforbidden(f):
136 def checkforbidden(f):
136 if '\n' in f or '\r' in f:
137 if '\n' in f or '\r' in f:
137 raise RevlogError(_("'\\n' and '\\r' disallowed in filenames"))
138 raise RevlogError(_("'\\n' and '\\r' disallowed in filenames"))
138
139
139 # if we're using the listcache, make sure it is valid and
140 # if we're using the listcache, make sure it is valid and
140 # parented by the same node we're diffing against
141 # parented by the same node we're diffing against
141 if not (changed and self.listcache and p1 and self.mapcache[0] == p1):
142 if not (changed and self.listcache and p1 and self.mapcache[0] == p1):
142 files = map.keys()
143 files = map.keys()
143 files.sort()
144 files.sort()
144
145
145 for f in files:
146 for f in files:
146 checkforbidden(f)
147 checkforbidden(f)
147
148
148 # if this is changed to support newlines in filenames,
149 # if this is changed to support newlines in filenames,
149 # be sure to check the templates/ dir again (especially *-raw.tmpl)
150 # be sure to check the templates/ dir again (especially *-raw.tmpl)
150 text = ["%s\000%s%s\n" % (f, hex(map[f]), map.flags(f))
151 text = ["%s\000%s%s\n" % (f, hex(map[f]), map.flags(f))
151 for f in files]
152 for f in files]
152 self.listcache = array.array('c', "".join(text))
153 self.listcache = array.array('c', "".join(text))
153 cachedelta = None
154 cachedelta = None
154 else:
155 else:
155 addlist = self.listcache
156 addlist = self.listcache
156
157
157 for f in changed[0]:
158 for f in changed[0]:
158 checkforbidden(f)
159 checkforbidden(f)
159 # combine the changed lists into one list for sorting
160 # combine the changed lists into one list for sorting
160 work = [[x, 0] for x in changed[0]]
161 work = [[x, 0] for x in changed[0]]
161 work[len(work):] = [[x, 1] for x in changed[1]]
162 work[len(work):] = [[x, 1] for x in changed[1]]
162 work.sort()
163 work.sort()
163
164
164 delta = []
165 delta = []
165 dstart = None
166 dstart = None
166 dend = None
167 dend = None
167 dline = [""]
168 dline = [""]
168 start = 0
169 start = 0
169 # zero copy representation of addlist as a buffer
170 # zero copy representation of addlist as a buffer
170 addbuf = buffer(addlist)
171 addbuf = buffer(addlist)
171
172
172 # start with a readonly loop that finds the offset of
173 # start with a readonly loop that finds the offset of
173 # each line and creates the deltas
174 # each line and creates the deltas
174 for w in work:
175 for w in work:
175 f = w[0]
176 f = w[0]
176 # bs will either be the index of the item or the insert point
177 # bs will either be the index of the item or the insert point
177 start, end = self._search(addbuf, f, start)
178 start, end = self._search(addbuf, f, start)
178 if w[1] == 0:
179 if w[1] == 0:
179 l = "%s\000%s%s\n" % (f, hex(map[f]), map.flags(f))
180 l = "%s\000%s%s\n" % (f, hex(map[f]), map.flags(f))
180 else:
181 else:
181 l = ""
182 l = ""
182 if start == end and w[1] == 1:
183 if start == end and w[1] == 1:
183 # item we want to delete was not found, error out
184 # item we want to delete was not found, error out
184 raise AssertionError(
185 raise AssertionError(
185 _("failed to remove %s from manifest") % f)
186 _("failed to remove %s from manifest") % f)
186 if dstart != None and dstart <= start and dend >= start:
187 if dstart != None and dstart <= start and dend >= start:
187 if dend < end:
188 if dend < end:
188 dend = end
189 dend = end
189 if l:
190 if l:
190 dline.append(l)
191 dline.append(l)
191 else:
192 else:
192 if dstart != None:
193 if dstart != None:
193 delta.append([dstart, dend, "".join(dline)])
194 delta.append([dstart, dend, "".join(dline)])
194 dstart = start
195 dstart = start
195 dend = end
196 dend = end
196 dline = [l]
197 dline = [l]
197
198
198 if dstart != None:
199 if dstart != None:
199 delta.append([dstart, dend, "".join(dline)])
200 delta.append([dstart, dend, "".join(dline)])
200 # apply the delta to the addlist, and get a delta for addrevision
201 # apply the delta to the addlist, and get a delta for addrevision
201 cachedelta = addlistdelta(addlist, delta)
202 cachedelta = addlistdelta(addlist, delta)
202
203
203 # the delta is only valid if we've been processing the tip revision
204 # the delta is only valid if we've been processing the tip revision
204 if self.mapcache[0] != self.tip():
205 if self.mapcache[0] != self.tip():
205 cachedelta = None
206 cachedelta = None
206 self.listcache = addlist
207 self.listcache = addlist
207
208
208 n = self.addrevision(buffer(self.listcache), transaction, link,
209 n = self.addrevision(buffer(self.listcache), transaction, link,
209 p1, p2, cachedelta)
210 p1, p2, cachedelta)
210 self.mapcache = (n, map)
211 self.mapcache = (n, map)
211
212
212 return n
213 return n
General Comments 0
You need to be logged in to leave comments. Login now