##// END OF EJS Templates
commit: increase perf by building a new addlist instead of editing the old one...
Durham Goode -
r17983:c64e646a default
parent child Browse files
Show More
@@ -1,204 +1,213 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import mdiff, parsers, error, revlog, util
10 10 import array, struct
11 11
12 12 class manifestdict(dict):
13 13 def __init__(self, mapping=None, flags=None):
14 14 if mapping is None:
15 15 mapping = {}
16 16 if flags is None:
17 17 flags = {}
18 18 dict.__init__(self, mapping)
19 19 self._flags = flags
20 20 def flags(self, f):
21 21 return self._flags.get(f, "")
22 22 def withflags(self):
23 23 return set(self._flags.keys())
24 24 def set(self, f, flags):
25 25 self._flags[f] = flags
26 26 def copy(self):
27 27 return manifestdict(self, dict.copy(self._flags))
28 28
29 29 class manifest(revlog.revlog):
30 30 def __init__(self, opener):
31 31 self._mancache = None
32 32 revlog.revlog.__init__(self, opener, "00manifest.i")
33 33
34 34 def parse(self, lines):
35 35 mfdict = manifestdict()
36 36 parsers.parse_manifest(mfdict, mfdict._flags, lines)
37 37 return mfdict
38 38
39 39 def readdelta(self, node):
40 40 r = self.rev(node)
41 41 return self.parse(mdiff.patchtext(self.revdiff(self.deltaparent(r), r)))
42 42
43 43 def readfast(self, node):
44 44 '''use the faster of readdelta or read'''
45 45 r = self.rev(node)
46 46 deltaparent = self.deltaparent(r)
47 47 if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
48 48 return self.readdelta(node)
49 49 return self.read(node)
50 50
51 51 def read(self, node):
52 52 if node == revlog.nullid:
53 53 return manifestdict() # don't upset local cache
54 54 if self._mancache and self._mancache[0] == node:
55 55 return self._mancache[1]
56 56 text = self.revision(node)
57 57 arraytext = array.array('c', text)
58 58 mapping = self.parse(text)
59 59 self._mancache = (node, mapping, arraytext)
60 60 return mapping
61 61
62 62 def _search(self, m, s, lo=0, hi=None):
63 63 '''return a tuple (start, end) that says where to find s within m.
64 64
65 65 If the string is found m[start:end] are the line containing
66 66 that string. If start == end the string was not found and
67 67 they indicate the proper sorted insertion point.
68 68
69 69 m should be a buffer or a string
70 70 s is a string'''
71 71 def advance(i, c):
72 72 while i < lenm and m[i] != c:
73 73 i += 1
74 74 return i
75 75 if not s:
76 76 return (lo, lo)
77 77 lenm = len(m)
78 78 if not hi:
79 79 hi = lenm
80 80 while lo < hi:
81 81 mid = (lo + hi) // 2
82 82 start = mid
83 83 while start > 0 and m[start - 1] != '\n':
84 84 start -= 1
85 85 end = advance(start, '\0')
86 86 if m[start:end] < s:
87 87 # we know that after the null there are 40 bytes of sha1
88 88 # this translates to the bisect lo = mid + 1
89 89 lo = advance(end + 40, '\n') + 1
90 90 else:
91 91 # this translates to the bisect hi = mid
92 92 hi = start
93 93 end = advance(lo, '\0')
94 94 found = m[lo:end]
95 95 if s == found:
96 96 # we know that after the null there are 40 bytes of sha1
97 97 end = advance(end + 40, '\n')
98 98 return (lo, end + 1)
99 99 else:
100 100 return (lo, lo)
101 101
102 102 def find(self, node, f):
103 103 '''look up entry for a single file efficiently.
104 104 return (node, flags) pair if found, (None, None) if not.'''
105 105 if self._mancache and self._mancache[0] == node:
106 106 return self._mancache[1].get(f), self._mancache[1].flags(f)
107 107 text = self.revision(node)
108 108 start, end = self._search(text, f)
109 109 if start == end:
110 110 return None, None
111 111 l = text[start:end]
112 112 f, n = l.split('\0')
113 113 return revlog.bin(n[:40]), n[40:-1]
114 114
115 115 def add(self, map, transaction, link, p1=None, p2=None,
116 116 changed=None):
117 117 # apply the changes collected during the bisect loop to our addlist
118 118 # return a delta suitable for addrevision
119 119 def addlistdelta(addlist, x):
120 # start from the bottom up
121 # so changes to the offsets don't mess things up.
122 for start, end, content in reversed(x):
120 # for large addlist arrays, building a new array is cheaper
121 # than repeatedly modifying the existing one
122 currentposition = 0
123 newaddlist = array.array('c')
124
125 for start, end, content in x:
126 newaddlist += addlist[currentposition:start]
123 127 if content:
124 addlist[start:end] = array.array('c', content)
125 else:
126 del addlist[start:end]
127 return "".join(struct.pack(">lll", start, end, len(content))
128 newaddlist += array.array('c', content)
129
130 currentposition = end
131
132 newaddlist += addlist[currentposition:]
133
134 deltatext = "".join(struct.pack(">lll", start, end, len(content))
128 135 + content for start, end, content in x)
136 return deltatext, newaddlist
129 137
130 138 def checkforbidden(l):
131 139 for f in l:
132 140 if '\n' in f or '\r' in f:
133 141 raise error.RevlogError(
134 142 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
135 143
136 144 # if we're using the cache, make sure it is valid and
137 145 # parented by the same node we're diffing against
138 146 if not (changed and self._mancache and p1 and self._mancache[0] == p1):
139 147 files = sorted(map)
140 148 checkforbidden(files)
141 149
142 150 # if this is changed to support newlines in filenames,
143 151 # be sure to check the templates/ dir again (especially *-raw.tmpl)
144 152 hex, flags = revlog.hex, map.flags
145 153 text = ''.join("%s\0%s%s\n" % (f, hex(map[f]), flags(f))
146 154 for f in files)
147 155 arraytext = array.array('c', text)
148 156 cachedelta = None
149 157 else:
150 158 added, removed = changed
151 159 addlist = self._mancache[2]
152 160
153 161 checkforbidden(added)
154 162 # combine the changed lists into one list for sorting
155 163 work = [(x, False) for x in added]
156 164 work.extend((x, True) for x in removed)
157 165 # this could use heapq.merge() (from Python 2.6+) or equivalent
158 166 # since the lists are already sorted
159 167 work.sort()
160 168
161 169 delta = []
162 170 dstart = None
163 171 dend = None
164 172 dline = [""]
165 173 start = 0
166 174 # zero copy representation of addlist as a buffer
167 175 addbuf = util.buffer(addlist)
168 176
169 177 # start with a readonly loop that finds the offset of
170 178 # each line and creates the deltas
171 179 for f, todelete in work:
172 180 # bs will either be the index of the item or the insert point
173 181 start, end = self._search(addbuf, f, start)
174 182 if not todelete:
175 183 l = "%s\0%s%s\n" % (f, revlog.hex(map[f]), map.flags(f))
176 184 else:
177 185 if start == end:
178 186 # item we want to delete was not found, error out
179 187 raise AssertionError(
180 188 _("failed to remove %s from manifest") % f)
181 189 l = ""
182 190 if dstart is not None and dstart <= start and dend >= start:
183 191 if dend < end:
184 192 dend = end
185 193 if l:
186 194 dline.append(l)
187 195 else:
188 196 if dstart is not None:
189 197 delta.append([dstart, dend, "".join(dline)])
190 198 dstart = start
191 199 dend = end
192 200 dline = [l]
193 201
194 202 if dstart is not None:
195 203 delta.append([dstart, dend, "".join(dline)])
196 204 # apply the delta to the addlist, and get a delta for addrevision
197 cachedelta = (self.rev(p1), addlistdelta(addlist, delta))
205 deltatext, addlist = addlistdelta(addlist, delta)
206 cachedelta = (self.rev(p1), deltatext)
198 207 arraytext = addlist
199 208 text = util.buffer(arraytext)
200 209
201 210 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
202 211 self._mancache = (n, map, arraytext)
203 212
204 213 return n
General Comments 0
You need to be logged in to leave comments. Login now