##// END OF EJS Templates
revlog: remove lazy index
Matt Mackall -
r13253:61c9bc3d default
parent child Browse files
Show More
@@ -83,8 +83,7 b' def perfindex(ui, repo):'
83 83 import mercurial.changelog
84 84 def d():
85 85 t = repo.changelog.tip()
86 repo.changelog = mercurial.changelog.changelog(repo.sopener)
87 repo.changelog._loadindexmap()
86 repo.invalidate()
88 87 timer(d)
89 88
90 89 def perfstartup(ui, repo):
@@ -1412,9 +1412,6 b' class localrepository(repo.repository):'
1412 1412 # Nor do we know which filenodes are missing.
1413 1413 msng_filenode_set = {}
1414 1414
1415 junk = mnfst.index[len(mnfst) - 1] # Get around a bug in lazyindex
1416 junk = None
1417
1418 1415 # A changeset always belongs to itself, so the changenode lookup
1419 1416 # function for a changenode is identity.
1420 1417 def identity(x):
@@ -38,7 +38,7 b' def parse_index(data, inline):'
38 38 cache = None
39 39 nodemap = {nullid: nullrev}
40 40 n = off = 0
41 # if we're not using lazymap, always read the whole index
41
42 42 l = len(data) - s
43 43 append = index.append
44 44 if inline:
@@ -38,11 +38,9 b' REVIDX_PARENTDELTA = 1'
38 38 REVIDX_PUNCHED_FLAG = 2
39 39 REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA
40 40
41 # amount of data read unconditionally, should be >= 4
42 # when not inline: threshold for using lazy index
43 _prereadsize = 1048576
44 41 # max size of revlog with inline data
45 42 _maxinline = 131072
43 _chunksize = 1048576
46 44
47 45 RevlogError = error.RevlogError
48 46 LookupError = error.LookupError
@@ -121,209 +119,6 b' def decompress(bin):'
121 119 return bin[1:]
122 120 raise RevlogError(_("unknown compression type %r") % t)
123 121
124 class lazyparser(object):
125 """
126 this class avoids the need to parse the entirety of large indices
127 """
128
129 # lazyparser is not safe to use on windows if win32 extensions not
130 # available. it keeps file handle open, which make it not possible
131 # to break hardlinks on local cloned repos.
132
133 def __init__(self, dataf):
134 try:
135 size = util.fstat(dataf).st_size
136 except AttributeError:
137 size = 0
138 self.dataf = dataf
139 self.s = struct.calcsize(indexformatng)
140 self.datasize = size
141 self.l = size // self.s
142 self.index = [None] * self.l
143 self.map = {nullid: nullrev}
144 self.allmap = 0
145 self.all = 0
146 self.mapfind_count = 0
147
148 def loadmap(self):
149 """
150 during a commit, we need to make sure the rev being added is
151 not a duplicate. This requires loading the entire index,
152 which is fairly slow. loadmap can load up just the node map,
153 which takes much less time.
154 """
155 if self.allmap:
156 return
157 end = self.datasize
158 self.allmap = 1
159 cur = 0
160 count = 0
161 blocksize = self.s * 256
162 self.dataf.seek(0)
163 while cur < end:
164 data = self.dataf.read(blocksize)
165 off = 0
166 for x in xrange(256):
167 n = data[off + ngshaoffset:off + ngshaoffset + 20]
168 self.map[n] = count
169 count += 1
170 if count >= self.l:
171 break
172 off += self.s
173 cur += blocksize
174
175 def loadblock(self, blockstart, blocksize, data=None):
176 if self.all:
177 return
178 if data is None:
179 self.dataf.seek(blockstart)
180 if blockstart + blocksize > self.datasize:
181 # the revlog may have grown since we've started running,
182 # but we don't have space in self.index for more entries.
183 # limit blocksize so that we don't get too much data.
184 blocksize = max(self.datasize - blockstart, 0)
185 data = self.dataf.read(blocksize)
186 lend = len(data) // self.s
187 i = blockstart // self.s
188 off = 0
189 # lazyindex supports __delitem__
190 if lend > len(self.index) - i:
191 lend = len(self.index) - i
192 for x in xrange(lend):
193 if self.index[i + x] is None:
194 b = data[off : off + self.s]
195 self.index[i + x] = b
196 n = b[ngshaoffset:ngshaoffset + 20]
197 self.map[n] = i + x
198 off += self.s
199
200 def findnode(self, node):
201 """search backwards through the index file for a specific node"""
202 if self.allmap:
203 return None
204
205 # hg log will cause many many searches for the manifest
206 # nodes. After we get called a few times, just load the whole
207 # thing.
208 if self.mapfind_count > 8:
209 self.loadmap()
210 if node in self.map:
211 return node
212 return None
213 self.mapfind_count += 1
214 last = self.l - 1
215 while self.index[last] is not None:
216 if last == 0:
217 self.all = 1
218 self.allmap = 1
219 return None
220 last -= 1
221 end = (last + 1) * self.s
222 blocksize = self.s * 256
223 while end >= 0:
224 start = max(end - blocksize, 0)
225 self.dataf.seek(start)
226 data = self.dataf.read(end - start)
227 findend = end - start
228 while True:
229 # we're searching backwards, so we have to make sure
230 # we don't find a changeset where this node is a parent
231 off = data.find(node, 0, findend)
232 findend = off
233 if off >= 0:
234 i = off / self.s
235 off = i * self.s
236 n = data[off + ngshaoffset:off + ngshaoffset + 20]
237 if n == node:
238 self.map[n] = i + start / self.s
239 return node
240 else:
241 break
242 end -= blocksize
243 return None
244
245 def loadindex(self, i=None, end=None):
246 if self.all:
247 return
248 all = False
249 if i is None:
250 blockstart = 0
251 blocksize = (65536 / self.s) * self.s
252 end = self.datasize
253 all = True
254 else:
255 if end:
256 blockstart = i * self.s
257 end = end * self.s
258 blocksize = end - blockstart
259 else:
260 blockstart = (i & ~1023) * self.s
261 blocksize = self.s * 1024
262 end = blockstart + blocksize
263 while blockstart < end:
264 self.loadblock(blockstart, blocksize)
265 blockstart += blocksize
266 if all:
267 self.all = True
268
269 class lazyindex(object):
270 """a lazy version of the index array"""
271 def __init__(self, parser):
272 self.p = parser
273 def __len__(self):
274 return len(self.p.index)
275 def load(self, pos):
276 if pos < 0:
277 pos += len(self.p.index)
278 self.p.loadindex(pos)
279 return self.p.index[pos]
280 def __getitem__(self, pos):
281 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
282 def __setitem__(self, pos, item):
283 self.p.index[pos] = _pack(indexformatng, *item)
284 def __delitem__(self, pos):
285 del self.p.index[pos]
286 def insert(self, pos, e):
287 self.p.index.insert(pos, _pack(indexformatng, *e))
288 def append(self, e):
289 self.p.index.append(_pack(indexformatng, *e))
290
291 class lazymap(object):
292 """a lazy version of the node map"""
293 def __init__(self, parser):
294 self.p = parser
295 def load(self, key):
296 n = self.p.findnode(key)
297 if n is None:
298 raise KeyError(key)
299 def __contains__(self, key):
300 if key in self.p.map:
301 return True
302 self.p.loadmap()
303 return key in self.p.map
304 def __iter__(self):
305 yield nullid
306 for i, ret in enumerate(self.p.index):
307 if not ret:
308 self.p.loadindex(i)
309 ret = self.p.index[i]
310 if isinstance(ret, str):
311 ret = _unpack(indexformatng, ret)
312 yield ret[7]
313 def __getitem__(self, key):
314 try:
315 return self.p.map[key]
316 except KeyError:
317 try:
318 self.load(key)
319 return self.p.map[key]
320 except KeyError:
321 raise KeyError("node " + hex(key))
322 def __setitem__(self, key, val):
323 self.p.map[key] = val
324 def __delitem__(self, key):
325 del self.p.map[key]
326
327 122 indexformatv0 = ">4l20s20s20s"
328 123 v0shaoffset = 56
329 124
@@ -336,8 +131,6 b' class revlogoldio(object):'
336 131 index = []
337 132 nodemap = {nullid: nullrev}
338 133 n = off = 0
339 if len(data) == _prereadsize:
340 data += fp.read() # read the rest
341 134 l = len(data)
342 135 while off + s <= l:
343 136 cur = data[off:off + s]
@@ -378,20 +171,6 b' class revlogio(object):'
378 171 self.size = struct.calcsize(indexformatng)
379 172
380 173 def parseindex(self, fp, data, inline):
381 if len(data) == _prereadsize:
382 if util.openhardlinks() and not inline:
383 # big index, let's parse it on demand
384 parser = lazyparser(fp)
385 index = lazyindex(parser)
386 nodemap = lazymap(parser)
387 e = list(index[0])
388 type = gettype(e[0])
389 e[0] = offset_type(0, type)
390 index[0] = e
391 return index, nodemap, None
392 else:
393 data += fp.read()
394
395 174 # call the C implementation to parse the index data
396 175 index, nodemap, cache = parsers.parse_index(data, inline)
397 176 return index, nodemap, cache
@@ -458,10 +237,7 b' class revlog(object):'
458 237 i = ''
459 238 try:
460 239 f = self.opener(self.indexfile)
461 if "nonlazy" in getattr(self.opener, 'options', {}):
462 i = f.read()
463 else:
464 i = f.read(_prereadsize)
240 i = f.read()
465 241 if len(i) > 0:
466 242 v = struct.unpack(versionformat, i[:4])[0]
467 243 except IOError, inst:
@@ -496,28 +272,9 b' class revlog(object):'
496 272 self._chunkclear()
497 273
498 274 # add the magic null revision at -1 (if it hasn't been done already)
499 if (self.index == [] or isinstance(self.index, lazyindex) or
500 self.index[-1][7] != nullid) :
275 if self.index == [] or self.index[-1][7] != nullid:
501 276 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
502 277
503 def _loadindex(self, start, end):
504 """load a block of indexes all at once from the lazy parser"""
505 if isinstance(self.index, lazyindex):
506 self.index.p.loadindex(start, end)
507
508 def _loadindexmap(self):
509 """loads both the map and the index from the lazy parser"""
510 if isinstance(self.index, lazyindex):
511 p = self.index.p
512 p.loadindex()
513 self.nodemap = p.map
514
515 def _loadmap(self):
516 """loads the map from the lazy parser"""
517 if isinstance(self.nodemap, lazymap):
518 self.nodemap.p.loadmap()
519 self.nodemap = self.nodemap.p.map
520
521 278 def tip(self):
522 279 return self.node(len(self.index) - 2)
523 280 def __len__(self):
@@ -978,7 +735,7 b' class revlog(object):'
978 735 def _addchunk(self, offset, data):
979 736 o, d = self._chunkcache
980 737 # try to add to existing cache
981 if o + len(d) == offset and len(d) + len(data) < _prereadsize:
738 if o + len(d) == offset and len(d) + len(data) < _chunksize:
982 739 self._chunkcache = o, d + data
983 740 else:
984 741 self._chunkcache = offset, data
@@ -1060,7 +817,6 b' class revlog(object):'
1060 817 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1061 818
1062 819 # build delta chain
1063 self._loadindex(base, rev + 1)
1064 820 chain = []
1065 821 index = self.index # for performance
1066 822 iterrev = rev
@@ -1413,9 +1169,6 b' class revlog(object):'
1413 1169 if len(self) == 0:
1414 1170 return
1415 1171
1416 if isinstance(self.index, lazyindex):
1417 self._loadindexmap()
1418
1419 1172 for rev in self:
1420 1173 if self.index[rev][4] >= minlink:
1421 1174 break
@@ -77,7 +77,6 b' def build_opener(ui, authinfo):'
77 77 return httprangereader(f, urlopener)
78 78 return o
79 79
80 opener.options = {'nonlazy': 1}
81 80 return opener
82 81
83 82 class statichttprepository(localrepo.localrepository):
@@ -21,7 +21,7 b' def py_parseindex(data, inline) :'
21 21 index = []
22 22 nodemap = {nullid: nullrev}
23 23 n = off = 0
24 # if we're not using lazymap, always read the whole index
24
25 25 l = len(data) - s
26 26 append = index.append
27 27 if inline:
General Comments 0
You need to be logged in to leave comments. Login now