##// END OF EJS Templates
revlog: remove lazy index
Matt Mackall -
r13253:61c9bc3d default
parent child Browse files
Show More
@@ -83,8 +83,7 b' def perfindex(ui, repo):'
83 import mercurial.changelog
83 import mercurial.changelog
84 def d():
84 def d():
85 t = repo.changelog.tip()
85 t = repo.changelog.tip()
86 repo.changelog = mercurial.changelog.changelog(repo.sopener)
86 repo.invalidate()
87 repo.changelog._loadindexmap()
88 timer(d)
87 timer(d)
89
88
90 def perfstartup(ui, repo):
89 def perfstartup(ui, repo):
@@ -1412,9 +1412,6 b' class localrepository(repo.repository):'
1412 # Nor do we know which filenodes are missing.
1412 # Nor do we know which filenodes are missing.
1413 msng_filenode_set = {}
1413 msng_filenode_set = {}
1414
1414
1415 junk = mnfst.index[len(mnfst) - 1] # Get around a bug in lazyindex
1416 junk = None
1417
1418 # A changeset always belongs to itself, so the changenode lookup
1415 # A changeset always belongs to itself, so the changenode lookup
1419 # function for a changenode is identity.
1416 # function for a changenode is identity.
1420 def identity(x):
1417 def identity(x):
@@ -38,7 +38,7 b' def parse_index(data, inline):'
38 cache = None
38 cache = None
39 nodemap = {nullid: nullrev}
39 nodemap = {nullid: nullrev}
40 n = off = 0
40 n = off = 0
41 # if we're not using lazymap, always read the whole index
41
42 l = len(data) - s
42 l = len(data) - s
43 append = index.append
43 append = index.append
44 if inline:
44 if inline:
@@ -38,11 +38,9 b' REVIDX_PARENTDELTA = 1'
38 REVIDX_PUNCHED_FLAG = 2
38 REVIDX_PUNCHED_FLAG = 2
39 REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA
39 REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA
40
40
41 # amount of data read unconditionally, should be >= 4
42 # when not inline: threshold for using lazy index
43 _prereadsize = 1048576
44 # max size of revlog with inline data
41 # max size of revlog with inline data
45 _maxinline = 131072
42 _maxinline = 131072
43 _chunksize = 1048576
46
44
47 RevlogError = error.RevlogError
45 RevlogError = error.RevlogError
48 LookupError = error.LookupError
46 LookupError = error.LookupError
@@ -121,209 +119,6 b' def decompress(bin):'
121 return bin[1:]
119 return bin[1:]
122 raise RevlogError(_("unknown compression type %r") % t)
120 raise RevlogError(_("unknown compression type %r") % t)
123
121
124 class lazyparser(object):
125 """
126 this class avoids the need to parse the entirety of large indices
127 """
128
129 # lazyparser is not safe to use on windows if win32 extensions not
130 # available. it keeps file handle open, which make it not possible
131 # to break hardlinks on local cloned repos.
132
133 def __init__(self, dataf):
134 try:
135 size = util.fstat(dataf).st_size
136 except AttributeError:
137 size = 0
138 self.dataf = dataf
139 self.s = struct.calcsize(indexformatng)
140 self.datasize = size
141 self.l = size // self.s
142 self.index = [None] * self.l
143 self.map = {nullid: nullrev}
144 self.allmap = 0
145 self.all = 0
146 self.mapfind_count = 0
147
148 def loadmap(self):
149 """
150 during a commit, we need to make sure the rev being added is
151 not a duplicate. This requires loading the entire index,
152 which is fairly slow. loadmap can load up just the node map,
153 which takes much less time.
154 """
155 if self.allmap:
156 return
157 end = self.datasize
158 self.allmap = 1
159 cur = 0
160 count = 0
161 blocksize = self.s * 256
162 self.dataf.seek(0)
163 while cur < end:
164 data = self.dataf.read(blocksize)
165 off = 0
166 for x in xrange(256):
167 n = data[off + ngshaoffset:off + ngshaoffset + 20]
168 self.map[n] = count
169 count += 1
170 if count >= self.l:
171 break
172 off += self.s
173 cur += blocksize
174
175 def loadblock(self, blockstart, blocksize, data=None):
176 if self.all:
177 return
178 if data is None:
179 self.dataf.seek(blockstart)
180 if blockstart + blocksize > self.datasize:
181 # the revlog may have grown since we've started running,
182 # but we don't have space in self.index for more entries.
183 # limit blocksize so that we don't get too much data.
184 blocksize = max(self.datasize - blockstart, 0)
185 data = self.dataf.read(blocksize)
186 lend = len(data) // self.s
187 i = blockstart // self.s
188 off = 0
189 # lazyindex supports __delitem__
190 if lend > len(self.index) - i:
191 lend = len(self.index) - i
192 for x in xrange(lend):
193 if self.index[i + x] is None:
194 b = data[off : off + self.s]
195 self.index[i + x] = b
196 n = b[ngshaoffset:ngshaoffset + 20]
197 self.map[n] = i + x
198 off += self.s
199
200 def findnode(self, node):
201 """search backwards through the index file for a specific node"""
202 if self.allmap:
203 return None
204
205 # hg log will cause many many searches for the manifest
206 # nodes. After we get called a few times, just load the whole
207 # thing.
208 if self.mapfind_count > 8:
209 self.loadmap()
210 if node in self.map:
211 return node
212 return None
213 self.mapfind_count += 1
214 last = self.l - 1
215 while self.index[last] is not None:
216 if last == 0:
217 self.all = 1
218 self.allmap = 1
219 return None
220 last -= 1
221 end = (last + 1) * self.s
222 blocksize = self.s * 256
223 while end >= 0:
224 start = max(end - blocksize, 0)
225 self.dataf.seek(start)
226 data = self.dataf.read(end - start)
227 findend = end - start
228 while True:
229 # we're searching backwards, so we have to make sure
230 # we don't find a changeset where this node is a parent
231 off = data.find(node, 0, findend)
232 findend = off
233 if off >= 0:
234 i = off / self.s
235 off = i * self.s
236 n = data[off + ngshaoffset:off + ngshaoffset + 20]
237 if n == node:
238 self.map[n] = i + start / self.s
239 return node
240 else:
241 break
242 end -= blocksize
243 return None
244
245 def loadindex(self, i=None, end=None):
246 if self.all:
247 return
248 all = False
249 if i is None:
250 blockstart = 0
251 blocksize = (65536 / self.s) * self.s
252 end = self.datasize
253 all = True
254 else:
255 if end:
256 blockstart = i * self.s
257 end = end * self.s
258 blocksize = end - blockstart
259 else:
260 blockstart = (i & ~1023) * self.s
261 blocksize = self.s * 1024
262 end = blockstart + blocksize
263 while blockstart < end:
264 self.loadblock(blockstart, blocksize)
265 blockstart += blocksize
266 if all:
267 self.all = True
268
269 class lazyindex(object):
270 """a lazy version of the index array"""
271 def __init__(self, parser):
272 self.p = parser
273 def __len__(self):
274 return len(self.p.index)
275 def load(self, pos):
276 if pos < 0:
277 pos += len(self.p.index)
278 self.p.loadindex(pos)
279 return self.p.index[pos]
280 def __getitem__(self, pos):
281 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
282 def __setitem__(self, pos, item):
283 self.p.index[pos] = _pack(indexformatng, *item)
284 def __delitem__(self, pos):
285 del self.p.index[pos]
286 def insert(self, pos, e):
287 self.p.index.insert(pos, _pack(indexformatng, *e))
288 def append(self, e):
289 self.p.index.append(_pack(indexformatng, *e))
290
291 class lazymap(object):
292 """a lazy version of the node map"""
293 def __init__(self, parser):
294 self.p = parser
295 def load(self, key):
296 n = self.p.findnode(key)
297 if n is None:
298 raise KeyError(key)
299 def __contains__(self, key):
300 if key in self.p.map:
301 return True
302 self.p.loadmap()
303 return key in self.p.map
304 def __iter__(self):
305 yield nullid
306 for i, ret in enumerate(self.p.index):
307 if not ret:
308 self.p.loadindex(i)
309 ret = self.p.index[i]
310 if isinstance(ret, str):
311 ret = _unpack(indexformatng, ret)
312 yield ret[7]
313 def __getitem__(self, key):
314 try:
315 return self.p.map[key]
316 except KeyError:
317 try:
318 self.load(key)
319 return self.p.map[key]
320 except KeyError:
321 raise KeyError("node " + hex(key))
322 def __setitem__(self, key, val):
323 self.p.map[key] = val
324 def __delitem__(self, key):
325 del self.p.map[key]
326
327 indexformatv0 = ">4l20s20s20s"
122 indexformatv0 = ">4l20s20s20s"
328 v0shaoffset = 56
123 v0shaoffset = 56
329
124
@@ -336,8 +131,6 b' class revlogoldio(object):'
336 index = []
131 index = []
337 nodemap = {nullid: nullrev}
132 nodemap = {nullid: nullrev}
338 n = off = 0
133 n = off = 0
339 if len(data) == _prereadsize:
340 data += fp.read() # read the rest
341 l = len(data)
134 l = len(data)
342 while off + s <= l:
135 while off + s <= l:
343 cur = data[off:off + s]
136 cur = data[off:off + s]
@@ -378,20 +171,6 b' class revlogio(object):'
378 self.size = struct.calcsize(indexformatng)
171 self.size = struct.calcsize(indexformatng)
379
172
380 def parseindex(self, fp, data, inline):
173 def parseindex(self, fp, data, inline):
381 if len(data) == _prereadsize:
382 if util.openhardlinks() and not inline:
383 # big index, let's parse it on demand
384 parser = lazyparser(fp)
385 index = lazyindex(parser)
386 nodemap = lazymap(parser)
387 e = list(index[0])
388 type = gettype(e[0])
389 e[0] = offset_type(0, type)
390 index[0] = e
391 return index, nodemap, None
392 else:
393 data += fp.read()
394
395 # call the C implementation to parse the index data
174 # call the C implementation to parse the index data
396 index, nodemap, cache = parsers.parse_index(data, inline)
175 index, nodemap, cache = parsers.parse_index(data, inline)
397 return index, nodemap, cache
176 return index, nodemap, cache
@@ -458,10 +237,7 b' class revlog(object):'
458 i = ''
237 i = ''
459 try:
238 try:
460 f = self.opener(self.indexfile)
239 f = self.opener(self.indexfile)
461 if "nonlazy" in getattr(self.opener, 'options', {}):
240 i = f.read()
462 i = f.read()
463 else:
464 i = f.read(_prereadsize)
465 if len(i) > 0:
241 if len(i) > 0:
466 v = struct.unpack(versionformat, i[:4])[0]
242 v = struct.unpack(versionformat, i[:4])[0]
467 except IOError, inst:
243 except IOError, inst:
@@ -496,28 +272,9 b' class revlog(object):'
496 self._chunkclear()
272 self._chunkclear()
497
273
498 # add the magic null revision at -1 (if it hasn't been done already)
274 # add the magic null revision at -1 (if it hasn't been done already)
499 if (self.index == [] or isinstance(self.index, lazyindex) or
275 if self.index == [] or self.index[-1][7] != nullid:
500 self.index[-1][7] != nullid) :
501 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
276 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
502
277
503 def _loadindex(self, start, end):
504 """load a block of indexes all at once from the lazy parser"""
505 if isinstance(self.index, lazyindex):
506 self.index.p.loadindex(start, end)
507
508 def _loadindexmap(self):
509 """loads both the map and the index from the lazy parser"""
510 if isinstance(self.index, lazyindex):
511 p = self.index.p
512 p.loadindex()
513 self.nodemap = p.map
514
515 def _loadmap(self):
516 """loads the map from the lazy parser"""
517 if isinstance(self.nodemap, lazymap):
518 self.nodemap.p.loadmap()
519 self.nodemap = self.nodemap.p.map
520
521 def tip(self):
278 def tip(self):
522 return self.node(len(self.index) - 2)
279 return self.node(len(self.index) - 2)
523 def __len__(self):
280 def __len__(self):
@@ -978,7 +735,7 b' class revlog(object):'
978 def _addchunk(self, offset, data):
735 def _addchunk(self, offset, data):
979 o, d = self._chunkcache
736 o, d = self._chunkcache
980 # try to add to existing cache
737 # try to add to existing cache
981 if o + len(d) == offset and len(d) + len(data) < _prereadsize:
738 if o + len(d) == offset and len(d) + len(data) < _chunksize:
982 self._chunkcache = o, d + data
739 self._chunkcache = o, d + data
983 else:
740 else:
984 self._chunkcache = offset, data
741 self._chunkcache = offset, data
@@ -1060,7 +817,6 b' class revlog(object):'
1060 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
817 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1061
818
1062 # build delta chain
819 # build delta chain
1063 self._loadindex(base, rev + 1)
1064 chain = []
820 chain = []
1065 index = self.index # for performance
821 index = self.index # for performance
1066 iterrev = rev
822 iterrev = rev
@@ -1413,9 +1169,6 b' class revlog(object):'
1413 if len(self) == 0:
1169 if len(self) == 0:
1414 return
1170 return
1415
1171
1416 if isinstance(self.index, lazyindex):
1417 self._loadindexmap()
1418
1419 for rev in self:
1172 for rev in self:
1420 if self.index[rev][4] >= minlink:
1173 if self.index[rev][4] >= minlink:
1421 break
1174 break
@@ -77,7 +77,6 b' def build_opener(ui, authinfo):'
77 return httprangereader(f, urlopener)
77 return httprangereader(f, urlopener)
78 return o
78 return o
79
79
80 opener.options = {'nonlazy': 1}
81 return opener
80 return opener
82
81
83 class statichttprepository(localrepo.localrepository):
82 class statichttprepository(localrepo.localrepository):
@@ -21,7 +21,7 b' def py_parseindex(data, inline) :'
21 index = []
21 index = []
22 nodemap = {nullid: nullrev}
22 nodemap = {nullid: nullrev}
23 n = off = 0
23 n = off = 0
24 # if we're not using lazymap, always read the whole index
24
25 l = len(data) - s
25 l = len(data) - s
26 append = index.append
26 append = index.append
27 if inline:
27 if inline:
General Comments 0
You need to be logged in to leave comments. Login now