##// END OF EJS Templates
New lazy index code for revlogs....
mason@suse.com -
r2079:ee96ca27 default
parent child Browse files
Show More
@@ -64,6 +64,7 b' def decompress(bin):'
64 64 raise RevlogError(_("unknown compression type %r") % t)
65 65
66 66 indexformatv0 = ">4l20s20s20s"
67 v0shaoffset = 56
67 68 # index ng:
68 69 # 6 bytes offset
69 70 # 2 bytes flags
@@ -75,48 +76,135 b' indexformatv0 = ">4l20s20s20s"'
75 76 # 4 bytes parent 2 rev
76 77 # 32 bytes: nodeid
77 78 indexformatng = ">Qiiiiii20s12x"
79 ngshaoffset = 32
78 80 versionformat = ">i"
79 81
80 82 class lazyparser(object):
81 83 """
82 84 this class avoids the need to parse the entirety of large indices
83
84 By default we parse and load 1000 entries at a time.
85
86 If no position is specified, we load the whole index, and replace
87 the lazy objects in revlog with the underlying objects for
88 efficiency in cases where we look at most of the nodes.
89 85 """
90 def __init__(self, data, revlog, indexformat):
91 self.data = data
86 def __init__(self, dataf, size, indexformat, shaoffset):
87 self.dataf = dataf
88 self.format = indexformat
92 89 self.s = struct.calcsize(indexformat)
93 90 self.indexformat = indexformat
94 self.l = len(data)/self.s
91 self.datasize = size
92 self.l = size/self.s
95 93 self.index = [None] * self.l
96 94 self.map = {nullid: -1}
95 self.allmap = 0
97 96 self.all = 0
98 self.revlog = revlog
97 self.mapfind_count = 0
98 self.shaoffset = shaoffset
99 99
100 def load(self, pos=None):
100 def loadmap(self):
101 """
102 during a commit, we need to make sure the rev being added is
103 not a duplicate. This requires loading the entire index,
104 which is fairly slow. loadmap can load up just the node map,
105 which takes much less time.
106 """
107 if self.allmap: return
108 start = 0
109 end = self.datasize
110 self.allmap = 1
111 cur = 0
112 count = 0
113 blocksize = self.s * 256
114 self.dataf.seek(0)
115 while cur < end:
116 data = self.dataf.read(blocksize)
117 off = 0
118 for x in xrange(256):
119 n = data[off + self.shaoffset:off + self.shaoffset + 20]
120 self.map[n] = count
121 count += 1
122 if count >= self.l:
123 break
124 off += self.s
125 cur += blocksize
126
127 def loadblock(self, blockstart, blocksize, data=None):
101 128 if self.all: return
102 if pos is not None:
103 block = pos / 1000
104 i = block * 1000
105 end = min(self.l, i + 1000)
106 else:
129 if data is None:
130 self.dataf.seek(blockstart)
131 data = self.dataf.read(blocksize)
132 lend = len(data) / self.s
133 i = blockstart / self.s
134 off = 0
135 for x in xrange(lend):
136 if self.index[i + x] == None:
137 b = data[off : off + self.s]
138 e = struct.unpack(self.format, b)
139 self.index[i + x] = e
140 self.map[e[-1]] = i + x
141 off += self.s
142
143 def findnode(self, node):
144 """search backwards through the index file for a specific node"""
145 if self.allmap: return None
146
147 # hg log will cause many many searches for the manifest
148 # nodes. After we get called a few times, just load the whole
149 # thing.
150 if self.mapfind_count > 8:
151 self.loadmap()
152 if node in self.map:
153 return node
154 return None
155 self.mapfind_count += 1
156 last = self.l - 1
157 while self.index[last] != None:
158 if last == 0:
107 159 self.all = 1
108 i = 0
109 end = self.l
110 self.revlog.index = self.index
111 self.revlog.nodemap = self.map
160 self.allmap = 1
161 return None
162 last -= 1
163 end = (last + 1) * self.s
164 blocksize = self.s * 256
165 while end >= 0:
166 start = max(end - blocksize, 0)
167 self.dataf.seek(start)
168 data = self.dataf.read(end - start)
169 findend = end - start
170 while True:
171 # we're searching backwards, so we have to make sure
172 # we don't find a changeset where this node is a parent
173 off = data.rfind(node, 0, findend)
174 findend = off
175 if off >= 0:
176 i = off / self.s
177 off = i * self.s
178 n = data[off + self.shaoffset:off + self.shaoffset + 20]
179 if n == node:
180 self.map[n] = i + start / self.s
181 return node
182 else:
183 break
184 end -= blocksize
185 return None
112 186
113 while i < end:
114 if not self.index[i]:
115 d = self.data[i * self.s: (i + 1) * self.s]
116 e = struct.unpack(self.indexformat, d)
117 self.index[i] = e
118 self.map[e[-1]] = i
119 i += 1
187 def loadindex(self, i=None, end=None):
188 if self.all: return
189 all = False
190 if i == None:
191 blockstart = 0
192 blocksize = (512 / self.s) * self.s
193 end = self.datasize
194 all = True
195 else:
196 if end:
197 blockstart = i * self.s
198 end = end * self.s
199 blocksize = end - blockstart
200 else:
201 blockstart = (i & ~(32)) * self.s
202 blocksize = self.s * 64
203 end = blockstart + blocksize
204 while blockstart < end:
205 self.loadblock(blockstart, blocksize)
206 blockstart += blocksize
207 if all: self.all = True
120 208
121 209 class lazyindex(object):
122 210 """a lazy version of the index array"""
@@ -127,7 +215,7 b' class lazyindex(object):'
127 215 def load(self, pos):
128 216 if pos < 0:
129 217 pos += len(self.p.index)
130 self.p.load(pos)
218 self.p.loadindex(pos)
131 219 return self.p.index[pos]
132 220 def __getitem__(self, pos):
133 221 return self.p.index[pos] or self.load(pos)
@@ -143,14 +231,13 b' class lazymap(object):'
143 231 def __init__(self, parser):
144 232 self.p = parser
145 233 def load(self, key):
146 if self.p.all: return
147 n = self.p.data.find(key)
148 if n < 0:
234 n = self.p.findnode(key)
235 if n == None:
149 236 raise KeyError(key)
150 pos = n / self.p.s
151 self.p.load(pos)
152 237 def __contains__(self, key):
153 self.p.load()
238 if key in self.p.map:
239 return True
240 self.p.loadmap()
154 241 return key in self.p.map
155 242 def __iter__(self):
156 243 yield nullid
@@ -158,7 +245,7 b' class lazymap(object):'
158 245 try:
159 246 yield self.p.index[i][-1]
160 247 except:
161 self.p.load(i)
248 self.p.loadindex(i)
162 249 yield self.p.index[i][-1]
163 250 def __getitem__(self, key):
164 251 try:
@@ -222,7 +309,8 b' class revlog(object):'
222 309 v = self.defversion
223 310 try:
224 311 f = self.opener(self.indexfile)
225 i = f.read()
312 i = f.read(4)
313 f.seek(0)
226 314 except IOError, inst:
227 315 if inst.errno != errno.ENOENT:
228 316 raise
@@ -241,7 +329,7 b' class revlog(object):'
241 329 return
242 330 self.indexstat = st
243 331 if len(i) > 0:
244 v = struct.unpack(versionformat, i[:4])[0]
332 v = struct.unpack(versionformat, i)[0]
245 333 flags = v & ~0xFFFF
246 334 fmt = v & 0xFFFF
247 335 if fmt == 0:
@@ -258,16 +346,19 b' class revlog(object):'
258 346 self.version = v
259 347 if v == 0:
260 348 self.indexformat = indexformatv0
349 shaoffset = v0shaoffset
261 350 else:
262 351 self.indexformat = indexformatng
352 shaoffset = ngshaoffset
263 353
264 354 if i:
265 355 if not self.inlinedata() and st and st.st_size > 10000:
266 356 # big index, let's parse it on demand
267 parser = lazyparser(i, self, self.indexformat)
357 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
268 358 self.index = lazyindex(parser)
269 359 self.nodemap = lazymap(parser)
270 360 else:
361 i = f.read()
271 362 self.parseindex(i)
272 363 if self.inlinedata():
273 364 # we've already got the entire data file read in, save it
@@ -312,11 +403,23 b' class revlog(object):'
312 403 def offset_type(self, offset, type):
313 404 return long(long(offset) << 16 | type)
314 405
406 def loadindex(self, start, end):
407 """load a block of indexes all at once from the lazy parser"""
408 if isinstance(self.index, lazyindex):
409 self.index.p.loadindex(start, end)
410
315 411 def loadindexmap(self):
316 412 """loads both the map and the index from the lazy parser"""
317 413 if isinstance(self.index, lazyindex):
318 414 p = self.index.p
319 p.load()
415 p.loadindex()
416 self.nodemap = p.map
417
418 def loadmap(self):
419 """loads the map from the lazy parser"""
420 if isinstance(self.nodemap, lazymap):
421 self.nodemap.p.loadmap()
422 self.nodemap = self.nodemap.p.map
320 423
321 424 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
322 425 def tip(self): return self.node(len(self.index) - 1)
@@ -690,7 +793,9 b' class revlog(object):'
690 793 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
691 794 base = self.cache[1]
692 795 text = self.cache[2]
796 self.loadindex(base, rev + 1)
693 797 else:
798 self.loadindex(base, rev + 1)
694 799 text = self.chunk(base, df=df)
695 800
696 801 bins = []
General Comments 0
You need to be logged in to leave comments. Login now