##// END OF EJS Templates
New lazy index code for revlogs....
mason@suse.com -
r2079:ee96ca27 default
parent child Browse files
Show More
@@ -64,6 +64,7 b' def decompress(bin):'
64 raise RevlogError(_("unknown compression type %r") % t)
64 raise RevlogError(_("unknown compression type %r") % t)
65
65
66 indexformatv0 = ">4l20s20s20s"
66 indexformatv0 = ">4l20s20s20s"
67 v0shaoffset = 56
67 # index ng:
68 # index ng:
68 # 6 bytes offset
69 # 6 bytes offset
69 # 2 bytes flags
70 # 2 bytes flags
@@ -75,48 +76,135 b' indexformatv0 = ">4l20s20s20s"'
75 # 4 bytes parent 2 rev
76 # 4 bytes parent 2 rev
76 # 32 bytes: nodeid
77 # 32 bytes: nodeid
77 indexformatng = ">Qiiiiii20s12x"
78 indexformatng = ">Qiiiiii20s12x"
79 ngshaoffset = 32
78 versionformat = ">i"
80 versionformat = ">i"
79
81
80 class lazyparser(object):
82 class lazyparser(object):
81 """
83 """
82 this class avoids the need to parse the entirety of large indices
84 this class avoids the need to parse the entirety of large indices
83
84 By default we parse and load 1000 entries at a time.
85
86 If no position is specified, we load the whole index, and replace
87 the lazy objects in revlog with the underlying objects for
88 efficiency in cases where we look at most of the nodes.
89 """
85 """
90 def __init__(self, data, revlog, indexformat):
86 def __init__(self, dataf, size, indexformat, shaoffset):
91 self.data = data
87 self.dataf = dataf
88 self.format = indexformat
92 self.s = struct.calcsize(indexformat)
89 self.s = struct.calcsize(indexformat)
93 self.indexformat = indexformat
90 self.indexformat = indexformat
94 self.l = len(data)/self.s
91 self.datasize = size
92 self.l = size/self.s
95 self.index = [None] * self.l
93 self.index = [None] * self.l
96 self.map = {nullid: -1}
94 self.map = {nullid: -1}
95 self.allmap = 0
97 self.all = 0
96 self.all = 0
98 self.revlog = revlog
97 self.mapfind_count = 0
98 self.shaoffset = shaoffset
99
99
100 def load(self, pos=None):
100 def loadmap(self):
101 """
102 during a commit, we need to make sure the rev being added is
103 not a duplicate. This requires loading the entire index,
104 which is fairly slow. loadmap can load up just the node map,
105 which takes much less time.
106 """
107 if self.allmap: return
108 start = 0
109 end = self.datasize
110 self.allmap = 1
111 cur = 0
112 count = 0
113 blocksize = self.s * 256
114 self.dataf.seek(0)
115 while cur < end:
116 data = self.dataf.read(blocksize)
117 off = 0
118 for x in xrange(256):
119 n = data[off + self.shaoffset:off + self.shaoffset + 20]
120 self.map[n] = count
121 count += 1
122 if count >= self.l:
123 break
124 off += self.s
125 cur += blocksize
126
127 def loadblock(self, blockstart, blocksize, data=None):
101 if self.all: return
128 if self.all: return
102 if pos is not None:
129 if data is None:
103 block = pos / 1000
130 self.dataf.seek(blockstart)
104 i = block * 1000
131 data = self.dataf.read(blocksize)
105 end = min(self.l, i + 1000)
132 lend = len(data) / self.s
106 else:
133 i = blockstart / self.s
134 off = 0
135 for x in xrange(lend):
136 if self.index[i + x] == None:
137 b = data[off : off + self.s]
138 e = struct.unpack(self.format, b)
139 self.index[i + x] = e
140 self.map[e[-1]] = i + x
141 off += self.s
142
143 def findnode(self, node):
144 """search backwards through the index file for a specific node"""
145 if self.allmap: return None
146
147 # hg log will cause many many searches for the manifest
148 # nodes. After we get called a few times, just load the whole
149 # thing.
150 if self.mapfind_count > 8:
151 self.loadmap()
152 if node in self.map:
153 return node
154 return None
155 self.mapfind_count += 1
156 last = self.l - 1
157 while self.index[last] != None:
158 if last == 0:
107 self.all = 1
159 self.all = 1
108 i = 0
160 self.allmap = 1
109 end = self.l
161 return None
110 self.revlog.index = self.index
162 last -= 1
111 self.revlog.nodemap = self.map
163 end = (last + 1) * self.s
164 blocksize = self.s * 256
165 while end >= 0:
166 start = max(end - blocksize, 0)
167 self.dataf.seek(start)
168 data = self.dataf.read(end - start)
169 findend = end - start
170 while True:
171 # we're searching backwards, so weh have to make sure
172 # we don't find a changeset where this node is a parent
173 off = data.rfind(node, 0, findend)
174 findend = off
175 if off >= 0:
176 i = off / self.s
177 off = i * self.s
178 n = data[off + self.shaoffset:off + self.shaoffset + 20]
179 if n == node:
180 self.map[n] = i + start / self.s
181 return node
182 else:
183 break
184 end -= blocksize
185 return None
112
186
113 while i < end:
187 def loadindex(self, i=None, end=None):
114 if not self.index[i]:
188 if self.all: return
115 d = self.data[i * self.s: (i + 1) * self.s]
189 all = False
116 e = struct.unpack(self.indexformat, d)
190 if i == None:
117 self.index[i] = e
191 blockstart = 0
118 self.map[e[-1]] = i
192 blocksize = (512 / self.s) * self.s
119 i += 1
193 end = self.datasize
194 all = True
195 else:
196 if end:
197 blockstart = i * self.s
198 end = end * self.s
199 blocksize = end - blockstart
200 else:
201 blockstart = (i & ~(32)) * self.s
202 blocksize = self.s * 64
203 end = blockstart + blocksize
204 while blockstart < end:
205 self.loadblock(blockstart, blocksize)
206 blockstart += blocksize
207 if all: self.all = True
120
208
121 class lazyindex(object):
209 class lazyindex(object):
122 """a lazy version of the index array"""
210 """a lazy version of the index array"""
@@ -127,7 +215,7 b' class lazyindex(object):'
127 def load(self, pos):
215 def load(self, pos):
128 if pos < 0:
216 if pos < 0:
129 pos += len(self.p.index)
217 pos += len(self.p.index)
130 self.p.load(pos)
218 self.p.loadindex(pos)
131 return self.p.index[pos]
219 return self.p.index[pos]
132 def __getitem__(self, pos):
220 def __getitem__(self, pos):
133 return self.p.index[pos] or self.load(pos)
221 return self.p.index[pos] or self.load(pos)
@@ -143,14 +231,13 b' class lazymap(object):'
143 def __init__(self, parser):
231 def __init__(self, parser):
144 self.p = parser
232 self.p = parser
145 def load(self, key):
233 def load(self, key):
146 if self.p.all: return
234 n = self.p.findnode(key)
147 n = self.p.data.find(key)
235 if n == None:
148 if n < 0:
149 raise KeyError(key)
236 raise KeyError(key)
150 pos = n / self.p.s
151 self.p.load(pos)
152 def __contains__(self, key):
237 def __contains__(self, key):
153 self.p.load()
238 if key in self.p.map:
239 return True
240 self.p.loadmap()
154 return key in self.p.map
241 return key in self.p.map
155 def __iter__(self):
242 def __iter__(self):
156 yield nullid
243 yield nullid
@@ -158,7 +245,7 b' class lazymap(object):'
158 try:
245 try:
159 yield self.p.index[i][-1]
246 yield self.p.index[i][-1]
160 except:
247 except:
161 self.p.load(i)
248 self.p.loadindex(i)
162 yield self.p.index[i][-1]
249 yield self.p.index[i][-1]
163 def __getitem__(self, key):
250 def __getitem__(self, key):
164 try:
251 try:
@@ -222,7 +309,8 b' class revlog(object):'
222 v = self.defversion
309 v = self.defversion
223 try:
310 try:
224 f = self.opener(self.indexfile)
311 f = self.opener(self.indexfile)
225 i = f.read()
312 i = f.read(4)
313 f.seek(0)
226 except IOError, inst:
314 except IOError, inst:
227 if inst.errno != errno.ENOENT:
315 if inst.errno != errno.ENOENT:
228 raise
316 raise
@@ -241,7 +329,7 b' class revlog(object):'
241 return
329 return
242 self.indexstat = st
330 self.indexstat = st
243 if len(i) > 0:
331 if len(i) > 0:
244 v = struct.unpack(versionformat, i[:4])[0]
332 v = struct.unpack(versionformat, i)[0]
245 flags = v & ~0xFFFF
333 flags = v & ~0xFFFF
246 fmt = v & 0xFFFF
334 fmt = v & 0xFFFF
247 if fmt == 0:
335 if fmt == 0:
@@ -258,16 +346,19 b' class revlog(object):'
258 self.version = v
346 self.version = v
259 if v == 0:
347 if v == 0:
260 self.indexformat = indexformatv0
348 self.indexformat = indexformatv0
349 shaoffset = v0shaoffset
261 else:
350 else:
262 self.indexformat = indexformatng
351 self.indexformat = indexformatng
352 shaoffset = ngshaoffset
263
353
264 if i:
354 if i:
265 if not self.inlinedata() and st and st.st_size > 10000:
355 if not self.inlinedata() and st and st.st_size > 10000:
266 # big index, let's parse it on demand
356 # big index, let's parse it on demand
267 parser = lazyparser(i, self, self.indexformat)
357 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
268 self.index = lazyindex(parser)
358 self.index = lazyindex(parser)
269 self.nodemap = lazymap(parser)
359 self.nodemap = lazymap(parser)
270 else:
360 else:
361 i = f.read()
271 self.parseindex(i)
362 self.parseindex(i)
272 if self.inlinedata():
363 if self.inlinedata():
273 # we've already got the entire data file read in, save it
364 # we've already got the entire data file read in, save it
@@ -312,11 +403,23 b' class revlog(object):'
312 def offset_type(self, offset, type):
403 def offset_type(self, offset, type):
313 return long(long(offset) << 16 | type)
404 return long(long(offset) << 16 | type)
314
405
406 def loadindex(self, start, end):
407 """load a block of indexes all at once from the lazy parser"""
408 if isinstance(self.index, lazyindex):
409 self.index.p.loadindex(start, end)
410
315 def loadindexmap(self):
411 def loadindexmap(self):
316 """loads both the map and the index from the lazy parser"""
412 """loads both the map and the index from the lazy parser"""
317 if isinstance(self.index, lazyindex):
413 if isinstance(self.index, lazyindex):
318 p = self.index.p
414 p = self.index.p
319 p.load()
415 p.loadindex()
416 self.nodemap = p.map
417
418 def loadmap(self):
419 """loads the map from the lazy parser"""
420 if isinstance(self.nodemap, lazymap):
421 self.nodemap.p.loadmap()
422 self.nodemap = self.nodemap.p.map
320
423
321 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
424 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
322 def tip(self): return self.node(len(self.index) - 1)
425 def tip(self): return self.node(len(self.index) - 1)
@@ -690,7 +793,9 b' class revlog(object):'
690 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
793 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
691 base = self.cache[1]
794 base = self.cache[1]
692 text = self.cache[2]
795 text = self.cache[2]
796 self.loadindex(base, rev + 1)
693 else:
797 else:
798 self.loadindex(base, rev + 1)
694 text = self.chunk(base, df=df)
799 text = self.chunk(base, df=df)
695
800
696 bins = []
801 bins = []
General Comments 0
You need to be logged in to leave comments. Login now