Show More
@@ -64,6 +64,7 b' def decompress(bin):' | |||||
64 | raise RevlogError(_("unknown compression type %r") % t) |
|
64 | raise RevlogError(_("unknown compression type %r") % t) | |
65 |
|
65 | |||
66 | indexformatv0 = ">4l20s20s20s" |
|
66 | indexformatv0 = ">4l20s20s20s" | |
|
67 | v0shaoffset = 56 | |||
67 | # index ng: |
|
68 | # index ng: | |
68 | # 6 bytes offset |
|
69 | # 6 bytes offset | |
69 | # 2 bytes flags |
|
70 | # 2 bytes flags | |
@@ -75,48 +76,135 b' indexformatv0 = ">4l20s20s20s"' | |||||
75 | # 4 bytes parent 2 rev |
|
76 | # 4 bytes parent 2 rev | |
76 | # 32 bytes: nodeid |
|
77 | # 32 bytes: nodeid | |
77 | indexformatng = ">Qiiiiii20s12x" |
|
78 | indexformatng = ">Qiiiiii20s12x" | |
|
79 | ngshaoffset = 32 | |||
78 | versionformat = ">i" |
|
80 | versionformat = ">i" | |
79 |
|
81 | |||
80 | class lazyparser(object): |
|
82 | class lazyparser(object): | |
81 | """ |
|
83 | """ | |
82 | this class avoids the need to parse the entirety of large indices |
|
84 | this class avoids the need to parse the entirety of large indices | |
83 |
|
||||
84 | By default we parse and load 1000 entries at a time. |
|
|||
85 |
|
||||
86 | If no position is specified, we load the whole index, and replace |
|
|||
87 | the lazy objects in revlog with the underlying objects for |
|
|||
88 | efficiency in cases where we look at most of the nodes. |
|
|||
89 | """ |
|
85 | """ | |
90 |
def __init__(self, data, |
|
86 | def __init__(self, dataf, size, indexformat, shaoffset): | |
91 | self.data = data |
|
87 | self.dataf = dataf | |
|
88 | self.format = indexformat | |||
92 | self.s = struct.calcsize(indexformat) |
|
89 | self.s = struct.calcsize(indexformat) | |
93 | self.indexformat = indexformat |
|
90 | self.indexformat = indexformat | |
94 |
self. |
|
91 | self.datasize = size | |
|
92 | self.l = size/self.s | |||
95 | self.index = [None] * self.l |
|
93 | self.index = [None] * self.l | |
96 | self.map = {nullid: -1} |
|
94 | self.map = {nullid: -1} | |
|
95 | self.allmap = 0 | |||
97 | self.all = 0 |
|
96 | self.all = 0 | |
98 | self.revlog = revlog |
|
97 | self.mapfind_count = 0 | |
|
98 | self.shaoffset = shaoffset | |||
99 |
|
99 | |||
100 |
def load(self |
|
100 | def loadmap(self): | |
|
101 | """ | |||
|
102 | during a commit, we need to make sure the rev being added is | |||
|
103 | not a duplicate. This requires loading the entire index, | |||
|
104 | which is fairly slow. loadmap can load up just the node map, | |||
|
105 | which takes much less time. | |||
|
106 | """ | |||
|
107 | if self.allmap: return | |||
|
108 | start = 0 | |||
|
109 | end = self.datasize | |||
|
110 | self.allmap = 1 | |||
|
111 | cur = 0 | |||
|
112 | count = 0 | |||
|
113 | blocksize = self.s * 256 | |||
|
114 | self.dataf.seek(0) | |||
|
115 | while cur < end: | |||
|
116 | data = self.dataf.read(blocksize) | |||
|
117 | off = 0 | |||
|
118 | for x in xrange(256): | |||
|
119 | n = data[off + self.shaoffset:off + self.shaoffset + 20] | |||
|
120 | self.map[n] = count | |||
|
121 | count += 1 | |||
|
122 | if count >= self.l: | |||
|
123 | break | |||
|
124 | off += self.s | |||
|
125 | cur += blocksize | |||
|
126 | ||||
|
127 | def loadblock(self, blockstart, blocksize, data=None): | |||
101 | if self.all: return |
|
128 | if self.all: return | |
102 |
if |
|
129 | if data is None: | |
103 | block = pos / 1000 |
|
130 | self.dataf.seek(blockstart) | |
104 | i = block * 1000 |
|
131 | data = self.dataf.read(blocksize) | |
105 | end = min(self.l, i + 1000) |
|
132 | lend = len(data) / self.s | |
106 | else: |
|
133 | i = blockstart / self.s | |
|
134 | off = 0 | |||
|
135 | for x in xrange(lend): | |||
|
136 | if self.index[i + x] == None: | |||
|
137 | b = data[off : off + self.s] | |||
|
138 | e = struct.unpack(self.format, b) | |||
|
139 | self.index[i + x] = e | |||
|
140 | self.map[e[-1]] = i + x | |||
|
141 | off += self.s | |||
|
142 | ||||
|
143 | def findnode(self, node): | |||
|
144 | """search backwards through the index file for a specific node""" | |||
|
145 | if self.allmap: return None | |||
|
146 | ||||
|
147 | # hg log will cause many many searches for the manifest | |||
|
148 | # nodes. After we get called a few times, just load the whole | |||
|
149 | # thing. | |||
|
150 | if self.mapfind_count > 8: | |||
|
151 | self.loadmap() | |||
|
152 | if node in self.map: | |||
|
153 | return node | |||
|
154 | return None | |||
|
155 | self.mapfind_count += 1 | |||
|
156 | last = self.l - 1 | |||
|
157 | while self.index[last] != None: | |||
|
158 | if last == 0: | |||
107 | self.all = 1 |
|
159 | self.all = 1 | |
108 |
|
|
160 | self.allmap = 1 | |
109 |
en |
|
161 | return None | |
110 | self.revlog.index = self.index |
|
162 | last -= 1 | |
111 | self.revlog.nodemap = self.map |
|
163 | end = (last + 1) * self.s | |
|
164 | blocksize = self.s * 256 | |||
|
165 | while end >= 0: | |||
|
166 | start = max(end - blocksize, 0) | |||
|
167 | self.dataf.seek(start) | |||
|
168 | data = self.dataf.read(end - start) | |||
|
169 | findend = end - start | |||
|
170 | while True: | |||
|
171 | # we're searching backwards, so weh have to make sure | |||
|
172 | # we don't find a changeset where this node is a parent | |||
|
173 | off = data.rfind(node, 0, findend) | |||
|
174 | findend = off | |||
|
175 | if off >= 0: | |||
|
176 | i = off / self.s | |||
|
177 | off = i * self.s | |||
|
178 | n = data[off + self.shaoffset:off + self.shaoffset + 20] | |||
|
179 | if n == node: | |||
|
180 | self.map[n] = i + start / self.s | |||
|
181 | return node | |||
|
182 | else: | |||
|
183 | break | |||
|
184 | end -= blocksize | |||
|
185 | return None | |||
112 |
|
186 | |||
113 | while i < end: |
|
187 | def loadindex(self, i=None, end=None): | |
114 | if not self.index[i]: |
|
188 | if self.all: return | |
115 | d = self.data[i * self.s: (i + 1) * self.s] |
|
189 | all = False | |
116 | e = struct.unpack(self.indexformat, d) |
|
190 | if i == None: | |
117 | self.index[i] = e |
|
191 | blockstart = 0 | |
118 | self.map[e[-1]] = i |
|
192 | blocksize = (512 / self.s) * self.s | |
119 |
|
|
193 | end = self.datasize | |
|
194 | all = True | |||
|
195 | else: | |||
|
196 | if end: | |||
|
197 | blockstart = i * self.s | |||
|
198 | end = end * self.s | |||
|
199 | blocksize = end - blockstart | |||
|
200 | else: | |||
|
201 | blockstart = (i & ~(32)) * self.s | |||
|
202 | blocksize = self.s * 64 | |||
|
203 | end = blockstart + blocksize | |||
|
204 | while blockstart < end: | |||
|
205 | self.loadblock(blockstart, blocksize) | |||
|
206 | blockstart += blocksize | |||
|
207 | if all: self.all = True | |||
120 |
|
208 | |||
121 | class lazyindex(object): |
|
209 | class lazyindex(object): | |
122 | """a lazy version of the index array""" |
|
210 | """a lazy version of the index array""" | |
@@ -127,7 +215,7 b' class lazyindex(object):' | |||||
127 | def load(self, pos): |
|
215 | def load(self, pos): | |
128 | if pos < 0: |
|
216 | if pos < 0: | |
129 | pos += len(self.p.index) |
|
217 | pos += len(self.p.index) | |
130 | self.p.load(pos) |
|
218 | self.p.loadindex(pos) | |
131 | return self.p.index[pos] |
|
219 | return self.p.index[pos] | |
132 | def __getitem__(self, pos): |
|
220 | def __getitem__(self, pos): | |
133 | return self.p.index[pos] or self.load(pos) |
|
221 | return self.p.index[pos] or self.load(pos) | |
@@ -143,14 +231,13 b' class lazymap(object):' | |||||
143 | def __init__(self, parser): |
|
231 | def __init__(self, parser): | |
144 | self.p = parser |
|
232 | self.p = parser | |
145 | def load(self, key): |
|
233 | def load(self, key): | |
146 | if self.p.all: return |
|
234 | n = self.p.findnode(key) | |
147 | n = self.p.data.find(key) |
|
235 | if n == None: | |
148 | if n < 0: |
|
|||
149 | raise KeyError(key) |
|
236 | raise KeyError(key) | |
150 | pos = n / self.p.s |
|
|||
151 | self.p.load(pos) |
|
|||
152 | def __contains__(self, key): |
|
237 | def __contains__(self, key): | |
153 |
self.p. |
|
238 | if key in self.p.map: | |
|
239 | return True | |||
|
240 | self.p.loadmap() | |||
154 | return key in self.p.map |
|
241 | return key in self.p.map | |
155 | def __iter__(self): |
|
242 | def __iter__(self): | |
156 | yield nullid |
|
243 | yield nullid | |
@@ -158,7 +245,7 b' class lazymap(object):' | |||||
158 | try: |
|
245 | try: | |
159 | yield self.p.index[i][-1] |
|
246 | yield self.p.index[i][-1] | |
160 | except: |
|
247 | except: | |
161 | self.p.load(i) |
|
248 | self.p.loadindex(i) | |
162 | yield self.p.index[i][-1] |
|
249 | yield self.p.index[i][-1] | |
163 | def __getitem__(self, key): |
|
250 | def __getitem__(self, key): | |
164 | try: |
|
251 | try: | |
@@ -222,7 +309,8 b' class revlog(object):' | |||||
222 | v = self.defversion |
|
309 | v = self.defversion | |
223 | try: |
|
310 | try: | |
224 | f = self.opener(self.indexfile) |
|
311 | f = self.opener(self.indexfile) | |
225 | i = f.read() |
|
312 | i = f.read(4) | |
|
313 | f.seek(0) | |||
226 | except IOError, inst: |
|
314 | except IOError, inst: | |
227 | if inst.errno != errno.ENOENT: |
|
315 | if inst.errno != errno.ENOENT: | |
228 | raise |
|
316 | raise | |
@@ -241,7 +329,7 b' class revlog(object):' | |||||
241 | return |
|
329 | return | |
242 | self.indexstat = st |
|
330 | self.indexstat = st | |
243 | if len(i) > 0: |
|
331 | if len(i) > 0: | |
244 |
v = struct.unpack(versionformat, i |
|
332 | v = struct.unpack(versionformat, i)[0] | |
245 | flags = v & ~0xFFFF |
|
333 | flags = v & ~0xFFFF | |
246 | fmt = v & 0xFFFF |
|
334 | fmt = v & 0xFFFF | |
247 | if fmt == 0: |
|
335 | if fmt == 0: | |
@@ -258,16 +346,19 b' class revlog(object):' | |||||
258 | self.version = v |
|
346 | self.version = v | |
259 | if v == 0: |
|
347 | if v == 0: | |
260 | self.indexformat = indexformatv0 |
|
348 | self.indexformat = indexformatv0 | |
|
349 | shaoffset = v0shaoffset | |||
261 | else: |
|
350 | else: | |
262 | self.indexformat = indexformatng |
|
351 | self.indexformat = indexformatng | |
|
352 | shaoffset = ngshaoffset | |||
263 |
|
353 | |||
264 | if i: |
|
354 | if i: | |
265 | if not self.inlinedata() and st and st.st_size > 10000: |
|
355 | if not self.inlinedata() and st and st.st_size > 10000: | |
266 | # big index, let's parse it on demand |
|
356 | # big index, let's parse it on demand | |
267 |
parser = lazyparser( |
|
357 | parser = lazyparser(f, st.st_size, self.indexformat, shaoffset) | |
268 | self.index = lazyindex(parser) |
|
358 | self.index = lazyindex(parser) | |
269 | self.nodemap = lazymap(parser) |
|
359 | self.nodemap = lazymap(parser) | |
270 | else: |
|
360 | else: | |
|
361 | i = f.read() | |||
271 | self.parseindex(i) |
|
362 | self.parseindex(i) | |
272 | if self.inlinedata(): |
|
363 | if self.inlinedata(): | |
273 | # we've already got the entire data file read in, save it |
|
364 | # we've already got the entire data file read in, save it | |
@@ -312,11 +403,23 b' class revlog(object):' | |||||
312 | def offset_type(self, offset, type): |
|
403 | def offset_type(self, offset, type): | |
313 | return long(long(offset) << 16 | type) |
|
404 | return long(long(offset) << 16 | type) | |
314 |
|
405 | |||
|
406 | def loadindex(self, start, end): | |||
|
407 | """load a block of indexes all at once from the lazy parser""" | |||
|
408 | if isinstance(self.index, lazyindex): | |||
|
409 | self.index.p.loadindex(start, end) | |||
|
410 | ||||
315 | def loadindexmap(self): |
|
411 | def loadindexmap(self): | |
316 | """loads both the map and the index from the lazy parser""" |
|
412 | """loads both the map and the index from the lazy parser""" | |
317 | if isinstance(self.index, lazyindex): |
|
413 | if isinstance(self.index, lazyindex): | |
318 | p = self.index.p |
|
414 | p = self.index.p | |
319 | p.load() |
|
415 | p.loadindex() | |
|
416 | self.nodemap = p.map | |||
|
417 | ||||
|
418 | def loadmap(self): | |||
|
419 | """loads the map from the lazy parser""" | |||
|
420 | if isinstance(self.nodemap, lazymap): | |||
|
421 | self.nodemap.p.loadmap() | |||
|
422 | self.nodemap = self.nodemap.p.map | |||
320 |
|
423 | |||
321 | def inlinedata(self): return self.version & REVLOGNGINLINEDATA |
|
424 | def inlinedata(self): return self.version & REVLOGNGINLINEDATA | |
322 | def tip(self): return self.node(len(self.index) - 1) |
|
425 | def tip(self): return self.node(len(self.index) - 1) | |
@@ -690,7 +793,9 b' class revlog(object):' | |||||
690 | if self.cache and self.cache[1] >= base and self.cache[1] < rev: |
|
793 | if self.cache and self.cache[1] >= base and self.cache[1] < rev: | |
691 | base = self.cache[1] |
|
794 | base = self.cache[1] | |
692 | text = self.cache[2] |
|
795 | text = self.cache[2] | |
|
796 | self.loadindex(base, rev + 1) | |||
693 | else: |
|
797 | else: | |
|
798 | self.loadindex(base, rev + 1) | |||
694 | text = self.chunk(base, df=df) |
|
799 | text = self.chunk(base, df=df) | |
695 |
|
800 | |||
696 | bins = [] |
|
801 | bins = [] |
General Comments 0
You need to be logged in to leave comments.
Login now