Show More
@@ -64,6 +64,7 b' def decompress(bin):' | |||
|
64 | 64 | raise RevlogError(_("unknown compression type %r") % t) |
|
65 | 65 | |
|
66 | 66 | indexformatv0 = ">4l20s20s20s" |
|
67 | v0shaoffset = 56 | |
|
67 | 68 | # index ng: |
|
68 | 69 | # 6 bytes offset |
|
69 | 70 | # 2 bytes flags |
@@ -75,48 +76,135 b' indexformatv0 = ">4l20s20s20s"' | |||
|
75 | 76 | # 4 bytes parent 2 rev |
|
76 | 77 | # 32 bytes: nodeid |
|
77 | 78 | indexformatng = ">Qiiiiii20s12x" |
|
79 | ngshaoffset = 32 | |
|
78 | 80 | versionformat = ">i" |
|
79 | 81 | |
|
80 | 82 | class lazyparser(object): |
|
81 | 83 | """ |
|
82 | 84 | this class avoids the need to parse the entirety of large indices |
|
83 | ||
|
84 | By default we parse and load 1000 entries at a time. | |
|
85 | ||
|
86 | If no position is specified, we load the whole index, and replace | |
|
87 | the lazy objects in revlog with the underlying objects for | |
|
88 | efficiency in cases where we look at most of the nodes. | |
|
89 | 85 | """ |
|
90 |
def __init__(self, data, |
|
|
91 | self.data = data | |
|
86 | def __init__(self, dataf, size, indexformat, shaoffset): | |
|
87 | self.dataf = dataf | |
|
88 | self.format = indexformat | |
|
92 | 89 | self.s = struct.calcsize(indexformat) |
|
93 | 90 | self.indexformat = indexformat |
|
94 |
self. |
|
|
91 | self.datasize = size | |
|
92 | self.l = size/self.s | |
|
95 | 93 | self.index = [None] * self.l |
|
96 | 94 | self.map = {nullid: -1} |
|
95 | self.allmap = 0 | |
|
97 | 96 | self.all = 0 |
|
98 | self.revlog = revlog | |
|
97 | self.mapfind_count = 0 | |
|
98 | self.shaoffset = shaoffset | |
|
99 | 99 | |
|
100 |
def load(self |
|
|
100 | def loadmap(self): | |
|
101 | """ | |
|
102 | during a commit, we need to make sure the rev being added is | |
|
103 | not a duplicate. This requires loading the entire index, | |
|
104 | which is fairly slow. loadmap can load up just the node map, | |
|
105 | which takes much less time. | |
|
106 | """ | |
|
107 | if self.allmap: return | |
|
108 | start = 0 | |
|
109 | end = self.datasize | |
|
110 | self.allmap = 1 | |
|
111 | cur = 0 | |
|
112 | count = 0 | |
|
113 | blocksize = self.s * 256 | |
|
114 | self.dataf.seek(0) | |
|
115 | while cur < end: | |
|
116 | data = self.dataf.read(blocksize) | |
|
117 | off = 0 | |
|
118 | for x in xrange(256): | |
|
119 | n = data[off + self.shaoffset:off + self.shaoffset + 20] | |
|
120 | self.map[n] = count | |
|
121 | count += 1 | |
|
122 | if count >= self.l: | |
|
123 | break | |
|
124 | off += self.s | |
|
125 | cur += blocksize | |
|
126 | ||
|
127 | def loadblock(self, blockstart, blocksize, data=None): | |
|
101 | 128 | if self.all: return |
|
102 |
if |
|
|
103 | block = pos / 1000 | |
|
104 | i = block * 1000 | |
|
105 | end = min(self.l, i + 1000) | |
|
129 | if data is None: | |
|
130 | self.dataf.seek(blockstart) | |
|
131 | data = self.dataf.read(blocksize) | |
|
132 | lend = len(data) / self.s | |
|
133 | i = blockstart / self.s | |
|
134 | off = 0 | |
|
135 | for x in xrange(lend): | |
|
136 | if self.index[i + x] == None: | |
|
137 | b = data[off : off + self.s] | |
|
138 | e = struct.unpack(self.format, b) | |
|
139 | self.index[i + x] = e | |
|
140 | self.map[e[-1]] = i + x | |
|
141 | off += self.s | |
|
142 | ||
|
143 | def findnode(self, node): | |
|
144 | """search backwards through the index file for a specific node""" | |
|
145 | if self.allmap: return None | |
|
146 | ||
|
147 | # hg log will cause many many searches for the manifest | |
|
148 | # nodes. After we get called a few times, just load the whole | |
|
149 | # thing. | |
|
150 | if self.mapfind_count > 8: | |
|
151 | self.loadmap() | |
|
152 | if node in self.map: | |
|
153 | return node | |
|
154 | return None | |
|
155 | self.mapfind_count += 1 | |
|
156 | last = self.l - 1 | |
|
157 | while self.index[last] != None: | |
|
158 | if last == 0: | |
|
159 | self.all = 1 | |
|
160 | self.allmap = 1 | |
|
161 | return None | |
|
162 | last -= 1 | |
|
163 | end = (last + 1) * self.s | |
|
164 | blocksize = self.s * 256 | |
|
165 | while end >= 0: | |
|
166 | start = max(end - blocksize, 0) | |
|
167 | self.dataf.seek(start) | |
|
168 | data = self.dataf.read(end - start) | |
|
169 | findend = end - start | |
|
170 | while True: | |
|
171 | # we're searching backwards, so weh have to make sure | |
|
172 | # we don't find a changeset where this node is a parent | |
|
173 | off = data.rfind(node, 0, findend) | |
|
174 | findend = off | |
|
175 | if off >= 0: | |
|
176 | i = off / self.s | |
|
177 | off = i * self.s | |
|
178 | n = data[off + self.shaoffset:off + self.shaoffset + 20] | |
|
179 | if n == node: | |
|
180 | self.map[n] = i + start / self.s | |
|
181 | return node | |
|
182 | else: | |
|
183 | break | |
|
184 | end -= blocksize | |
|
185 | return None | |
|
186 | ||
|
187 | def loadindex(self, i=None, end=None): | |
|
188 | if self.all: return | |
|
189 | all = False | |
|
190 | if i == None: | |
|
191 | blockstart = 0 | |
|
192 | blocksize = (512 / self.s) * self.s | |
|
193 | end = self.datasize | |
|
194 | all = True | |
|
106 | 195 | else: |
|
107 |
|
|
|
108 | i = 0 | |
|
109 |
end = self. |
|
|
110 | self.revlog.index = self.index | |
|
111 | self.revlog.nodemap = self.map | |
|
112 | ||
|
113 | while i < end: | |
|
114 | if not self.index[i]: | |
|
115 | d = self.data[i * self.s: (i + 1) * self.s] | |
|
116 | e = struct.unpack(self.indexformat, d) | |
|
117 | self.index[i] = e | |
|
118 | self.map[e[-1]] = i | |
|
119 | i += 1 | |
|
196 | if end: | |
|
197 | blockstart = i * self.s | |
|
198 | end = end * self.s | |
|
199 | blocksize = end - blockstart | |
|
200 | else: | |
|
201 | blockstart = (i & ~(32)) * self.s | |
|
202 | blocksize = self.s * 64 | |
|
203 | end = blockstart + blocksize | |
|
204 | while blockstart < end: | |
|
205 | self.loadblock(blockstart, blocksize) | |
|
206 | blockstart += blocksize | |
|
207 | if all: self.all = True | |
|
120 | 208 | |
|
121 | 209 | class lazyindex(object): |
|
122 | 210 | """a lazy version of the index array""" |
@@ -127,7 +215,7 b' class lazyindex(object):' | |||
|
127 | 215 | def load(self, pos): |
|
128 | 216 | if pos < 0: |
|
129 | 217 | pos += len(self.p.index) |
|
130 | self.p.load(pos) | |
|
218 | self.p.loadindex(pos) | |
|
131 | 219 | return self.p.index[pos] |
|
132 | 220 | def __getitem__(self, pos): |
|
133 | 221 | return self.p.index[pos] or self.load(pos) |
@@ -143,14 +231,13 b' class lazymap(object):' | |||
|
143 | 231 | def __init__(self, parser): |
|
144 | 232 | self.p = parser |
|
145 | 233 | def load(self, key): |
|
146 | if self.p.all: return | |
|
147 | n = self.p.data.find(key) | |
|
148 | if n < 0: | |
|
234 | n = self.p.findnode(key) | |
|
235 | if n == None: | |
|
149 | 236 | raise KeyError(key) |
|
150 | pos = n / self.p.s | |
|
151 | self.p.load(pos) | |
|
152 | 237 | def __contains__(self, key): |
|
153 |
self.p. |
|
|
238 | if key in self.p.map: | |
|
239 | return True | |
|
240 | self.p.loadmap() | |
|
154 | 241 | return key in self.p.map |
|
155 | 242 | def __iter__(self): |
|
156 | 243 | yield nullid |
@@ -158,7 +245,7 b' class lazymap(object):' | |||
|
158 | 245 | try: |
|
159 | 246 | yield self.p.index[i][-1] |
|
160 | 247 | except: |
|
161 | self.p.load(i) | |
|
248 | self.p.loadindex(i) | |
|
162 | 249 | yield self.p.index[i][-1] |
|
163 | 250 | def __getitem__(self, key): |
|
164 | 251 | try: |
@@ -222,7 +309,8 b' class revlog(object):' | |||
|
222 | 309 | v = self.defversion |
|
223 | 310 | try: |
|
224 | 311 | f = self.opener(self.indexfile) |
|
225 | i = f.read() | |
|
312 | i = f.read(4) | |
|
313 | f.seek(0) | |
|
226 | 314 | except IOError, inst: |
|
227 | 315 | if inst.errno != errno.ENOENT: |
|
228 | 316 | raise |
@@ -241,7 +329,7 b' class revlog(object):' | |||
|
241 | 329 | return |
|
242 | 330 | self.indexstat = st |
|
243 | 331 | if len(i) > 0: |
|
244 |
v = struct.unpack(versionformat, i |
|
|
332 | v = struct.unpack(versionformat, i)[0] | |
|
245 | 333 | flags = v & ~0xFFFF |
|
246 | 334 | fmt = v & 0xFFFF |
|
247 | 335 | if fmt == 0: |
@@ -258,16 +346,19 b' class revlog(object):' | |||
|
258 | 346 | self.version = v |
|
259 | 347 | if v == 0: |
|
260 | 348 | self.indexformat = indexformatv0 |
|
349 | shaoffset = v0shaoffset | |
|
261 | 350 | else: |
|
262 | 351 | self.indexformat = indexformatng |
|
352 | shaoffset = ngshaoffset | |
|
263 | 353 | |
|
264 | 354 | if i: |
|
265 | 355 | if not self.inlinedata() and st and st.st_size > 10000: |
|
266 | 356 | # big index, let's parse it on demand |
|
267 |
parser = lazyparser( |
|
|
357 | parser = lazyparser(f, st.st_size, self.indexformat, shaoffset) | |
|
268 | 358 | self.index = lazyindex(parser) |
|
269 | 359 | self.nodemap = lazymap(parser) |
|
270 | 360 | else: |
|
361 | i = f.read() | |
|
271 | 362 | self.parseindex(i) |
|
272 | 363 | if self.inlinedata(): |
|
273 | 364 | # we've already got the entire data file read in, save it |
@@ -312,11 +403,23 b' class revlog(object):' | |||
|
312 | 403 | def offset_type(self, offset, type): |
|
313 | 404 | return long(long(offset) << 16 | type) |
|
314 | 405 | |
|
406 | def loadindex(self, start, end): | |
|
407 | """load a block of indexes all at once from the lazy parser""" | |
|
408 | if isinstance(self.index, lazyindex): | |
|
409 | self.index.p.loadindex(start, end) | |
|
410 | ||
|
315 | 411 | def loadindexmap(self): |
|
316 | 412 | """loads both the map and the index from the lazy parser""" |
|
317 | 413 | if isinstance(self.index, lazyindex): |
|
318 | 414 | p = self.index.p |
|
319 | p.load() | |
|
415 | p.loadindex() | |
|
416 | self.nodemap = p.map | |
|
417 | ||
|
418 | def loadmap(self): | |
|
419 | """loads the map from the lazy parser""" | |
|
420 | if isinstance(self.nodemap, lazymap): | |
|
421 | self.nodemap.p.loadmap() | |
|
422 | self.nodemap = self.nodemap.p.map | |
|
320 | 423 | |
|
321 | 424 | def inlinedata(self): return self.version & REVLOGNGINLINEDATA |
|
322 | 425 | def tip(self): return self.node(len(self.index) - 1) |
@@ -690,7 +793,9 b' class revlog(object):' | |||
|
690 | 793 | if self.cache and self.cache[1] >= base and self.cache[1] < rev: |
|
691 | 794 | base = self.cache[1] |
|
692 | 795 | text = self.cache[2] |
|
796 | self.loadindex(base, rev + 1) | |
|
693 | 797 | else: |
|
798 | self.loadindex(base, rev + 1) | |
|
694 | 799 | text = self.chunk(base, df=df) |
|
695 | 800 | |
|
696 | 801 | bins = [] |
General Comments 0
You need to be logged in to leave comments.
Login now