Add iterator to the lazymap code
mpm@selenic.com
r97:7a2abee6 default
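This changeset adds an `__iter__` method to `lazymap`, so callers can walk every node id in the index while unparsed index blocks are still loaded on demand: the iterator yields `index[i][6]` directly when the entry is already parsed, and faults in the surrounding block otherwise. This is what lets `revlog.lookup` below scan `self.nodemap` for nodes matching a hex prefix without forcing a full index parse. A minimal usage sketch; `find_by_prefix` is a hypothetical helper, not part of the commit, and assumes a revlog instance plus the module-level `hex()` from revlog.py:

    # Hypothetical helper showing what lazymap.__iter__ enables: a lazy
    # scan over every node id known to a revlog instance `log`.
    def find_by_prefix(log, prefix):
        matches = []
        for n in log.nodemap:      # drives the new lazymap.__iter__
            if prefix in hex(n):   # hex() as defined in revlog.py
                matches.append(n)
        return matches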
@@ -1,450 +1,457 @@
 # revlog.py - storage back-end for mercurial
 #
 # This provides efficient delta storage with O(1) retrieve and append
 # and O(changes) merge between branches
 #
 # Copyright 2005 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms
 # of the GNU General Public License, incorporated herein by reference.
 
 import zlib, struct, sha, os, tempfile, binascii
 from mercurial import mdiff
 
 def hex(node): return binascii.hexlify(node)
 def bin(node): return binascii.unhexlify(node)
 def short(node): return hex(node[:4])
 
 def compress(text):
     return zlib.compress(text)
 
 def decompress(bin):
     return zlib.decompress(bin)
 
 def hash(text, p1, p2):
     l = [p1, p2]
     l.sort()
     return sha.sha(l[0] + l[1] + text).digest()
 
 nullid = "\0" * 20
 indexformat = ">4l20s20s20s"
 
 class lazyparser:
     def __init__(self, data):
         self.data = data
         self.s = struct.calcsize(indexformat)
         self.l = len(data)/self.s
         self.index = [None] * self.l
         self.map = {nullid: -1}
 
         if 0:
             n = 0
             i = self.data
             s = struct.calcsize(indexformat)
             for f in xrange(0, len(i), s):
                 # offset, size, base, linkrev, p1, p2, nodeid
                 e = struct.unpack(indexformat, i[f:f + s])
                 self.map[e[6]] = n
                 self.index.append(e)
                 n += 1
 
     def load(self, pos):
         block = pos / 1000
         i = block * 1000
         end = min(self.l, i + 1000)
         while i < end:
             d = self.data[i * self.s: (i + 1) * self.s]
             e = struct.unpack(indexformat, d)
             self.index[i] = e
             self.map[e[6]] = i
             i += 1
 
 class lazyindex:
     def __init__(self, parser):
         self.p = parser
     def __len__(self):
         return len(self.p.index)
     def __getitem__(self, pos):
         i = self.p.index[pos]
         if not i:
             self.p.load(pos)
             return self.p.index[pos]
         return i
     def append(self, e):
         self.p.index.append(e)
 
 class lazymap:
     def __init__(self, parser):
         self.p = parser
     def load(self, key):
         n = self.p.data.find(key)
         if n < 0: raise KeyError("node " + hex(key))
         pos = n / self.p.s
         self.p.load(pos)
     def __contains__(self, key):
         try:
             self[key]
             return True
         except KeyError:
             return False
+    def __iter__(self):
+        for i in xrange(self.p.l):
+            try:
+                yield self.p.index[i][6]
+            except:
+                self.p.load(i)
+                yield self.p.index[i][6]
     def __getitem__(self, key):
         try:
             return self.p.map[key]
         except KeyError:
             try:
                 self.load(key)
                 return self.p.map[key]
             except KeyError:
                 raise KeyError("node " + hex(key))
     def __setitem__(self, key, val):
         self.p.map[key] = val
 
 class revlog:
     def __init__(self, opener, indexfile, datafile):
         self.indexfile = indexfile
         self.datafile = datafile
         self.opener = opener
         self.cache = None
         # read the whole index for now, handle on-demand later
         try:
             i = self.opener(self.indexfile).read()
         except IOError:
             i = ""
         parser = lazyparser(i)
         self.index = lazyindex(parser)
         self.nodemap = lazymap(parser)
 
     def tip(self): return self.node(len(self.index) - 1)
     def count(self): return len(self.index)
     def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
     def rev(self, node): return self.nodemap[node]
     def linkrev(self, node): return self.index[self.nodemap[node]][3]
     def parents(self, node):
         if node == nullid: return (nullid, nullid)
         return self.index[self.nodemap[node]][4:6]
 
     def start(self, rev): return self.index[rev][0]
     def length(self, rev): return self.index[rev][1]
     def end(self, rev): return self.start(rev) + self.length(rev)
     def base(self, rev): return self.index[rev][2]
 
     def lookup(self, id):
         try:
             rev = int(id)
             return self.node(rev)
         except ValueError:
             c = []
             for n in self.nodemap:
                 if id in hex(n):
                     c.append(n)
             if len(c) > 1: raise KeyError("Ambiguous identifier")
             if len(c) < 1: raise KeyError("No match found")
             return c[0]
 
         return None
 
     def diff(self, a, b):
         return mdiff.textdiff(a, b)
 
     def patches(self, t, pl):
         return mdiff.patches(t, pl)
 
     def revision(self, node):
         if node == nullid: return ""
         if self.cache and self.cache[0] == node: return self.cache[2]
 
         text = None
         rev = self.rev(node)
         base = self.base(rev)
         start = self.start(base)
         end = self.end(rev)
 
         if self.cache and self.cache[1] >= base and self.cache[1] < rev:
             base = self.cache[1]
             start = self.start(base + 1)
             text = self.cache[2]
             last = 0
 
         f = self.opener(self.datafile)
         f.seek(start)
         data = f.read(end - start)
 
         if not text:
             last = self.length(base)
             text = decompress(data[:last])
 
         bins = []
         for r in xrange(base + 1, rev + 1):
             s = self.length(r)
             bins.append(decompress(data[last:last + s]))
             last = last + s
 
         text = mdiff.patches(text, bins)
 
         (p1, p2) = self.parents(node)
         if node != hash(text, p1, p2):
             raise "integrity check failed on %s:%d" % (self.datafile, rev)
 
         self.cache = (node, rev, text)
         return text
 
     def addrevision(self, text, transaction, link, p1=None, p2=None):
         if text is None: text = ""
         if p1 is None: p1 = self.tip()
         if p2 is None: p2 = nullid
 
         node = hash(text, p1, p2)
 
         n = self.count()
         t = n - 1
 
         if n:
             base = self.base(t)
             start = self.start(base)
             end = self.end(t)
             prev = self.revision(self.tip())
             data = compress(self.diff(prev, text))
             dist = end - start + len(data)
 
         # full versions are inserted when the needed deltas
         # become comparable to the uncompressed text
         if not n or dist > len(text) * 2:
             data = compress(text)
             base = n
         else:
             base = self.base(t)
 
         offset = 0
         if t >= 0:
             offset = self.end(t)
 
         e = (offset, len(data), base, link, p1, p2, node)
 
         self.index.append(e)
         self.nodemap[node] = n
         entry = struct.pack(indexformat, *e)
 
         transaction.add(self.datafile, e[0])
         self.opener(self.datafile, "a").write(data)
         transaction.add(self.indexfile, n * len(entry))
         self.opener(self.indexfile, "a").write(entry)
 
         self.cache = (node, n, text)
         return node
 
     def ancestor(self, a, b):
         def expand(list, map):
             a = []
             while list:
                 n = list.pop(0)
                 map[n] = 1
                 yield n
                 for p in self.parents(n):
                     if p != nullid and p not in map:
                         list.append(p)
             yield nullid
 
         amap = {}
         bmap = {}
         ag = expand([a], amap)
         bg = expand([b], bmap)
         adone = bdone = 0
 
         while not adone or not bdone:
             if not adone:
                 an = ag.next()
                 if an == nullid:
                     adone = 1
                 elif an in bmap:
                     return an
             if not bdone:
                 bn = bg.next()
                 if bn == nullid:
                     bdone = 1
                 elif bn in amap:
                     return bn
 
         return nullid
 
     def group(self, linkmap):
         # given a list of changeset revs, return a set of deltas and
         # metadata corresponding to nodes. the first delta is
         # parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
         # have this parent as it has all history before these
         # changesets. parent is parent[0]
 
         revs = []
         needed = {}
 
         # find file nodes/revs that match changeset revs
         for i in xrange(0, self.count()):
             if self.index[i][3] in linkmap:
                 revs.append(i)
                 needed[i] = 1
 
         # if we don't have any revisions touched by these changesets, bail
         if not revs: return struct.pack(">l", 0)
 
         # add the parent of the first rev
         p = self.parents(self.node(revs[0]))[0]
         revs.insert(0, self.rev(p))
 
         # for each delta that isn't contiguous in the log, we need to
         # reconstruct the base, reconstruct the result, and then
         # calculate the delta. We also need to do this where we've
         # stored a full version and not a delta
         for i in xrange(0, len(revs) - 1):
             a, b = revs[i], revs[i + 1]
             if a + 1 != b or self.base(b) == b:
                 for j in xrange(self.base(a), a + 1):
                     needed[j] = 1
                 for j in xrange(self.base(b), b + 1):
                     needed[j] = 1
 
         # calculate spans to retrieve from datafile
         needed = needed.keys()
         needed.sort()
         spans = []
         for n in needed:
             if n < 0: continue
             o = self.start(n)
             l = self.length(n)
             spans.append((o, l, [(n, l)]))
 
         # merge spans
         merge = [spans.pop(0)]
         while spans:
             e = spans.pop(0)
             f = merge[-1]
             if e[0] == f[0] + f[1]:
                 merge[-1] = (f[0], f[1] + e[1], f[2] + e[2])
             else:
                 merge.append(e)
 
         # read spans in, divide up chunks
         chunks = {}
         for span in merge:
             # we reopen the file for each span to make http happy for now
             f = self.opener(self.datafile)
             f.seek(span[0])
             data = f.read(span[1])
 
             # divide up the span
             pos = 0
             for r, l in span[2]:
                 chunks[r] = data[pos: pos + l]
                 pos += l
 
         # helper to reconstruct intermediate versions
         def construct(text, base, rev):
             bins = [decompress(chunks[r]) for r in xrange(base + 1, rev + 1)]
             return mdiff.patches(text, bins)
 
         # build deltas
         deltas = []
         for d in xrange(0, len(revs) - 1):
             a, b = revs[d], revs[d + 1]
             n = self.node(b)
 
             if a + 1 != b or self.base(b) == b:
                 if a >= 0:
                     base = self.base(a)
                     ta = decompress(chunks[self.base(a)])
                     ta = construct(ta, base, a)
                 else:
                     ta = ""
 
                 base = self.base(b)
                 if a > base:
                     base = a
                     tb = ta
                 else:
                     tb = decompress(chunks[self.base(b)])
                 tb = construct(tb, base, b)
                 d = self.diff(ta, tb)
             else:
                 d = decompress(chunks[b])
 
             p = self.parents(n)
             meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
             l = struct.pack(">l", len(meta) + len(d) + 4)
             deltas.append(l + meta + d)
 
         l = struct.pack(">l", sum(map(len, deltas)) + 4)
         deltas.insert(0, l)
         return "".join(deltas)
 
     def addgroup(self, data, linkmapper, transaction):
         # given a set of deltas, add them to the revision log. the
         # first delta is against its parent, which should be in our
         # log, the rest are against the previous delta.
 
         if not data: return self.tip()
 
         # retrieve the parent revision of the delta chain
         chain = data[24:44]
         if not chain in self.nodemap:
             raise "unknown base %s" % short(chain[:4])
 
         # track the base of the current delta log
         r = self.count()
         t = r - 1
 
         base = prev = -1
         start = end = 0
         if r:
             start = self.start(self.base(t))
             end = self.end(t)
             measure = self.length(self.base(t))
             base = self.base(t)
             prev = self.tip()
 
         transaction.add(self.datafile, end)
         transaction.add(self.indexfile, r * struct.calcsize(indexformat))
         dfh = self.opener(self.datafile, "a")
         ifh = self.opener(self.indexfile, "a")
 
         # loop through our set of deltas
         pos = 0
         while pos < len(data):
             l, node, p1, p2, cs = struct.unpack(">l20s20s20s20s",
                                                 data[pos:pos+84])
             link = linkmapper(cs)
             if node in self.nodemap:
                 raise "already have %s" % hex(node[:4])
             delta = data[pos + 84:pos + l]
             pos += l
 
             # full versions are inserted when the needed deltas become
             # comparable to the uncompressed text or when the previous
             # version is not the one we have a delta against. We use
             # the size of the previous full rev as a proxy for the
             # current size.
 
             if chain == prev:
                 cdelta = compress(delta)
 
             if chain != prev or (end - start + len(cdelta)) > measure * 2:
                 # flush our writes here so we can read it in revision
                 dfh.flush()
                 ifh.flush()
                 text = self.revision(chain)
                 text = self.patches(text, [delta])
                 chk = self.addrevision(text, transaction, link, p1, p2)
                 if chk != node:
                     raise "consistency error adding group"
                 measure = len(text)
             else:
                 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
                 self.index.append(e)
                 self.nodemap[node] = r
                 dfh.write(cdelta)
                 ifh.write(struct.pack(indexformat, *e))
 
             t, r, chain, prev = r, r + 1, node, node
             start = self.start(self.base(t))
             end = self.end(t)
 
         dfh.close()
         ifh.close()
         return node
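The laziness the new iterator preserves comes from `lazyparser.load`, which decodes the index in blocks of 1000 records around the requested position rather than parsing everything up front. A stand-alone sketch of that block scheme, with the parser state passed as arguments instead of living on `self` (assumed names; same `indexformat` as in the file above):

    import struct

    indexformat = ">4l20s20s20s"  # offset, size, base, linkrev, p1, p2, nodeid

    def load_block(index, nodemap, data, pos, blocksize=1000):
        # Decode only the block of records containing `pos`, filling in
        # the index list and the node-id -> rev mapping as it goes.
        s = struct.calcsize(indexformat)
        start = (pos // blocksize) * blocksize
        end = min(len(index), start + blocksize)
        for i in range(start, end):
            e = struct.unpack(indexformat, data[i * s:(i + 1) * s])
            index[i] = e
            nodemap[e[6]] = i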
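Both `addrevision` and `addgroup` decide between appending a delta and writing a fresh full version with the same rule: store a snapshot once the bytes needed to reconstruct the revision (the base version plus every delta in the chain) would exceed roughly twice the uncompressed text size. A stand-alone restatement of that rule, a sketch rather than part of the revlog API:

    def should_snapshot(chain_start, chain_end, delta_len, text_len):
        # Bytes read to rebuild this revision: the span from the base
        # snapshot through the last delta, plus the new delta itself.
        dist = chain_end - chain_start + delta_len
        # A full version is cheaper once that passes twice the text size.
        return dist > text_len * 2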