revlog: add a children function...
mpm@selenic.com
r370:c90385d8 default
@@ -1,522 +1,535 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # This provides efficient delta storage with O(1) retrieve and append
4 4 # and O(changes) merge between branches
5 5 #
6 6 # Copyright 2005 Matt Mackall <mpm@selenic.com>
7 7 #
8 8 # This software may be used and distributed according to the terms
9 9 # of the GNU General Public License, incorporated herein by reference.
10 10
11 11 import zlib, struct, sha, binascii, heapq
12 12 from mercurial import mdiff
13 13
14 14 def hex(node): return binascii.hexlify(node)
15 15 def bin(node): return binascii.unhexlify(node)
16 16 def short(node): return hex(node[:4])
17 17
18 18 def compress(text):
19 19 if not text: return text
20 20 if len(text) < 44:
21 21 if text[0] == '\0': return text
22 22 return 'u' + text
23 23 bin = zlib.compress(text)
24 24 if len(bin) > len(text):
25 25 if text[0] == '\0': return text
26 26 return 'u' + text
27 27 return bin
28 28
29 29 def decompress(bin):
30 30 if not bin: return bin
31 31 t = bin[0]
32 32 if t == '\0': return bin
33 33 if t == 'x': return zlib.decompress(bin)
34 34 if t == 'u': return bin[1:]
35 35 raise "unknown compression type %s" % t
36 36
37 37 def hash(text, p1, p2):
38 38 l = [p1, p2]
39 39 l.sort()
40 40 return sha.sha(l[0] + l[1] + text).digest()
41 41
42 42 nullid = "\0" * 20
43 43 indexformat = ">4l20s20s20s"
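# each index entry packs: offset, size, base, linkrev (four longs),
# followed by p1, p2, nodeid (three 20-byte sha1 digests)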
44 44
45 45 class lazyparser:
46 46 def __init__(self, data, revlog):
47 47 self.data = data
48 48 self.s = struct.calcsize(indexformat)
49 49 self.l = len(data)/self.s
50 50 self.index = [None] * self.l
51 51 self.map = {nullid: -1}
52 52 self.all = 0
53 53 self.revlog = revlog
54 54
55 55 def load(self, pos=None):
56 56 if self.all: return
57 57 if pos is not None:
58 58 block = pos / 1000
59 59 i = block * 1000
60 60 end = min(self.l, i + 1000)
61 61 else:
62 62 self.all = 1
63 63 i = 0
64 64 end = self.l
65 65 self.revlog.index = self.index
66 66 self.revlog.nodemap = self.map
67 67
68 68 while i < end:
69 69 d = self.data[i * self.s: (i + 1) * self.s]
70 70 e = struct.unpack(indexformat, d)
71 71 self.index[i] = e
72 72 self.map[e[6]] = i
73 73 i += 1
74 74
75 75 class lazyindex:
76 76 def __init__(self, parser):
77 77 self.p = parser
78 78 def __len__(self):
79 79 return len(self.p.index)
80 80 def load(self, pos):
81 81 self.p.load(pos)
82 82 return self.p.index[pos]
83 83 def __getitem__(self, pos):
84 84 return self.p.index[pos] or self.load(pos)
85 85 def append(self, e):
86 86 self.p.index.append(e)
87 87
88 88 class lazymap:
89 89 def __init__(self, parser):
90 90 self.p = parser
91 91 def load(self, key):
92 92 if self.p.all: return
93 93 n = self.p.data.find(key)
94 94 if n < 0: raise KeyError("node " + hex(key))
95 95 pos = n / self.p.s
96 96 self.p.load(pos)
97 97 def __contains__(self, key):
98 98 self.p.load()
99 99 return key in self.p.map
100 100 def __iter__(self):
101 101 for i in xrange(self.p.l):
102 102 try:
103 103 yield self.p.index[i][6]
104 104 except:
105 105 self.p.load(i)
106 106 yield self.p.index[i][6]
107 107 def __getitem__(self, key):
108 108 try:
109 109 return self.p.map[key]
110 110 except KeyError:
111 111 try:
112 112 self.load(key)
113 113 return self.p.map[key]
114 114 except KeyError:
115 115 raise KeyError("node " + hex(key))
116 116 def __setitem__(self, key, val):
117 117 self.p.map[key] = val
118 118
119 119 class revlog:
120 120 def __init__(self, opener, indexfile, datafile):
121 121 self.indexfile = indexfile
122 122 self.datafile = datafile
123 123 self.opener = opener
124 124 self.cache = None
125 125
126 126 try:
127 127 i = self.opener(self.indexfile).read()
128 128 except IOError:
129 129 i = ""
130 130
131 131 if len(i) > 10000:
132 132 # big index, let's parse it on demand
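# (the 10000-byte cutoff keeps small indexes eager; lazyparser then
# fills the index in 1000-entry blocks as load(pos) touches them)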
133 133 parser = lazyparser(i, self)
134 134 self.index = lazyindex(parser)
135 135 self.nodemap = lazymap(parser)
136 136 else:
137 137 s = struct.calcsize(indexformat)
138 138 l = len(i) / s
139 139 self.index = [None] * l
140 140 m = [None] * l
141 141
142 142 n = 0
143 143 for f in xrange(0, len(i), s):
144 144 # offset, size, base, linkrev, p1, p2, nodeid
145 145 e = struct.unpack(indexformat, i[f:f + s])
146 146 m[n] = (e[6], n)
147 147 self.index[n] = e
148 148 n += 1
149 149
150 150 self.nodemap = dict(m)
151 151 self.nodemap[nullid] = -1
152 152
153 153
154 154 def tip(self): return self.node(len(self.index) - 1)
155 155 def count(self): return len(self.index)
156 156 def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
157 157 def rev(self, node): return self.nodemap[node]
158 158 def linkrev(self, node): return self.index[self.nodemap[node]][3]
159 159 def parents(self, node):
160 160 if node == nullid: return (nullid, nullid)
161 161 return self.index[self.nodemap[node]][4:6]
162 162
163 163 def start(self, rev): return self.index[rev][0]
164 164 def length(self, rev): return self.index[rev][1]
165 165 def end(self, rev): return self.start(rev) + self.length(rev)
166 166 def base(self, rev): return self.index[rev][2]
167 167
168 168 def heads(self):
169 169 p = {}
170 170 h = []
171 171 for r in range(self.count() - 1, -1, -1):
172 172 n = self.node(r)
173 173 if n not in p:
174 174 h.append(n)
175 175 for pn in self.parents(n):
176 176 p[pn] = 1
177 177 return h
178
179 def children(self, node):
180 # find every revision that lists this node as a parent; note that
181 # parents() yields nodeids, so we compare against node, not its rev
182 c = []
183 p = self.rev(node)
184 for r in range(p + 1, self.count()):
185 n = self.node(r)
186 for pn in self.parents(n):
187 if pn == node:
188 c.append(n)
189 break
190 return c
178 191
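# usage sketch (illustrative only; "rl" stands for any revlog instance):
# children() is the reverse mapping of parents():
#   rl.parents(n)   gives the (p1, p2) nodeids of n
#   rl.children(n)  gives every nodeid that has n as p1 or p2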
179 192 def lookup(self, id):
180 193 try:
181 194 rev = int(id)
182 195 return self.node(rev)
183 196 except ValueError:
184 197 c = []
185 198 for n in self.nodemap:
186 199 if id in hex(n):
187 200 c.append(n)
188 201 if len(c) > 1: raise KeyError("Ambiguous identifier")
189 202 if len(c) < 1: raise KeyError("No match found")
190 203 return c[0]
191 204
192 205 return None
193 206
194 207 def diff(self, a, b):
195 208 return mdiff.textdiff(a, b)
196 209
197 210 def patches(self, t, pl):
198 211 return mdiff.patches(t, pl)
199 212
200 213 def delta(self, node):
201 214 r = self.rev(node)
202 215 b = self.base(r)
203 216 if r == b:
204 217 return self.diff(self.revision(self.node(r - 1)),
205 218 self.revision(node))
206 219 else:
207 220 f = self.opener(self.datafile)
208 221 f.seek(self.start(r))
209 222 data = f.read(self.length(r))
210 223 return decompress(data)
211 224
212 225 def revision(self, node):
213 226 if node == nullid: return ""
214 227 if self.cache and self.cache[0] == node: return self.cache[2]
215 228
216 229 text = None
217 230 rev = self.rev(node)
218 231 start, length, base, link, p1, p2, node = self.index[rev]
219 232 end = start + length
220 233 if base != rev: start = self.start(base)
221 234
222 235 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
223 236 base = self.cache[1]
224 237 start = self.start(base + 1)
225 238 text = self.cache[2]
226 239 last = 0
227 240
228 241 f = self.opener(self.datafile)
229 242 f.seek(start)
230 243 data = f.read(end - start)
231 244
232 245 if not text:
233 246 last = self.length(base)
234 247 text = decompress(data[:last])
235 248
236 249 bins = []
237 250 for r in xrange(base + 1, rev + 1):
238 251 s = self.length(r)
239 252 bins.append(decompress(data[last:last + s]))
240 253 last = last + s
241 254
242 255 text = mdiff.patches(text, bins)
243 256
244 257 if node != hash(text, p1, p2):
245 258 raise IOError("integrity check failed on %s:%d"
246 259 % (self.datafile, rev))
247 260
248 261 self.cache = (node, rev, text)
249 262 return text
250 263
251 264 def addrevision(self, text, transaction, link, p1=None, p2=None):
252 265 if text is None: text = ""
253 266 if p1 is None: p1 = self.tip()
254 267 if p2 is None: p2 = nullid
255 268
256 269 node = hash(text, p1, p2)
257 270
258 271 if node in self.nodemap:
259 272 return node
260 273
261 274 n = self.count()
262 275 t = n - 1
263 276
264 277 if n:
265 278 base = self.base(t)
266 279 start = self.start(base)
267 280 end = self.end(t)
268 281 prev = self.revision(self.tip())
269 282 d = self.diff(prev, text)
270 283 data = compress(d)
271 284 dist = end - start + len(data)
272 285
273 286 # full versions are inserted when the needed deltas
274 287 # become comparable to the uncompressed text
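# e.g. for a 1000-byte text, once the compressed base version plus its
# deltas (including this new one) would span more than 2000 bytes, a
# full version is stored instead, keeping reconstruction cost bounded
# by the size of the text itself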
275 288 if not n or dist > len(text) * 2:
276 289 data = compress(text)
277 290 base = n
278 291 else:
279 292 base = self.base(t)
280 293
281 294 offset = 0
282 295 if t >= 0:
283 296 offset = self.end(t)
284 297
285 298 e = (offset, len(data), base, link, p1, p2, node)
286 299
287 300 self.index.append(e)
288 301 self.nodemap[node] = n
289 302 entry = struct.pack(indexformat, *e)
290 303
291 304 transaction.add(self.datafile, e[0])
292 305 self.opener(self.datafile, "a").write(data)
293 306 transaction.add(self.indexfile, n * len(entry))
294 307 self.opener(self.indexfile, "a").write(entry)
295 308
296 309 self.cache = (node, n, text)
297 310 return node
298 311
299 312 def ancestor(self, a, b):
300 313 # calculate the distance of every node from root
301 314 dist = {nullid: 0}
302 315 for i in xrange(self.count()):
303 316 n = self.node(i)
304 317 p1, p2 = self.parents(n)
305 318 dist[n] = max(dist[p1], dist[p2]) + 1
306 319
307 320 # traverse ancestors in order of decreasing distance from root
308 321 def ancestors(node):
309 322 # we store negative distances because heap returns smallest member
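# e.g. nodes at distances 8, 5, 3 are pushed as -8, -5, -3, so the
# smallest entry popped first is the node farthest from the root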
310 323 h = [(-dist[node], node)]
311 324 seen = {}
312 325 earliest = self.count()
313 326 while h:
314 327 d, n = heapq.heappop(h)
315 328 r = self.rev(n)
316 329 if n not in seen:
317 330 seen[n] = 1
318 331 yield (-d, n)
319 332 for p in self.parents(n):
320 333 heapq.heappush(h, (-dist[p], p))
321 334
322 335 x = ancestors(a)
323 336 y = ancestors(b)
324 337 lx = x.next()
325 338 ly = y.next()
326 339
327 340 # increment each ancestor list until it is closer to root than
328 341 # the other, or they match
329 342 while 1:
330 343 if lx == ly:
331 344 return lx[1]
332 345 elif lx < ly:
333 346 ly = y.next()
334 347 elif lx > ly:
335 348 lx = x.next()
336 349
337 350 def group(self, linkmap):
338 351 # given a list of changeset revs, return a set of deltas and
339 352 # metadata corresponding to nodes. the first delta is
340 353 # parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
341 354 # have this parent as it has all history before these
342 355 # changesets. parent is parent[0]
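# concretely, each chunk yielded below is framed as:
#   a >l length field (4 bytes, counting itself),
#   80 bytes of meta: node + p1 + p2 + changeset link,
#   then the delta data; a zero length field terminates the group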
343 356
344 357 revs = []
345 358 needed = {}
346 359
347 360 # find file nodes/revs that match changeset revs
348 361 for i in xrange(0, self.count()):
349 362 if self.index[i][3] in linkmap:
350 363 revs.append(i)
351 364 needed[i] = 1
352 365
353 366 # if we don't have any revisions touched by these changesets, bail
354 367 if not revs:
355 368 yield struct.pack(">l", 0)
356 369 return
357 370
358 371 # add the parent of the first rev
359 372 p = self.parents(self.node(revs[0]))[0]
360 373 revs.insert(0, self.rev(p))
361 374
362 375 # for each delta that isn't contiguous in the log, we need to
363 376 # reconstruct the base, reconstruct the result, and then
364 377 # calculate the delta. We also need to do this where we've
365 378 # stored a full version and not a delta
366 379 for i in xrange(0, len(revs) - 1):
367 380 a, b = revs[i], revs[i + 1]
368 381 if a + 1 != b or self.base(b) == b:
369 382 for j in xrange(self.base(a), a + 1):
370 383 needed[j] = 1
371 384 for j in xrange(self.base(b), b + 1):
372 385 needed[j] = 1
373 386
374 387 # calculate spans to retrieve from datafile
375 388 needed = needed.keys()
376 389 needed.sort()
377 390 spans = []
378 391 oo = -1
379 392 ol = 0
380 393 for n in needed:
381 394 if n < 0: continue
382 395 o = self.start(n)
383 396 l = self.length(n)
384 397 if oo + ol == o: # can we merge with the previous?
385 398 nl = spans[-1][2]
386 399 nl.append((n, l))
387 400 ol += l
388 401 spans[-1] = (oo, ol, nl)
389 402 else:
390 403 oo = o
391 404 ol = l
392 405 spans.append((oo, ol, [(n, l)]))
393 406
394 407 # read spans in, divide up chunks
395 408 chunks = {}
396 409 for span in spans:
397 410 # we reopen the file for each span to make http happy for now
398 411 f = self.opener(self.datafile)
399 412 f.seek(span[0])
400 413 data = f.read(span[1])
401 414
402 415 # divide up the span
403 416 pos = 0
404 417 for r, l in span[2]:
405 418 chunks[r] = decompress(data[pos: pos + l])
406 419 pos += l
407 420
408 421 # helper to reconstruct intermediate versions
409 422 def construct(text, base, rev):
410 423 bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
411 424 return mdiff.patches(text, bins)
412 425
413 426 # build deltas
414 427 deltas = []
415 428 for d in xrange(0, len(revs) - 1):
416 429 a, b = revs[d], revs[d + 1]
417 430 n = self.node(b)
418 431
419 432 # do we need to construct a new delta?
420 433 if a + 1 != b or self.base(b) == b:
421 434 if a >= 0:
422 435 base = self.base(a)
423 436 ta = chunks[self.base(a)]
424 437 ta = construct(ta, base, a)
425 438 else:
426 439 ta = ""
427 440
428 441 base = self.base(b)
429 442 if a > base:
430 443 base = a
431 444 tb = ta
432 445 else:
433 446 tb = chunks[self.base(b)]
434 447 tb = construct(tb, base, b)
435 448 d = self.diff(ta, tb)
436 449 else:
437 450 d = chunks[b]
438 451
439 452 p = self.parents(n)
440 453 meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
441 454 l = struct.pack(">l", len(meta) + len(d) + 4)
442 455 yield l
443 456 yield meta
444 457 yield d
445 458
446 459 yield struct.pack(">l", 0)
447 460
448 461 def addgroup(self, revs, linkmapper, transaction, unique = 0):
449 462 # given a set of deltas, add them to the revision log. the
450 463 # first delta is against its parent, which should be in our
451 464 # log, the rest are against the previous delta.
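# each incoming chunk mirrors the framing produced by group() above:
# 80 bytes of meta (node, p1, p2, changeset id) followed by the delta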
452 465
453 466 # track the base of the current delta log
454 467 r = self.count()
455 468 t = r - 1
456 469 node = nullid
457 470
458 471 base = prev = -1
459 472 start = end = 0
460 473 if r:
461 474 start = self.start(self.base(t))
462 475 end = self.end(t)
463 476 measure = self.length(self.base(t))
464 477 base = self.base(t)
465 478 prev = self.tip()
466 479
467 480 transaction.add(self.datafile, end)
468 481 transaction.add(self.indexfile, r * struct.calcsize(indexformat))
469 482 dfh = self.opener(self.datafile, "a")
470 483 ifh = self.opener(self.indexfile, "a")
471 484
472 485 # loop through our set of deltas
473 486 chain = None
474 487 for chunk in revs:
475 488 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
476 489 link = linkmapper(cs)
477 490 if node in self.nodemap:
478 491 # this can happen if two branches make the same change
479 492 if unique:
480 493 raise "already have %s" % hex(node[:4])
481 494 continue
482 495 delta = chunk[80:]
483 496
484 497 if not chain:
485 498 # retrieve the parent revision of the delta chain
486 499 chain = p1
487 500 if not chain in self.nodemap:
488 501 raise "unknown base %s" % short(chain[:4])
489 502
490 503 # full versions are inserted when the needed deltas become
491 504 # comparable to the uncompressed text or when the previous
492 505 # version is not the one we have a delta against. We use
493 506 # the size of the previous full rev as a proxy for the
494 507 # current size.
495 508
496 509 if chain == prev:
497 510 cdelta = compress(delta)
498 511
499 512 if chain != prev or (end - start + len(cdelta)) > measure * 2:
500 513 # flush our writes here so we can read it in revision
501 514 dfh.flush()
502 515 ifh.flush()
503 516 text = self.revision(chain)
504 517 text = self.patches(text, [delta])
505 518 chk = self.addrevision(text, transaction, link, p1, p2)
506 519 if chk != node:
507 520 raise "consistency error adding group"
508 521 measure = len(text)
509 522 else:
510 523 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
511 524 self.index.append(e)
512 525 self.nodemap[node] = r
513 526 dfh.write(cdelta)
514 527 ifh.write(struct.pack(indexformat, *e))
515 528
516 529 t, r, chain, prev = r, r + 1, node, node
517 530 start = self.start(self.base(t))
518 531 end = self.end(t)
519 532
520 533 dfh.close()
521 534 ifh.close()
522 535 return node
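A minimal sketch exercising the new function end to end (editorial example, not part of the change; assumes Python 2 of this era with mercurial importable, and uses a throwaway on-disk store -- "faketx", "opener", and the /tmp path are stand-ins, not mercurial API):

    import os
    from mercurial.revlog import revlog, nullid

    class faketx:
        # transaction stub: revlog only calls add(file, offset) here
        def add(self, file, offset): pass

    store = "/tmp/rltest"
    if not os.path.isdir(store): os.mkdir(store)

    def opener(name, mode="r"):
        return open(os.path.join(store, name), mode)

    r = revlog(opener, "test.i", "test.d")
    a = r.addrevision("one", faketx(), 0)
    b = r.addrevision("two", faketx(), 1)  # p1 defaults to tip, i.e. a
    assert r.parents(b) == (a, nullid)
    assert r.children(a) == [b]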