Friendlier exceptions for unknown node errors
mpm@selenic.com
r86:1b945e8b default
@@ -1,478 +1,481 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # This provides efficient delta storage with O(1) retrieve and append
4 4 # and O(changes) merge between branches
5 5 #
6 6 # Copyright 2005 Matt Mackall <mpm@selenic.com>
7 7 #
8 8 # This software may be used and distributed according to the terms
9 9 # of the GNU General Public License, incorporated herein by reference.
10 10
11 11 import zlib, struct, sha, os, tempfile, binascii
12 12 from mercurial import mdiff
13 13
14 14 def hex(node): return binascii.hexlify(node)
15 15 def bin(node): return binascii.unhexlify(node)
16 16 def short(node): return hex(node[:4])
17 17
18 18 def compress(text):
19 19 return zlib.compress(text)
20 20
21 21 def decompress(bin):
22 22 return zlib.decompress(bin)
23 23
24 24 def hash(text, p1, p2):
25 25 l = [p1, p2]
26 26 l.sort()
27 27 return sha.sha(l[0] + l[1] + text).digest()
28 28
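# A quick sketch (not part of the diff; _p1/_p2 are made-up parent ids):
# the parent list is sorted before hashing, so a node id never depends
# on the order the parents are given in.
_p1, _p2 = "\x01" * 20, "\x02" * 20
assert hash("some text", _p1, _p2) == hash("some text", _p2, _p1)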
29 29 nullid = "\0" * 20
30 30 indexformat = ">4l20s20s20s"
31 31
32 32 class lazyparser:
33 33 def __init__(self, data):
34 34 self.data = data
35 35 self.s = struct.calcsize(indexformat)
36 36 self.l = len(data)/self.s
37 37 self.index = [None] * self.l
38 38 self.map = {nullid: -1}
39 39
40 40 if 0:
41 41 n = 0
42 42 i = self.data
43 43 s = struct.calcsize(indexformat)
44 44 for f in xrange(0, len(i), s):
45 45 # offset, size, base, linkrev, p1, p2, nodeid
46 46 e = struct.unpack(indexformat, i[f:f + s])
47 47 self.map[e[6]] = n
48 48 self.index.append(e)
49 49 n += 1
50 50
51 51 def load(self, pos):
52 52 block = pos / 1000
53 53 i = block * 1000
54 54 end = min(self.l, i + 1000)
55 55 while i < end:
56 56 d = self.data[i * self.s: (i + 1) * self.s]
57 57 e = struct.unpack(indexformat, d)
58 58 self.index[i] = e
59 59 self.map[e[6]] = i
60 60 i += 1
61 61
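# Sketch of one index record as load() unpacks it: indexformat packs
# offset, size, base, linkrev as big-endian longs, then p1, p2, nodeid
# as 20-byte strings, 76 bytes per entry. (_e is illustrative data.)
_e = (0, 11, 0, 0, nullid, nullid, "\x12" * 20)
assert struct.calcsize(indexformat) == 76
assert struct.unpack(indexformat, struct.pack(indexformat, *_e)) == _e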
62 62 class lazyindex:
63 63 def __init__(self, parser):
64 64 self.p = parser
65 65 def __len__(self):
66 66 return len(self.p.index)
67 67 def __getitem__(self, pos):
68 68 i = self.p.index[pos]
69 69 if not i:
70 70 self.p.load(pos)
71 71 return self.p.index[pos]
72 72 return i
73 73 def append(self, e):
74 74 self.p.index.append(e)
75 75
76 76 class lazymap:
77 77 def __init__(self, parser):
78 78 self.p = parser
79 79 def load(self, key):
80 80 n = self.p.data.find(key)
81 if n < 0: raise KeyError(key)
81 if n < 0: raise KeyError("node " + hex(key))
82 82 pos = n / self.p.s
83 83 self.p.load(pos)
84 84 def __contains__(self, key):
85 85 try:
86 86 self[key]
87 87 return True
88 88 except KeyError:
89 89 return False
90 90 def __getitem__(self, key):
91 91 try:
92 92 return self.p.map[key]
93 93 except KeyError:
94 self.load(key)
95 return self.p.map[key]
94 try:
95 self.load(key)
96 return self.p.map[key]
97 except KeyError:
98 raise KeyError("node " + hex(key))
96 99 def __setitem__(self, key, val):
97 100 self.p.map[key] = val
98 101
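# The point of this change, sketched on an empty parser: probing an
# unknown node now raises a KeyError that names the node in hex rather
# than one carrying raw binary bytes.
_m = lazymap(lazyparser(""))
try:
    _m["\x99" * 20]
except KeyError, inst:
    pass  # inst.args[0] is 'node 9999...99', readable in a traceback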
99 102 class revlog:
100 103 def __init__(self, opener, indexfile, datafile):
101 104 self.indexfile = indexfile
102 105 self.datafile = datafile
103 106 self.opener = opener
104 107 self.cache = None
105 108 # read the whole index for now, handle on-demand later
106 109 try:
107 110 i = self.opener(self.indexfile).read()
108 111 except IOError:
109 112 i = ""
110 113 parser = lazyparser(i)
111 114 self.index = lazyindex(parser)
112 115 self.nodemap = lazymap(parser)
113 116
114 117 def tip(self): return self.node(len(self.index) - 1)
115 118 def count(self): return len(self.index)
116 119 def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
117 120 def rev(self, node): return self.nodemap[node]
118 121 def linkrev(self, node): return self.index[self.nodemap[node]][3]
119 122 def parents(self, node):
120 123 if node == nullid: return (nullid, nullid)
121 124 return self.index[self.nodemap[node]][4:6]
122 125
123 126 def start(self, rev): return self.index[rev][0]
124 127 def length(self, rev): return self.index[rev][1]
125 128 def end(self, rev): return self.start(rev) + self.length(rev)
126 129 def base(self, rev): return self.index[rev][2]
127 130
128 131 def lookup(self, id):
129 132 try:
130 133 rev = int(id)
131 134 return self.node(rev)
132 135 except ValueError:
133 136 c = []
134 137 for n in self.nodemap:
135 138 if id in hex(n):
136 139 c.append(n)
137 140 if len(c) > 1: raise KeyError("Ambiguous identifier")
138 141 if len(c) < 1: raise KeyError("No match found")
139 142 return c[0]
140 143
141 144 return None
142 145
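# Sketch of the prefix matching in lookup() (standalone, with made-up
# ids): any substring of the hex node id resolves as long as exactly
# one node matches; two hits raise "Ambiguous identifier".
_nodes = [bin("11" * 20), bin("22" * 20)]
assert [n for n in _nodes if "11" in hex(n)] == [_nodes[0]]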
143 146 def revisions(self, list):
144 147 # this can be optimized to do spans, etc
145 148 # be stupid for now
146 149 for node in list:
147 150 yield self.revision(node)
148 151
149 152 def diff(self, a, b):
150 153 return mdiff.textdiff(a, b)
151 154
152 155 def patches(self, t, pl):
153 156 return mdiff.patches(t, pl)
154 157
155 158 def revision(self, node):
156 159 if node == nullid: return ""
157 160 if self.cache and self.cache[0] == node: return self.cache[2]
158 161
159 162 text = None
160 163 rev = self.rev(node)
161 164 base = self.base(rev)
162 165 start = self.start(base)
163 166 end = self.end(rev)
164 167
165 168 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
166 169 base = self.cache[1]
167 170 start = self.start(base + 1)
168 171 text = self.cache[2]
169 172 last = 0
170 173
171 174 f = self.opener(self.datafile)
172 175 f.seek(start)
173 176 data = f.read(end - start)
174 177
175 178 if not text:
176 179 last = self.length(base)
177 180 text = decompress(data[:last])
178 181
179 182 bins = []
180 183 for r in xrange(base + 1, rev + 1):
181 184 s = self.length(r)
182 185 bins.append(decompress(data[last:last + s]))
183 186 last = last + s
184 187
185 188 text = mdiff.patches(text, bins)
186 189
187 190 (p1, p2) = self.parents(node)
188 191 if node != hash(text, p1, p2):
189 192 raise "integrity check failed on %s:%d" % (self.datafile, rev)
190 193
191 194 self.cache = (node, rev, text)
192 195 return text
193 196
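# Sketch of the chain walk in revision(), assuming mdiff.textdiff and
# mdiff.patches behave as they are used elsewhere in this file: start
# from the full text at the base revision, then patch each stored
# delta through to rev in order.
_d1 = mdiff.textdiff("abc\n", "abd\n")
_d2 = mdiff.textdiff("abd\n", "abde\n")
assert mdiff.patches("abc\n", [_d1, _d2]) == "abde\n"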
194 197 def addrevision(self, text, transaction, link, p1=None, p2=None):
195 198 if text is None: text = ""
196 199 if p1 is None: p1 = self.tip()
197 200 if p2 is None: p2 = nullid
198 201
199 202 node = hash(text, p1, p2)
200 203
201 204 n = self.count()
202 205 t = n - 1
203 206
204 207 if n:
205 208 base = self.base(t)
206 209 start = self.start(base)
207 210 end = self.end(t)
208 211 prev = self.revision(self.tip())
209 212 data = compress(self.diff(prev, text))
210 213 dist = end - start + len(data)
211 214
212 215 # full versions are inserted when the needed deltas
213 216 # become comparable to the uncompressed text
214 217 if not n or dist > len(text) * 2:
215 218 data = compress(text)
216 219 base = n
217 220 else:
218 221 base = self.base(t)
219 222
220 223 offset = 0
221 224 if t >= 0:
222 225 offset = self.end(t)
223 226
224 227 e = (offset, len(data), base, link, p1, p2, node)
225 228
226 229 self.index.append(e)
227 230 self.nodemap[node] = n
228 231 entry = struct.pack(indexformat, *e)
229 232
230 233 transaction.add(self.datafile, e[0])
231 234 self.opener(self.datafile, "a").write(data)
232 235 transaction.add(self.indexfile, n * len(entry))
233 236 self.opener(self.indexfile, "a").write(entry)
234 237
235 238 self.cache = (node, n, text)
236 239 return node
237 240
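# Sketch of the cutover rule above: once the delta chain's span on disk
# grows past twice the length of the new text, a full version is stored
# and becomes the new base. (_snapshot is an illustrative name only.)
def _snapshot(dist, textlen):
    return dist > textlen * 2
assert _snapshot(2048, 1000) and not _snapshot(1500, 1000)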
238 241 def ancestor(self, a, b):
239 242 def expand(list, map):
240 243 a = []
241 244 while list:
242 245 n = list.pop(0)
243 246 map[n] = 1
244 247 yield n
245 248 for p in self.parents(n):
246 249 if p != nullid and p not in map:
247 250 list.append(p)
248 251 yield nullid
249 252
250 253 amap = {}
251 254 bmap = {}
252 255 ag = expand([a], amap)
253 256 bg = expand([b], bmap)
254 257 adone = bdone = 0
255 258
256 259 while not adone or not bdone:
257 260 if not adone:
258 261 an = ag.next()
259 262 if an == nullid:
260 263 adone = 1
261 264 elif an in bmap:
262 265 return an
263 266 if not bdone:
264 267 bn = bg.next()
265 268 if bn == nullid:
266 269 bdone = 1
267 270 elif bn in amap:
268 271 return bn
269 272
270 273 return nullid
271 274
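# Toy run of the interleaved walk above, with a hypothetical history
# a<-b and a<-c: b's walk marks {b, a}; when c's walk reaches a, it is
# already in the other side's map, so a is the common ancestor.
_amap = {"b": 1, "a": 1}          # nodes expand([b]) has marked
for _n in ["c", "a"]:             # the order expand([c]) yields
    if _n in _amap:
        break
assert _n == "a"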
272 275 def mergedag(self, other, transaction, linkseq, accumulate = None):
273 276 """combine the nodes from other's DAG into ours"""
274 277 old = self.tip()
275 278 i = self.count()
276 279 l = []
277 280
278 281 # merge the other revision log into our DAG
279 282 for r in range(other.count()):
280 283 id = other.node(r)
281 284 if id not in self.nodemap:
282 285 (xn, yn) = other.parents(id)
283 286 l.append((id, xn, yn))
284 287 self.nodemap[id] = i
285 288 i += 1
286 289
287 290 # merge node data for new nodes
288 291 r = other.revisions([e[0] for e in l])
289 292 for e in l:
290 293 t = r.next()
291 294 if accumulate: accumulate(t)
292 295 self.addrevision(t, transaction, linkseq.next(), e[1], e[2])
293 296
294 297 # return the unmerged heads for later resolving
295 298 return (old, self.tip())
296 299
297 300 def group(self, linkmap):
298 301 # given a list of changeset revs, return a set of deltas and
299 302 # metadata corresponding to nodes. the first delta is
300 303 # parent(nodes[0]) -> nodes[0]. the receiver is guaranteed to
301 304 # have this parent as it has all history before these
302 305 # changesets. parent is parent[0]
303 306
304 307 revs = []
305 308 needed = {}
306 309
307 310 # find file nodes/revs that match changeset revs
308 311 for i in xrange(0, self.count()):
309 312 if self.index[i][3] in linkmap:
310 313 revs.append(i)
311 314 needed[i] = 1
312 315
313 316 # if we don't have any revisions touched by these changesets, bail
314 317 if not revs: return struct.pack(">l", 0)
315 318
316 319 # add the parent of the first rev
317 320 p = self.parents(self.node(revs[0]))[0]
318 321 revs.insert(0, self.rev(p))
319 322
320 323 # for each delta that isn't contiguous in the log, we need to
321 324 # reconstruct the base, reconstruct the result, and then
322 325 # calculate the delta. We also need to do this where we've
323 326 # stored a full version and not a delta
324 327 for i in xrange(0, len(revs) - 1):
325 328 a, b = revs[i], revs[i + 1]
326 329 if a + 1 != b or self.base(b) == b:
327 330 for j in xrange(self.base(a), a + 1):
328 331 needed[j] = 1
329 332 for j in xrange(self.base(b), b + 1):
330 333 needed[j] = 1
331 334
332 335 # calculate spans to retrieve from datafile
333 336 needed = needed.keys()
334 337 needed.sort()
335 338 spans = []
336 339 for n in needed:
337 340 if n < 0: continue
338 341 o = self.start(n)
339 342 l = self.length(n)
340 343 spans.append((o, l, [(n, l)]))
341 344
342 345 # merge spans
343 346 merge = [spans.pop(0)]
344 347 while spans:
345 348 e = spans.pop(0)
346 349 f = merge[-1]
347 350 if e[0] == f[0] + f[1]:
348 351 merge[-1] = (f[0], f[1] + e[1], f[2] + e[2])
349 352 else:
350 353 merge.append(e)
351 354
352 355 # read spans in, divide up chunks
353 356 chunks = {}
354 357 for span in merge:
355 358 # we reopen the file for each span to make http happy for now
356 359 f = self.opener(self.datafile)
357 360 f.seek(span[0])
358 361 data = f.read(span[1])
359 362
360 363 # divide up the span
361 364 pos = 0
362 365 for r, l in span[2]:
363 366 chunks[r] = data[pos: pos + l]
364 367 pos += l
365 368
366 369 # helper to reconstruct intermediate versions
367 370 def construct(text, base, rev):
368 371 bins = [decompress(chunks[r]) for r in xrange(base + 1, rev + 1)]
369 372 return mdiff.patches(text, bins)
370 373
371 374 # build deltas
372 375 deltas = []
373 376 for d in xrange(0, len(revs) - 1):
374 377 a, b = revs[d], revs[d + 1]
375 378 n = self.node(b)
376 379
377 380 if a + 1 != b or self.base(b) == b:
378 381 if a >= 0:
379 382 base = self.base(a)
380 383 ta = decompress(chunks[self.base(a)])
381 384 ta = construct(ta, base, a)
382 385 else:
383 386 ta = ""
384 387
385 388 base = self.base(b)
386 389 if a > base:
387 390 base = a
388 391 tb = ta
389 392 else:
390 393 tb = decompress(chunks[self.base(b)])
391 394 tb = construct(tb, base, b)
392 395 d = self.diff(ta, tb)
393 396 else:
394 397 d = decompress(chunks[b])
395 398
396 399 p = self.parents(n)
397 400 meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
398 401 l = struct.pack(">l", len(meta) + len(d) + 4)
399 402 deltas.append(l + meta + d)
400 403
401 404 l = struct.pack(">l", sum(map(len, deltas)) + 4)
402 405 deltas.insert(0, l)
403 406 return "".join(deltas)
404 407
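# Sketch of the wire format built above (made-up bytes): each entry is
# a 4-byte big-endian length that covers itself, 80 bytes of metadata
# (node, p1, p2, changeset node), then the delta; addgroup() below
# unpacks the same 84-byte header.
_meta = "\x11" * 20 + "\x22" * 20 + "\x33" * 20 + "\x44" * 20
_entry = struct.pack(">l", len(_meta) + 7 + 4) + _meta + "payload"
_l, _n, _p1, _p2, _cs = struct.unpack(">l20s20s20s20s", _entry[0:84])
assert _l == len(_entry) and _entry[84:_l] == "payload"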
405 408 def addgroup(self, data, linkmapper, transaction):
406 409 # given a set of deltas, add them to the revision log. the
407 410 # first delta is against its parent, which should be in our
408 411 # log, the rest are against the previous delta.
409 412
410 413 if not data: return self.tip()
411 414
412 415 # retrieve the parent revision of the delta chain
413 416 chain = data[24:44]
414 417 if not chain in self.nodemap:
415 418 raise "unknown base %s" % short(chain[:4])
416 419
417 420 # track the base of the current delta log
418 421 r = self.count()
419 422 t = r - 1
420 423
421 424 base = prev = -1
422 425 start = end = 0
423 426 if r:
424 427 start = self.start(self.base(t))
425 428 end = self.end(t)
426 429 measure = self.length(self.base(t))
427 430 base = self.base(t)
428 431 prev = self.tip()
429 432
430 433 transaction.add(self.datafile, end)
431 434 transaction.add(self.indexfile, r * struct.calcsize(indexformat))
432 435 dfh = self.opener(self.datafile, "a")
433 436 ifh = self.opener(self.indexfile, "a")
434 437
435 438 # loop through our set of deltas
436 439 pos = 0
437 440 while pos < len(data):
438 441 l, node, p1, p2, cs = struct.unpack(">l20s20s20s20s",
439 442 data[pos:pos+84])
440 443 if node in self.nodemap:
441 444 raise "already have %s" % hex(node[:4])
442 445 link = linkmapper(cs)
443 446 delta = data[pos + 84:pos + l]
444 447 pos += l
445 448
446 449 # full versions are inserted when the needed deltas become
447 450 # comparable to the uncompressed text or when the previous
448 451 # version is not the one we have a delta against. We use
449 452 # the size of the previous full rev as a proxy for the
450 453 # current size.
451 454
452 455 if chain == prev:
453 456 cdelta = compress(delta)
454 457
455 458 if chain != prev or (end - start + len(cdelta)) > measure * 2:
456 459 # flush our writes here so we can read it in revision
457 460 dfh.flush()
458 461 ifh.flush()
459 462 text = self.revision(chain)
460 463 text = self.patches(text, [delta])
461 464 chk = self.addrevision(text, transaction, link, p1, p2)
462 465 if chk != node:
463 466 raise "consistency error adding group"
464 467 measure = len(text)
465 468 else:
466 469 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
467 470 self.index.append(e)
468 471 self.nodemap[node] = r
469 472 dfh.write(cdelta)
470 473 ifh.write(struct.pack(indexformat, *e))
471 474
472 475 t, r, chain, prev = r, r + 1, node, node
473 476 start = self.start(self.base(t))
474 477 end = self.end(t)
475 478
476 479 dfh.close()
477 480 ifh.close()
478 481 return node
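The friendlier message is easiest to see end to end. A minimal sketch, Python 2 era to match the file above; the scratch-directory opener, the faketx stand-in, and the test.i/test.d names are illustrative, not Mercurial APIs, and assume the files do not already exist:

import os, tempfile
from mercurial.revlog import revlog, nullid

_dir = tempfile.mkdtemp()
def opener(name, mode="r"):
    # trivial opener over a scratch directory
    return file(os.path.join(_dir, name), mode)

class faketx:
    # a real transaction journals (file, offset) pairs for rollback
    def add(self, name, offset): pass

r = revlog(opener, "test.i", "test.d")
node = r.addrevision("hello\n", faketx(), 0, nullid, nullid)
assert r.revision(node) == "hello\n"

try:
    r.rev("\x42" * 20)        # a node that was never stored
except KeyError, inst:
    print inst.args[0]        # 'node 4242...42' instead of raw bytes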