Change the size of the short hash representation...
mpm@selenic.com
r373:67081329 default
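The change itself is a one-liner: short() now keeps the first 6 bytes of the 20-byte node instead of 4 before hex-encoding, so abbreviated hashes grow from 8 to 12 hex digits. A minimal before/after sketch (standalone Python 3, substituting hashlib for the long-gone sha module; the sample node is made up):

import hashlib

# A made-up 20-byte node of the kind revlog's hash() produces.
node = hashlib.sha1(b"example revision text").digest()

def short_old(node):
    return node[:4].hex()    # pre-change: 8 hex digits

def short_new(node):
    return node[:6].hex()    # post-change: 12 hex digits

print(len(short_old(node)), len(short_new(node)))    # prints: 8 12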
@@ -1,535 +1,535 @@
 # revlog.py - storage back-end for mercurial
 #
 # This provides efficient delta storage with O(1) retrieve and append
 # and O(changes) merge between branches
 #
 # Copyright 2005 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms
 # of the GNU General Public License, incorporated herein by reference.
 
 import zlib, struct, sha, binascii, heapq
 from mercurial import mdiff
 
 def hex(node): return binascii.hexlify(node)
 def bin(node): return binascii.unhexlify(node)
-def short(node): return hex(node[:4])
+def short(node): return hex(node[:6])
 
 def compress(text):
     if not text: return text
     if len(text) < 44:
         if text[0] == '\0': return text
         return 'u' + text
     bin = zlib.compress(text)
     if len(bin) > len(text):
         if text[0] == '\0': return text
         return 'u' + text
     return bin
 
 def decompress(bin):
     if not bin: return bin
     t = bin[0]
     if t == '\0': return bin
     if t == 'x': return zlib.decompress(bin)
     if t == 'u': return bin[1:]
     raise "unknown compression type %s" % t
 
 def hash(text, p1, p2):
     l = [p1, p2]
     l.sort()
     return sha.sha(l[0] + l[1] + text).digest()
 
 nullid = "\0" * 20
 indexformat = ">4l20s20s20s"
 
 class lazyparser:
     def __init__(self, data, revlog):
         self.data = data
         self.s = struct.calcsize(indexformat)
         self.l = len(data)/self.s
         self.index = [None] * self.l
         self.map = {nullid: -1}
         self.all = 0
         self.revlog = revlog
 
     def load(self, pos=None):
         if self.all: return
         if pos is not None:
             block = pos / 1000
             i = block * 1000
             end = min(self.l, i + 1000)
         else:
             self.all = 1
             i = 0
             end = self.l
             self.revlog.index = self.index
             self.revlog.nodemap = self.map
 
         while i < end:
             d = self.data[i * self.s: (i + 1) * self.s]
             e = struct.unpack(indexformat, d)
             self.index[i] = e
             self.map[e[6]] = i
             i += 1
 
 class lazyindex:
     def __init__(self, parser):
         self.p = parser
     def __len__(self):
         return len(self.p.index)
     def load(self, pos):
         self.p.load(pos)
         return self.p.index[pos]
     def __getitem__(self, pos):
         return self.p.index[pos] or self.load(pos)
     def append(self, e):
         self.p.index.append(e)
 
 class lazymap:
     def __init__(self, parser):
         self.p = parser
     def load(self, key):
         if self.p.all: return
         n = self.p.data.find(key)
         if n < 0: raise KeyError("node " + hex(key))
         pos = n / self.p.s
         self.p.load(pos)
     def __contains__(self, key):
         self.p.load()
         return key in self.p.map
     def __iter__(self):
         for i in xrange(self.p.l):
             try:
                 yield self.p.index[i][6]
             except:
                 self.p.load(i)
                 yield self.p.index[i][6]
     def __getitem__(self, key):
         try:
             return self.p.map[key]
         except KeyError:
             try:
                 self.load(key)
                 return self.p.map[key]
             except KeyError:
                 raise KeyError("node " + hex(key))
     def __setitem__(self, key, val):
         self.p.map[key] = val
 
 class revlog:
     def __init__(self, opener, indexfile, datafile):
         self.indexfile = indexfile
         self.datafile = datafile
         self.opener = opener
         self.cache = None
 
         try:
             i = self.opener(self.indexfile).read()
         except IOError:
             i = ""
 
         if len(i) > 10000:
             # big index, let's parse it on demand
             parser = lazyparser(i, self)
             self.index = lazyindex(parser)
             self.nodemap = lazymap(parser)
         else:
             s = struct.calcsize(indexformat)
             l = len(i) / s
             self.index = [None] * l
             m = [None] * l
 
             n = 0
             for f in xrange(0, len(i), s):
                 # offset, size, base, linkrev, p1, p2, nodeid
                 e = struct.unpack(indexformat, i[f:f + s])
                 m[n] = (e[6], n)
                 self.index[n] = e
                 n += 1
 
             self.nodemap = dict(m)
             self.nodemap[nullid] = -1
 
 
     def tip(self): return self.node(len(self.index) - 1)
     def count(self): return len(self.index)
     def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
     def rev(self, node): return self.nodemap[node]
     def linkrev(self, node): return self.index[self.nodemap[node]][3]
     def parents(self, node):
         if node == nullid: return (nullid, nullid)
         return self.index[self.nodemap[node]][4:6]
 
     def start(self, rev): return self.index[rev][0]
     def length(self, rev): return self.index[rev][1]
     def end(self, rev): return self.start(rev) + self.length(rev)
     def base(self, rev): return self.index[rev][2]
 
     def heads(self):
         p = {}
         h = []
         for r in range(self.count() - 1, -1, -1):
             n = self.node(r)
             if n not in p:
                 h.append(n)
             for pn in self.parents(n):
                 p[pn] = 1
         return h
 
     def children(self, node):
         c = []
         p = self.rev(node)
         for r in range(p + 1, self.count()):
             n = self.node(r)
             for pn in self.parents(n):
                 if pn == p:
                     c.append(p)
                     continue
                 elif pn == nullid:
                     continue
         return c
 
     def lookup(self, id):
         try:
             rev = int(id)
             return self.node(rev)
         except ValueError:
             c = []
             for n in self.nodemap:
                 if id in hex(n):
                     c.append(n)
             if len(c) > 1: raise KeyError("Ambiguous identifier")
             if len(c) < 1: raise KeyError("No match found")
             return c[0]
 
         return None
 
     def diff(self, a, b):
         return mdiff.textdiff(a, b)
 
     def patches(self, t, pl):
         return mdiff.patches(t, pl)
 
     def delta(self, node):
         r = self.rev(node)
         b = self.base(r)
         if r == b:
             return self.diff(self.revision(self.node(r - 1)),
                              self.revision(node))
         else:
             f = self.opener(self.datafile)
             f.seek(self.start(r))
             data = f.read(self.length(r))
             return decompress(data)
 
     def revision(self, node):
         if node == nullid: return ""
         if self.cache and self.cache[0] == node: return self.cache[2]
 
         text = None
         rev = self.rev(node)
         start, length, base, link, p1, p2, node = self.index[rev]
         end = start + length
         if base != rev: start = self.start(base)
 
         if self.cache and self.cache[1] >= base and self.cache[1] < rev:
             base = self.cache[1]
             start = self.start(base + 1)
             text = self.cache[2]
             last = 0
 
         f = self.opener(self.datafile)
         f.seek(start)
         data = f.read(end - start)
 
         if not text:
             last = self.length(base)
             text = decompress(data[:last])
 
         bins = []
         for r in xrange(base + 1, rev + 1):
             s = self.length(r)
             bins.append(decompress(data[last:last + s]))
             last = last + s
 
         text = mdiff.patches(text, bins)
 
         if node != hash(text, p1, p2):
             raise IOError("integrity check failed on %s:%d"
                           % (self.datafile, rev))
 
         self.cache = (node, rev, text)
         return text
 
     def addrevision(self, text, transaction, link, p1=None, p2=None):
         if text is None: text = ""
         if p1 is None: p1 = self.tip()
         if p2 is None: p2 = nullid
 
         node = hash(text, p1, p2)
 
         if node in self.nodemap:
             return node
 
         n = self.count()
         t = n - 1
 
         if n:
             base = self.base(t)
             start = self.start(base)
             end = self.end(t)
             prev = self.revision(self.tip())
             d = self.diff(prev, text)
             data = compress(d)
             dist = end - start + len(data)
 
         # full versions are inserted when the needed deltas
         # become comparable to the uncompressed text
         if not n or dist > len(text) * 2:
             data = compress(text)
             base = n
         else:
             base = self.base(t)
 
         offset = 0
         if t >= 0:
             offset = self.end(t)
 
         e = (offset, len(data), base, link, p1, p2, node)
 
         self.index.append(e)
         self.nodemap[node] = n
         entry = struct.pack(indexformat, *e)
 
         transaction.add(self.datafile, e[0])
         self.opener(self.datafile, "a").write(data)
         transaction.add(self.indexfile, n * len(entry))
         self.opener(self.indexfile, "a").write(entry)
 
         self.cache = (node, n, text)
         return node
 
     def ancestor(self, a, b):
         # calculate the distance of every node from root
         dist = {nullid: 0}
         for i in xrange(self.count()):
             n = self.node(i)
             p1, p2 = self.parents(n)
             dist[n] = max(dist[p1], dist[p2]) + 1
 
         # traverse ancestors in order of decreasing distance from root
         def ancestors(node):
             # we store negative distances because heap returns smallest member
             h = [(-dist[node], node)]
             seen = {}
             earliest = self.count()
             while h:
                 d, n = heapq.heappop(h)
                 r = self.rev(n)
                 if n not in seen:
                     seen[n] = 1
                     yield (-d, n)
                     for p in self.parents(n):
                         heapq.heappush(h, (-dist[p], p))
 
         x = ancestors(a)
         y = ancestors(b)
         lx = x.next()
         ly = y.next()
 
         # increment each ancestor list until it is closer to root than
         # the other, or they match
         while 1:
             if lx == ly:
                 return lx[1]
             elif lx < ly:
                 ly = y.next()
             elif lx > ly:
                 lx = x.next()
 
     def group(self, linkmap):
         # given a list of changeset revs, return a set of deltas and
         # metadata corresponding to nodes. the first delta is
         # parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
         # have this parent as it has all history before these
         # changesets. parent is parent[0]
 
         revs = []
         needed = {}
 
         # find file nodes/revs that match changeset revs
         for i in xrange(0, self.count()):
             if self.index[i][3] in linkmap:
                 revs.append(i)
                 needed[i] = 1
 
         # if we don't have any revisions touched by these changesets, bail
         if not revs:
             yield struct.pack(">l", 0)
             return
 
         # add the parent of the first rev
         p = self.parents(self.node(revs[0]))[0]
         revs.insert(0, self.rev(p))
 
         # for each delta that isn't contiguous in the log, we need to
         # reconstruct the base, reconstruct the result, and then
         # calculate the delta. We also need to do this where we've
         # stored a full version and not a delta
         for i in xrange(0, len(revs) - 1):
             a, b = revs[i], revs[i + 1]
             if a + 1 != b or self.base(b) == b:
                 for j in xrange(self.base(a), a + 1):
                     needed[j] = 1
                 for j in xrange(self.base(b), b + 1):
                     needed[j] = 1
 
         # calculate spans to retrieve from datafile
         needed = needed.keys()
         needed.sort()
         spans = []
         oo = -1
         ol = 0
         for n in needed:
             if n < 0: continue
             o = self.start(n)
             l = self.length(n)
             if oo + ol == o: # can we merge with the previous?
                 nl = spans[-1][2]
                 nl.append((n, l))
                 ol += l
                 spans[-1] = (oo, ol, nl)
             else:
                 oo = o
                 ol = l
                 spans.append((oo, ol, [(n, l)]))
 
         # read spans in, divide up chunks
         chunks = {}
         for span in spans:
             # we reopen the file for each span to make http happy for now
             f = self.opener(self.datafile)
             f.seek(span[0])
             data = f.read(span[1])
 
             # divide up the span
             pos = 0
             for r, l in span[2]:
                 chunks[r] = decompress(data[pos: pos + l])
                 pos += l
 
         # helper to reconstruct intermediate versions
         def construct(text, base, rev):
             bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
             return mdiff.patches(text, bins)
 
         # build deltas
         deltas = []
         for d in xrange(0, len(revs) - 1):
             a, b = revs[d], revs[d + 1]
             n = self.node(b)
 
             # do we need to construct a new delta?
             if a + 1 != b or self.base(b) == b:
                 if a >= 0:
                     base = self.base(a)
                     ta = chunks[self.base(a)]
                     ta = construct(ta, base, a)
                 else:
                     ta = ""
 
                 base = self.base(b)
                 if a > base:
                     base = a
                     tb = ta
                 else:
                     tb = chunks[self.base(b)]
                 tb = construct(tb, base, b)
                 d = self.diff(ta, tb)
             else:
                 d = chunks[b]
 
             p = self.parents(n)
             meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
             l = struct.pack(">l", len(meta) + len(d) + 4)
             yield l
             yield meta
             yield d
 
         yield struct.pack(">l", 0)
 
     def addgroup(self, revs, linkmapper, transaction, unique = 0):
         # given a set of deltas, add them to the revision log. the
         # first delta is against its parent, which should be in our
         # log, the rest are against the previous delta.
 
         # track the base of the current delta log
         r = self.count()
         t = r - 1
         node = nullid
 
         base = prev = -1
         start = end = 0
         if r:
             start = self.start(self.base(t))
             end = self.end(t)
             measure = self.length(self.base(t))
             base = self.base(t)
             prev = self.tip()
 
         transaction.add(self.datafile, end)
         transaction.add(self.indexfile, r * struct.calcsize(indexformat))
         dfh = self.opener(self.datafile, "a")
         ifh = self.opener(self.indexfile, "a")
 
         # loop through our set of deltas
         chain = None
         for chunk in revs:
             node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
             link = linkmapper(cs)
             if node in self.nodemap:
                 # this can happen if two branches make the same change
                 if unique:
                     raise "already have %s" % hex(node[:4])
                 continue
             delta = chunk[80:]
 
             if not chain:
                 # retrieve the parent revision of the delta chain
                 chain = p1
                 if not chain in self.nodemap:
                     raise "unknown base %s" % short(chain[:4])
 
             # full versions are inserted when the needed deltas become
             # comparable to the uncompressed text or when the previous
             # version is not the one we have a delta against. We use
             # the size of the previous full rev as a proxy for the
             # current size.
 
             if chain == prev:
                 cdelta = compress(delta)
 
             if chain != prev or (end - start + len(cdelta)) > measure * 2:
                 # flush our writes here so we can read it in revision
                 dfh.flush()
                 ifh.flush()
                 text = self.revision(chain)
                 text = self.patches(text, [delta])
                 chk = self.addrevision(text, transaction, link, p1, p2)
                 if chk != node:
                     raise "consistency error adding group"
                 measure = len(text)
             else:
                 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
                 self.index.append(e)
                 self.nodemap[node] = r
                 dfh.write(cdelta)
                 ifh.write(struct.pack(indexformat, *e))
 
             t, r, chain, prev = r, r + 1, node, node
             start = self.start(self.base(t))
             end = self.end(t)
 
         dfh.close()
         ifh.close()
         return node
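For context on why the extra two bytes matter: lookup() in the file above resolves any unambiguous hex prefix and raises KeyError("Ambiguous identifier") when two nodes share one, and the odds of such a clash follow the birthday bound. A rough back-of-the-envelope sketch (my own illustration, not code from this commit): among n random 20-byte nodes, the expected number of pairs agreeing on a k-byte prefix is about n^2 / 2^(8k+1).

# Expected colliding pairs among n random nodes when compared on
# their first k bytes: roughly C(n,2) / 2^(8k) ~ n^2 / 2^(8k+1).
def expected_collisions(n, k):
    return n * n / 2.0 ** (8 * k + 1)

for n in (10**4, 10**6):
    print(n, expected_collisions(n, 4), expected_collisions(n, 6))

At a million revisions, a 4-byte (8-digit) prefix already expects about 116 colliding pairs, while a 6-byte (12-digit) prefix expects about 0.002.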
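A detail of the storage format above that is easy to miss: compress() tags every chunk so decompress() can tell the encodings apart. zlib streams happen to begin with 'x' (0x78), literal text gets a 'u' prefix, and data that already starts with a NUL byte is stored untouched. A standalone round-trip sketch of the same scheme (ported to Python 3 bytes; the original works on Python 2 strings):

import zlib

def compress(text):
    if not text:
        return text
    if len(text) < 44:                  # too short to be worth deflating
        if text[:1] == b'\0':
            return text                 # NUL-led data is already self-tagging
        return b'u' + text              # 'u' marks an uncompressed literal
    bin = zlib.compress(text)
    if len(bin) > len(text):            # compression did not help
        if text[:1] == b'\0':
            return text
        return b'u' + text
    return bin                          # zlib output begins with 'x' (0x78)

def decompress(bin):
    if not bin:
        return bin
    t = bin[:1]
    if t == b'\0':
        return bin
    if t == b'x':
        return zlib.decompress(bin)
    if t == b'u':
        return bin[1:]
    raise ValueError("unknown compression type %r" % t)

for sample in (b"", b"tiny", b"a banana " * 200):
    assert decompress(compress(sample)) == sample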