@@ -1,12 +1,14 b'' | |||
|
1 | # revlog.py - storage back-end for mercurial | |
|
2 | # | |
|
3 | # This provides efficient delta storage with O(1) retrieve and append | |
|
4 | # and O(changes) merge between branches | |
|
5 | # | |
|
6 | # Copyright 2005 Matt Mackall <mpm@selenic.com> | |
|
7 | # | |
|
8 | # This software may be used and distributed according to the terms | |
|
9 | # of the GNU General Public License, incorporated herein by reference. | |
|
1 | """ | |
|
2 | revlog.py - storage back-end for mercurial | |
|
3 | ||
|
4 | This provides efficient delta storage with O(1) retrieve and append | |
|
5 | and O(changes) merge between branches | |
|
6 | ||
|
7 | Copyright 2005 Matt Mackall <mpm@selenic.com> | |
|
8 | ||
|
9 | This software may be used and distributed according to the terms | |
|
10 | of the GNU General Public License, incorporated herein by reference. | |
|
11 | """ | |
|
10 | 12 | |
|
11 | 13 | import zlib, struct, sha, binascii, heapq |
|
12 | 14 | from mercurial import mdiff |
@@ -16,6 +18,7 b' def bin(node): return binascii.unhexlify' | |||
|
16 | 18 | def short(node): return hex(node[:6]) |
|
17 | 19 | |
|
18 | 20 | def compress(text): |
|
21 | """ generate a possibly-compressed representation of text """ | |
|
19 | 22 | if not text: return text |
|
20 | 23 | if len(text) < 44: |
|
21 | 24 | if text[0] == '\0': return text |
@@ -27,6 +30,7 b' def compress(text):' | |||
|
27 | 30 | return bin |
|
28 | 31 | |
|
29 | 32 | def decompress(bin): |
|
33 | """ decompress the given input """ | |
|
30 | 34 | if not bin: return bin |
|
31 | 35 | t = bin[0] |
|
32 | 36 | if t == '\0': return bin |
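
Taken together, the compress() and decompress() docstrings describe a type-byte convention: a leading NUL means raw metadata, 'x' a zlib stream, and incompressible text is stored behind an explicit marker. A standalone sketch of that convention in modern Python (the 'u' marker and the exact size checks are assumptions of the sketch, not part of this change):

    import zlib

    def compress_sketch(text):
        # empty input and NUL-prefixed metadata are stored untouched;
        # otherwise keep whichever of zlib output or marked raw text is smaller
        if not text or text[:1] == b"\0":
            return text
        comp = zlib.compress(text)
        return comp if len(comp) < len(text) else b"u" + text

    def decompress_sketch(data):
        # dispatch on the leading type byte
        if not data or data[:1] == b"\0":
            return data
        if data[:1] == b"x":               # zlib streams begin with 'x'
            return zlib.decompress(data)
        if data[:1] == b"u":               # marked as stored uncompressed
            return data[1:]
        raise ValueError("unknown compression type %r" % data[:1])

    sample = b"some revision text " * 20
    assert decompress_sketch(compress_sketch(sample)) == sample
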
@@ -35,6 +39,12 b' def decompress(bin):' | |||
|
35 | 39 | raise RevlogError("unknown compression type %s" % t) |
|
36 | 40 | |
|
37 | 41 | def hash(text, p1, p2): |
|
42 | """generate a hash from the given text and its parent hashes | |
|
43 | ||
|
44 | This hash combines both the current file contents and its history | |
|
45 | in a manner that makes it easy to distinguish nodes with the same | |
|
46 | content in the revision graph. | |
|
47 | """ | |
|
38 | 48 | l = [p1, p2] |
|
39 | 49 | l.sort() |
|
40 | 50 | s = sha.new(l[0]) |
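
The hash() docstring is the core of the nodeid scheme: two revisions with identical text but different parents hash differently, while parent order does not matter. A sketch of the same computation using hashlib in place of the now-deprecated sha module (node_hash is a hypothetical stand-in; all arguments are bytes):

    from hashlib import sha1

    def node_hash(text, p1, p2):
        # sort the parents so the nodeid is independent of parent order,
        # then hash both parents followed by the revision text
        a, b = sorted([p1, p2])
        s = sha1(a)
        s.update(b)
        s.update(text)
        return s.digest()                  # a 20-byte nodeid

    null = b"\0" * 20
    p = node_hash(b"v1", null, null)
    assert node_hash(b"v2", p, null) == node_hash(b"v2", null, p)  # order-independent
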
@@ -46,6 +56,15 b' nullid = "\\0" * 20' | |||
|
46 | 56 | indexformat = ">4l20s20s20s" |
|
47 | 57 | |
|
48 | 58 | class lazyparser: |
|
59 | """ | |
|
60 | this class avoids the need to parse the entirety of large indices | |
|
61 | ||
|
62 | By default we parse and load 1000 entries at a time. | |
|
63 | ||
|
64 | If no position is specified, we load the whole index, and replace | |
|
65 | the lazy objects in revlog with the underlying objects for | |
|
66 | efficiency in cases where we look at most of the nodes. | |
|
67 | """ | |
|
49 | 68 | def __init__(self, data, revlog): |
|
50 | 69 | self.data = data |
|
51 | 70 | self.s = struct.calcsize(indexformat) |
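
The lazyparser docstring describes chunked, on-demand decoding of fixed-size index records. A self-contained sketch of that idea using the indexformat defined above (the class name and chunking details are illustrative):

    import struct

    INDEXFORMAT = ">4l20s20s20s"    # start, length, base, linkrev, p1, p2, nodeid

    class LazyIndexSketch:
        """Decode fixed-size index records only when they are first needed."""
        def __init__(self, data, chunk=1000):
            self.data = data
            self.size = struct.calcsize(INDEXFORMAT)
            self.chunk = chunk
            self.entries = {}

        def __len__(self):
            return len(self.data) // self.size

        def __getitem__(self, rev):
            if rev not in self.entries:
                # decode a whole run of records around the one requested
                first = (rev // self.chunk) * self.chunk
                for i in range(first, min(first + self.chunk, len(self))):
                    off = i * self.size
                    self.entries[i] = struct.unpack(
                        INDEXFORMAT, self.data[off:off + self.size])
            return self.entries[rev]
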
@@ -76,6 +95,7 b' class lazyparser:' | |||
|
76 | 95 | i += 1 |
|
77 | 96 | |
|
78 | 97 | class lazyindex: |
|
98 | """a lazy version of the index array""" | |
|
79 | 99 | def __init__(self, parser): |
|
80 | 100 | self.p = parser |
|
81 | 101 | def __len__(self): |
@@ -89,6 +109,7 b' class lazyindex:' | |||
|
89 | 109 | self.p.index.append(e) |
|
90 | 110 | |
|
91 | 111 | class lazymap: |
|
112 | """a lazy version of the node map""" | |
|
92 | 113 | def __init__(self, parser): |
|
93 | 114 | self.p = parser |
|
94 | 115 | def load(self, key): |
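
lazymap is the reverse mapping, nodeid to revision number, filled in only as far as lookups require. A sketch of that on-demand scan, assuming index entries whose last field is the 20-byte nodeid:

    class LazyNodeMapSketch:
        """Map nodeid -> revision number, filling the map only on demand."""
        def __init__(self, index):          # index entries end with the nodeid
            self.index = index
            self.map = {}
            self.scanned = 0

        def __getitem__(self, node):
            # scan forward through the index until the requested node appears
            while node not in self.map and self.scanned < len(self.index):
                self.map[self.index[self.scanned][-1]] = self.scanned
                self.scanned += 1
            return self.map[node]           # KeyError if the node is unknown

        def __contains__(self, node):
            try:
                self[node]
                return True
            except KeyError:
                return False
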
@@ -123,7 +144,37 b' class lazymap:' | |||
|
123 | 144 | class RevlogError(Exception): pass |
|
124 | 145 | |
|
125 | 146 | class revlog: |
|
147 | """ | |
|
148 | the underlying revision storage object | |
|
149 | ||
|
150 | A revlog consists of two parts, an index and the revision data. | |
|
151 | ||
|
152 | The index is a file with a fixed record size containing | |
|
153 | information on each revision, including its nodeid (hash), the | |
|
154 | nodeids of its parents, the position and offset of its data within | |
|
155 | the data file, and the revision it's based on. Finally, each entry | |
|
156 | contains a linkrev entry that can serve as a pointer to external | |
|
157 | data. | |
|
158 | ||
|
159 | The revision data itself is a linear collection of data chunks. | |
|
160 | Each chunk represents a revision and is usually represented as a | |
|
161 | delta against the previous chunk. To bound lookup time, runs of | |
|
162 | deltas are limited to about 2 times the length of the original | |
|
163 | version data. This makes retrieval of a version proportional to | |
|
164 | its size, or O(1) relative to the number of revisions. | |
|
165 | ||
|
166 | Both pieces of the revlog are written to in an append-only | |
|
167 | fashion, which means we never need to rewrite a file to insert or | |
|
168 | remove data, and can use some simple techniques to avoid the need | |
|
169 | for locking while reading. | |
|
170 | """ | |
|
126 | 171 | def __init__(self, opener, indexfile, datafile): |
|
172 | """ | |
|
173 | create a revlog object | |
|
174 | ||
|
175 | opener is a function that abstracts the file opening operation | |
|
176 | and can be used to implement COW semantics or the like. | |
|
177 | """ | |
|
127 | 178 | self.indexfile = indexfile |
|
128 | 179 | self.datafile = datafile |
|
129 | 180 | self.opener = opener |
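
The __init__ docstring deliberately leaves the opener abstract: the simplest conforming opener just resolves paths against a store directory, and copy-on-write or any other policy can hide behind the same signature. A hypothetical example (make_opener and the paths shown are illustrative):

    import os

    def make_opener(base):
        # a minimal opener: resolve every path against one directory; COW,
        # caching, or remote access could hide behind the same signature
        def opener(path, mode="rb"):
            return open(os.path.join(base, path), mode)
        return opener

    # e.g. revlog(make_opener("/some/repo/store"), "data.i", "data.d")
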
@@ -193,12 +244,13 b' class revlog:' | |||
|
193 | 244 | return reachable |
|
194 | 245 | |
|
195 | 246 | def heads(self, stop=None): |
|
247 | """return the list of all nodes that have no children""" | |
|
196 | 248 | p = {} |
|
197 | 249 | h = [] |
|
198 | 250 | stoprev = 0 |
|
199 | 251 | if stop and stop in self.nodemap: |
|
200 | 252 | stoprev = self.rev(stop) |
|
201 | ||
|
253 | ||
|
202 | 254 | for r in range(self.count() - 1, -1, -1): |
|
203 | 255 | n = self.node(r) |
|
204 | 256 | if n not in p: |
@@ -212,6 +264,7 b' class revlog:' | |||
|
212 | 264 | return h |
|
213 | 265 | |
|
214 | 266 | def children(self, node): |
|
267 | """find the children of a given node""" | |
|
215 | 268 | c = [] |
|
216 | 269 | p = self.rev(node) |
|
217 | 270 | for r in range(p + 1, self.count()): |
@@ -225,6 +278,7 b' class revlog:' | |||
|
225 | 278 | return c |
|
226 | 279 | |
|
227 | 280 | def lookup(self, id): |
|
281 | """locate a node based on revision number or subset of hex nodeid""" | |
|
228 | 282 | try: |
|
229 | 283 | rev = int(id) |
|
230 | 284 | if str(rev) != id: raise ValueError |
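
A sketch of the lookup rule this docstring states: a plain integer string names a revision, anything else is matched as a hex-nodeid prefix (error handling and ambiguous prefixes are glossed over; lookup_sketch is a hypothetical helper):

    import binascii

    def lookup_sketch(rlog, id):
        # a plain integer string is taken as a revision number
        try:
            rev = int(id)
            if str(rev) != id:              # reject forms such as "0x1f" or " 2"
                raise ValueError
            return rlog.node(rev)
        except ValueError:
            pass
        # anything else is matched as a prefix of the full hex nodeid
        for n in rlog.nodemap:
            if binascii.hexlify(n).decode().startswith(id):
                return n
        return None
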
@@ -243,12 +297,15 b' class revlog:' | |||
|
243 | 297 | return None |
|
244 | 298 | |
|
245 | 299 | def diff(self, a, b): |
|
300 | """return a delta between two revisions""" | |
|
246 | 301 | return mdiff.textdiff(a, b) |
|
247 | 302 | |
|
248 | 303 | def patches(self, t, pl): |
|
304 | """apply a list of patches to a string""" | |
|
249 | 305 | return mdiff.patches(t, pl) |
|
250 | 306 | |
|
251 | 307 | def delta(self, node): |
|
308 | """return or calculate a delta between a node and its predecessor""" | |
|
252 | 309 | r = self.rev(node) |
|
253 | 310 | b = self.base(r) |
|
254 | 311 | if r == b: |
@@ -261,15 +318,18 b' class revlog:' | |||
|
261 | 318 | return decompress(data) |
|
262 | 319 | |
|
263 | 320 | def revision(self, node): |
|
321 | """return an uncompressed revision of a given node""" | |
|
264 | 322 | if node == nullid: return "" |
|
265 | 323 | if self.cache and self.cache[0] == node: return self.cache[2] |
|
266 | 324 | |
|
325 | # look up what we need to read | |
|
267 | 326 | text = None |
|
268 | 327 | rev = self.rev(node) |
|
269 | 328 | start, length, base, link, p1, p2, node = self.index[rev] |
|
270 | 329 | end = start + length |
|
271 | 330 | if base != rev: start = self.start(base) |
|
272 | 331 | |
|
332 | # do we have useful data cached? | |
|
273 | 333 | if self.cache and self.cache[1] >= base and self.cache[1] < rev: |
|
274 | 334 | base = self.cache[1] |
|
275 | 335 | start = self.start(base + 1) |
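
The comments added to revision() mark the two halves of delta-chain reconstruction: locate the chain base, then apply the run of deltas in order. A compressed sketch of that walk (chunk_at is a hypothetical accessor returning one revision's stored chunk, already decompressed):

    from mercurial import mdiff

    def reconstruct(rlog, rev, chunk_at):
        # walk from the chain base up to rev and apply every delta in order
        base = rlog.base(rev)
        text = chunk_at(rlog, base)                   # full text at the chain base
        deltas = [chunk_at(rlog, r) for r in range(base + 1, rev + 1)]
        return mdiff.patches(text, deltas) if deltas else text
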
@@ -300,6 +360,14 b' class revlog:' | |||
|
300 | 360 | return text |
|
301 | 361 | |
|
302 | 362 | def addrevision(self, text, transaction, link, p1=None, p2=None, d=None): |
|
363 | """add a revision to the log | |
|
364 | ||
|
365 | text - the revision data to add | |
|
366 | transaction - the transaction object used for rollback | |
|
367 | link - the linkrev data to add | |
|
368 | p1, p2 - the parent nodeids of the revision | |
|
369 | d - an optional precomputed delta | |
|
370 | """ | |
|
303 | 371 | if text is None: text = "" |
|
304 | 372 | if p1 is None: p1 = self.tip() |
|
305 | 373 | if p2 is None: p2 = nullid |
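
Beyond the parameter list, the interesting decision inside addrevision is the one the class docstring alludes to: extend the current delta chain, or start a new base once the chain would exceed roughly twice the full text. A sketch of that decision (names and the exact threshold are illustrative; diff() is the method documented above, and compression is ignored here):

    def choose_storage(rlog, text, tip_text, chain_size):
        # keep extending the current delta chain, or start a fresh base once
        # the accumulated chain would exceed roughly twice the full text
        delta = rlog.diff(tip_text, text)
        if chain_size + len(delta) > 2 * len(text):
            return text, True          # new base: store the full revision
        return delta, False            # small enough: store only the delta
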
@@ -349,6 +417,7 b' class revlog:' | |||
|
349 | 417 | return node |
|
350 | 418 | |
|
351 | 419 | def ancestor(self, a, b): |
|
420 | """calculate the least common ancestor of nodes a and b""" | |
|
352 | 421 | # calculate the distance of every node from root |
|
353 | 422 | dist = {nullid: 0} |
|
354 | 423 | for i in xrange(self.count()): |
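
The body of ancestor() starts by computing every node's distance from the root; the "least common ancestor" then falls out of a deepest-first walk. A simplified sketch of the same idea, not this method's exact algorithm (parents() is assumed to return a node's two parent ids):

    import heapq

    NULLID = b"\0" * 20                    # this module's nullid, as bytes

    def common_ancestor_sketch(rlog, a, b, depth):
        # depth maps node -> distance from root, like the dist dict built above
        ancestors_of_a, stack = set(), [a]
        while stack:
            n = stack.pop()
            if n not in ancestors_of_a and n != NULLID:
                ancestors_of_a.add(n)
                stack.extend(rlog.parents(n))

        # walk b's ancestors deepest-first; the first common node is the answer
        heap, seen = [(-depth[b], b)], {b}
        while heap:
            _, n = heapq.heappop(heap)
            if n in ancestors_of_a:
                return n
            if n == NULLID:
                continue
            for p in rlog.parents(n):
                if p not in seen:
                    seen.add(p)
                    heapq.heappush(heap, (-depth[p], p))
        return NULLID
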
@@ -387,12 +456,14 b' class revlog:' | |||
|
387 | 456 | lx = x.next() |
|
388 | 457 | |
|
389 | 458 | def group(self, linkmap): |
|
390 | # given a list of changeset revs, return a set of deltas and | |
|
391 | # metadata corresponding to nodes. the first delta is | |
|
392 | # parent(nodes[0]) -> nodes[0] the receiver is guaranteed to | |
|
393 | # have this parent as it has all history before these | |
|
394 | # changesets. parent is parent[0] | |
|
459 | """calculate a delta group | |
|
395 | 460 |
|
461 | Given a list of changeset revs, return a set of deltas and | |
|
462 | metadata corresponding to nodes. the first delta is | |
|
463 | parent(nodes[0]) -> nodes[0] the receiver is guaranteed to | |
|
464 | have this parent as it has all history before these | |
|
465 | changesets. parent is parent[0] | |
|
466 | """ | |
|
396 | 467 | revs = [] |
|
397 | 468 | needed = {} |
|
398 | 469 | |
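
The docstring pins down the delta ordering: the first delta is taken against the first node's parent, and each later delta chains from the previous node. A sketch of that ordering (delta_group_sketch and the assumed parents() accessor are illustrative; diff() and revision() are documented above):

    def delta_group_sketch(rlog, nodes):
        # first delta is against the first node's parent; every later delta
        # chains from the previous node (parents() assumed -> (p1, p2))
        prev = rlog.parents(nodes[0])[0]
        for node in nodes:
            yield prev, node, rlog.diff(rlog.revision(prev), rlog.revision(node))
            prev = node
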
@@ -498,11 +569,15 b' class revlog:' | |||
|
498 | 569 | yield struct.pack(">l", 0) |
|
499 | 570 | |
|
500 | 571 | def addgroup(self, revs, linkmapper, transaction, unique=0): |
|
501 | # given a set of deltas, add them to the revision log. the | |
|
502 | # first delta is against its parent, which should be in our | |
|
503 | # log, the rest are against the previous delta. | |
|
572 | """ | |
|
573 | add a delta group | |
|
504 | 574 |
|
505 | # track the base of the current delta log | |
|
575 | given a set of deltas, add them to the revision log. the | |
|
576 | first delta is against its parent, which should be in our | |
|
577 | log, the rest are against the previous delta. | |
|
578 | """ | |
|
579 | ||
|
580 | # track the base of the current delta log | |
|
506 | 581 | r = self.count() |
|
507 | 582 | t = r - 1 |
|
508 | 583 | node = nullid |
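
The terminating struct.pack(">l", 0) visible above hints at how a delta group travels: each chunk behind a 4-byte big-endian length, with a zero length closing the group. A sketch of writing and reading that framing (whether the length field counts its own four bytes is an assumption of the sketch):

    import struct

    def frame_chunks(chunks):
        # length-prefix each chunk; a bare zero length ends the group
        for data in chunks:
            yield struct.pack(">l", len(data) + 4) + data
        yield struct.pack(">l", 0)

    def read_chunks(read):
        # inverse of the above; read(n) must return exactly n bytes
        while True:
            length = struct.unpack(">l", read(4))[0]
            if length <= 4:
                return
            yield read(length - 4)
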