##// END OF EJS Templates
Add some docstrings to revlog.py
mpm@selenic.com -
r1083:30974cf7 default
parent child Browse files
Show More
@@ -1,12 +1,14 b''
1 # revlog.py - storage back-end for mercurial
2 #
3 # This provides efficient delta storage with O(1) retrieve and append
4 # and O(changes) merge between branches
5 #
6 # Copyright 2005 Matt Mackall <mpm@selenic.com>
7 #
8 # This software may be used and distributed according to the terms
9 # of the GNU General Public License, incorporated herein by reference.
1 """
2 revlog.py - storage back-end for mercurial
3
4 This provides efficient delta storage with O(1) retrieve and append
5 and O(changes) merge between branches
6
7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8
9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
11 """
10 12
11 13 import zlib, struct, sha, binascii, heapq
12 14 from mercurial import mdiff
@@ -16,6 +18,7 b' def bin(node): return binascii.unhexlify'
16 18 def short(node): return hex(node[:6])
17 19
18 20 def compress(text):
21 """ generate a possibly-compressed representation of text """
19 22 if not text: return text
20 23 if len(text) < 44:
21 24 if text[0] == '\0': return text
@@ -27,6 +30,7 b' def compress(text):'
27 30 return bin
28 31
29 32 def decompress(bin):
33 """ decompress the given input """
30 34 if not bin: return bin
31 35 t = bin[0]
32 36 if t == '\0': return bin
@@ -35,6 +39,12 b' def decompress(bin):'
35 39 raise RevlogError("unknown compression type %s" % t)
36 40
37 41 def hash(text, p1, p2):
42 """generate a hash from the given text and its parent hashes
43
44 This hash combines both the current file contents and its history
45 in a manner that makes it easy to distinguish nodes with the same
46 content in the revision graph.
47 """
38 48 l = [p1, p2]
39 49 l.sort()
40 50 s = sha.new(l[0])
@@ -46,6 +56,15 b' nullid = "\\0" * 20'
46 56 indexformat = ">4l20s20s20s"
47 57
48 58 class lazyparser:
59 """
60 this class avoids the need to parse the entirety of large indices
61
62 By default we parse and load 1000 entries at a time.
63
64 If no position is specified, we load the whole index, and replace
65 the lazy objects in revlog with the underlying objects for
66 efficiency in cases where we look at most of the nodes.
67 """
49 68 def __init__(self, data, revlog):
50 69 self.data = data
51 70 self.s = struct.calcsize(indexformat)
@@ -76,6 +95,7 b' class lazyparser:'
76 95 i += 1
77 96
78 97 class lazyindex:
98 """a lazy version of the index array"""
79 99 def __init__(self, parser):
80 100 self.p = parser
81 101 def __len__(self):
@@ -89,6 +109,7 b' class lazyindex:'
89 109 self.p.index.append(e)
90 110
91 111 class lazymap:
112 """a lazy version of the node map"""
92 113 def __init__(self, parser):
93 114 self.p = parser
94 115 def load(self, key):
@@ -123,7 +144,37 b' class lazymap:'
123 144 class RevlogError(Exception): pass
124 145
125 146 class revlog:
147 """
148 the underlying revision storage object
149
150 A revlog consists of two parts, an index and the revision data.
151
152 The index is a file with a fixed record size containing
153 information on each revision, includings its nodeid (hash), the
154 nodeids of its parents, the position and offset of its data within
155 the data file, and the revision it's based on. Finally, each entry
156 contains a linkrev entry that can serve as a pointer to external
157 data.
158
159 The revision data itself is a linear collection of data chunks.
160 Each chunk represents a revision and is usually represented as a
161 delta against the previous chunk. To bound lookup time, runs of
162 deltas are limited to about 2 times the length of the original
163 version data. This makes retrieval of a version proportional to
164 its size, or O(1) relative to the number of revisions.
165
166 Both pieces of the revlog are written to in an append-only
167 fashion, which means we never need to rewrite a file to insert or
168 remove data, and can use some simple techniques to avoid the need
169 for locking while reading.
170 """
126 171 def __init__(self, opener, indexfile, datafile):
172 """
173 create a revlog object
174
175 opener is a function that abstracts the file opening operation
176 and can be used to implement COW semantics or the like.
177 """
127 178 self.indexfile = indexfile
128 179 self.datafile = datafile
129 180 self.opener = opener
@@ -193,6 +244,7 b' class revlog:'
193 244 return reachable
194 245
195 246 def heads(self, stop=None):
247 """return the list of all nodes that have no children"""
196 248 p = {}
197 249 h = []
198 250 stoprev = 0
@@ -212,6 +264,7 b' class revlog:'
212 264 return h
213 265
214 266 def children(self, node):
267 """find the children of a given node"""
215 268 c = []
216 269 p = self.rev(node)
217 270 for r in range(p + 1, self.count()):
@@ -225,6 +278,7 b' class revlog:'
225 278 return c
226 279
227 280 def lookup(self, id):
281 """locate a node based on revision number or subset of hex nodeid"""
228 282 try:
229 283 rev = int(id)
230 284 if str(rev) != id: raise ValueError
@@ -243,12 +297,15 b' class revlog:'
243 297 return None
244 298
245 299 def diff(self, a, b):
300 """return a delta between two revisions"""
246 301 return mdiff.textdiff(a, b)
247 302
248 303 def patches(self, t, pl):
304 """apply a list of patches to a string"""
249 305 return mdiff.patches(t, pl)
250 306
251 307 def delta(self, node):
308 """return or calculate a delta between a node and its predecessor"""
252 309 r = self.rev(node)
253 310 b = self.base(r)
254 311 if r == b:
@@ -261,15 +318,18 b' class revlog:'
261 318 return decompress(data)
262 319
263 320 def revision(self, node):
321 """return an uncompressed revision of a given"""
264 322 if node == nullid: return ""
265 323 if self.cache and self.cache[0] == node: return self.cache[2]
266 324
325 # look up what we need to read
267 326 text = None
268 327 rev = self.rev(node)
269 328 start, length, base, link, p1, p2, node = self.index[rev]
270 329 end = start + length
271 330 if base != rev: start = self.start(base)
272 331
332 # do we have useful data cached?
273 333 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
274 334 base = self.cache[1]
275 335 start = self.start(base + 1)
@@ -300,6 +360,14 b' class revlog:'
300 360 return text
301 361
302 362 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
363 """add a revision to the log
364
365 text - the revision data to add
366 transaction - the transaction object used for rollback
367 link - the linkrev data to add
368 p1, p2 - the parent nodeids of the revision
369 d - an optional precomputed delta
370 """
303 371 if text is None: text = ""
304 372 if p1 is None: p1 = self.tip()
305 373 if p2 is None: p2 = nullid
@@ -349,6 +417,7 b' class revlog:'
349 417 return node
350 418
351 419 def ancestor(self, a, b):
420 """calculate the least common ancestor of nodes a and b"""
352 421 # calculate the distance of every node from root
353 422 dist = {nullid: 0}
354 423 for i in xrange(self.count()):
@@ -387,12 +456,14 b' class revlog:'
387 456 lx = x.next()
388 457
389 458 def group(self, linkmap):
390 # given a list of changeset revs, return a set of deltas and
391 # metadata corresponding to nodes. the first delta is
392 # parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
393 # have this parent as it has all history before these
394 # changesets. parent is parent[0]
459 """calculate a delta group
395 460
461 Given a list of changeset revs, return a set of deltas and
462 metadata corresponding to nodes. the first delta is
463 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
464 have this parent as it has all history before these
465 changesets. parent is parent[0]
466 """
396 467 revs = []
397 468 needed = {}
398 469
@@ -498,9 +569,13 b' class revlog:'
498 569 yield struct.pack(">l", 0)
499 570
500 571 def addgroup(self, revs, linkmapper, transaction, unique=0):
501 # given a set of deltas, add them to the revision log. the
502 # first delta is against its parent, which should be in our
503 # log, the rest are against the previous delta.
572 """
573 add a delta group
574
575 given a set of deltas, add them to the revision log. the
576 first delta is against its parent, which should be in our
577 log, the rest are against the previous delta.
578 """
504 579
505 580 # track the base of the current delta log
506 581 r = self.count()
General Comments 0
You need to be logged in to leave comments. Login now