##// END OF EJS Templates
Add some docstrings to revlog.py
mpm@selenic.com -
r1083:30974cf7 default
parent child Browse files
Show More
@@ -1,12 +1,14 b''
1 # revlog.py - storage back-end for mercurial
1 """
2 #
2 revlog.py - storage back-end for mercurial
3 # This provides efficient delta storage with O(1) retrieve and append
3
4 # and O(changes) merge between branches
4 This provides efficient delta storage with O(1) retrieve and append
5 #
5 and O(changes) merge between branches
6 # Copyright 2005 Matt Mackall <mpm@selenic.com>
6
7 #
7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8 # This software may be used and distributed according to the terms
8
9 # of the GNU General Public License, incorporated herein by reference.
9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
11 """
10
12
11 import zlib, struct, sha, binascii, heapq
13 import zlib, struct, sha, binascii, heapq
12 from mercurial import mdiff
14 from mercurial import mdiff
@@ -16,6 +18,7 b' def bin(node): return binascii.unhexlify'
16 def short(node): return hex(node[:6])
18 def short(node): return hex(node[:6])
17
19
18 def compress(text):
20 def compress(text):
21 """ generate a possibly-compressed representation of text """
19 if not text: return text
22 if not text: return text
20 if len(text) < 44:
23 if len(text) < 44:
21 if text[0] == '\0': return text
24 if text[0] == '\0': return text
@@ -27,6 +30,7 b' def compress(text):'
27 return bin
30 return bin
28
31
29 def decompress(bin):
32 def decompress(bin):
33 """ decompress the given input """
30 if not bin: return bin
34 if not bin: return bin
31 t = bin[0]
35 t = bin[0]
32 if t == '\0': return bin
36 if t == '\0': return bin
@@ -35,6 +39,12 b' def decompress(bin):'
35 raise RevlogError("unknown compression type %s" % t)
39 raise RevlogError("unknown compression type %s" % t)
36
40
37 def hash(text, p1, p2):
41 def hash(text, p1, p2):
42 """generate a hash from the given text and its parent hashes
43
44 This hash combines both the current file contents and its history
45 in a manner that makes it easy to distinguish nodes with the same
46 content in the revision graph.
47 """
38 l = [p1, p2]
48 l = [p1, p2]
39 l.sort()
49 l.sort()
40 s = sha.new(l[0])
50 s = sha.new(l[0])
@@ -46,6 +56,15 b' nullid = "\\0" * 20'
46 indexformat = ">4l20s20s20s"
56 indexformat = ">4l20s20s20s"
47
57
48 class lazyparser:
58 class lazyparser:
59 """
60 this class avoids the need to parse the entirety of large indices
61
62 By default we parse and load 1000 entries at a time.
63
64 If no position is specified, we load the whole index, and replace
65 the lazy objects in revlog with the underlying objects for
66 efficiency in cases where we look at most of the nodes.
67 """
49 def __init__(self, data, revlog):
68 def __init__(self, data, revlog):
50 self.data = data
69 self.data = data
51 self.s = struct.calcsize(indexformat)
70 self.s = struct.calcsize(indexformat)
@@ -76,6 +95,7 b' class lazyparser:'
76 i += 1
95 i += 1
77
96
78 class lazyindex:
97 class lazyindex:
98 """a lazy version of the index array"""
79 def __init__(self, parser):
99 def __init__(self, parser):
80 self.p = parser
100 self.p = parser
81 def __len__(self):
101 def __len__(self):
@@ -89,6 +109,7 b' class lazyindex:'
89 self.p.index.append(e)
109 self.p.index.append(e)
90
110
91 class lazymap:
111 class lazymap:
112 """a lazy version of the node map"""
92 def __init__(self, parser):
113 def __init__(self, parser):
93 self.p = parser
114 self.p = parser
94 def load(self, key):
115 def load(self, key):
@@ -123,7 +144,37 b' class lazymap:'
123 class RevlogError(Exception): pass
144 class RevlogError(Exception): pass
124
145
125 class revlog:
146 class revlog:
147 """
148 the underlying revision storage object
149
150 A revlog consists of two parts, an index and the revision data.
151
152 The index is a file with a fixed record size containing
153 information on each revision, includings its nodeid (hash), the
154 nodeids of its parents, the position and offset of its data within
155 the data file, and the revision it's based on. Finally, each entry
156 contains a linkrev entry that can serve as a pointer to external
157 data.
158
159 The revision data itself is a linear collection of data chunks.
160 Each chunk represents a revision and is usually represented as a
161 delta against the previous chunk. To bound lookup time, runs of
162 deltas are limited to about 2 times the length of the original
163 version data. This makes retrieval of a version proportional to
164 its size, or O(1) relative to the number of revisions.
165
166 Both pieces of the revlog are written to in an append-only
167 fashion, which means we never need to rewrite a file to insert or
168 remove data, and can use some simple techniques to avoid the need
169 for locking while reading.
170 """
126 def __init__(self, opener, indexfile, datafile):
171 def __init__(self, opener, indexfile, datafile):
172 """
173 create a revlog object
174
175 opener is a function that abstracts the file opening operation
176 and can be used to implement COW semantics or the like.
177 """
127 self.indexfile = indexfile
178 self.indexfile = indexfile
128 self.datafile = datafile
179 self.datafile = datafile
129 self.opener = opener
180 self.opener = opener
@@ -193,6 +244,7 b' class revlog:'
193 return reachable
244 return reachable
194
245
195 def heads(self, stop=None):
246 def heads(self, stop=None):
247 """return the list of all nodes that have no children"""
196 p = {}
248 p = {}
197 h = []
249 h = []
198 stoprev = 0
250 stoprev = 0
@@ -212,6 +264,7 b' class revlog:'
212 return h
264 return h
213
265
214 def children(self, node):
266 def children(self, node):
267 """find the children of a given node"""
215 c = []
268 c = []
216 p = self.rev(node)
269 p = self.rev(node)
217 for r in range(p + 1, self.count()):
270 for r in range(p + 1, self.count()):
@@ -225,6 +278,7 b' class revlog:'
225 return c
278 return c
226
279
227 def lookup(self, id):
280 def lookup(self, id):
281 """locate a node based on revision number or subset of hex nodeid"""
228 try:
282 try:
229 rev = int(id)
283 rev = int(id)
230 if str(rev) != id: raise ValueError
284 if str(rev) != id: raise ValueError
@@ -243,12 +297,15 b' class revlog:'
243 return None
297 return None
244
298
245 def diff(self, a, b):
299 def diff(self, a, b):
300 """return a delta between two revisions"""
246 return mdiff.textdiff(a, b)
301 return mdiff.textdiff(a, b)
247
302
248 def patches(self, t, pl):
303 def patches(self, t, pl):
304 """apply a list of patches to a string"""
249 return mdiff.patches(t, pl)
305 return mdiff.patches(t, pl)
250
306
251 def delta(self, node):
307 def delta(self, node):
308 """return or calculate a delta between a node and its predecessor"""
252 r = self.rev(node)
309 r = self.rev(node)
253 b = self.base(r)
310 b = self.base(r)
254 if r == b:
311 if r == b:
@@ -261,15 +318,18 b' class revlog:'
261 return decompress(data)
318 return decompress(data)
262
319
263 def revision(self, node):
320 def revision(self, node):
321 """return an uncompressed revision of a given"""
264 if node == nullid: return ""
322 if node == nullid: return ""
265 if self.cache and self.cache[0] == node: return self.cache[2]
323 if self.cache and self.cache[0] == node: return self.cache[2]
266
324
325 # look up what we need to read
267 text = None
326 text = None
268 rev = self.rev(node)
327 rev = self.rev(node)
269 start, length, base, link, p1, p2, node = self.index[rev]
328 start, length, base, link, p1, p2, node = self.index[rev]
270 end = start + length
329 end = start + length
271 if base != rev: start = self.start(base)
330 if base != rev: start = self.start(base)
272
331
332 # do we have useful data cached?
273 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
333 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
274 base = self.cache[1]
334 base = self.cache[1]
275 start = self.start(base + 1)
335 start = self.start(base + 1)
@@ -300,6 +360,14 b' class revlog:'
300 return text
360 return text
301
361
302 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
362 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
363 """add a revision to the log
364
365 text - the revision data to add
366 transaction - the transaction object used for rollback
367 link - the linkrev data to add
368 p1, p2 - the parent nodeids of the revision
369 d - an optional precomputed delta
370 """
303 if text is None: text = ""
371 if text is None: text = ""
304 if p1 is None: p1 = self.tip()
372 if p1 is None: p1 = self.tip()
305 if p2 is None: p2 = nullid
373 if p2 is None: p2 = nullid
@@ -349,6 +417,7 b' class revlog:'
349 return node
417 return node
350
418
351 def ancestor(self, a, b):
419 def ancestor(self, a, b):
420 """calculate the least common ancestor of nodes a and b"""
352 # calculate the distance of every node from root
421 # calculate the distance of every node from root
353 dist = {nullid: 0}
422 dist = {nullid: 0}
354 for i in xrange(self.count()):
423 for i in xrange(self.count()):
@@ -387,12 +456,14 b' class revlog:'
387 lx = x.next()
456 lx = x.next()
388
457
389 def group(self, linkmap):
458 def group(self, linkmap):
390 # given a list of changeset revs, return a set of deltas and
459 """calculate a delta group
391 # metadata corresponding to nodes. the first delta is
392 # parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
393 # have this parent as it has all history before these
394 # changesets. parent is parent[0]
395
460
461 Given a list of changeset revs, return a set of deltas and
462 metadata corresponding to nodes. the first delta is
463 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
464 have this parent as it has all history before these
465 changesets. parent is parent[0]
466 """
396 revs = []
467 revs = []
397 needed = {}
468 needed = {}
398
469
@@ -498,9 +569,13 b' class revlog:'
498 yield struct.pack(">l", 0)
569 yield struct.pack(">l", 0)
499
570
500 def addgroup(self, revs, linkmapper, transaction, unique=0):
571 def addgroup(self, revs, linkmapper, transaction, unique=0):
501 # given a set of deltas, add them to the revision log. the
572 """
502 # first delta is against its parent, which should be in our
573 add a delta group
503 # log, the rest are against the previous delta.
574
575 given a set of deltas, add them to the revision log. the
576 first delta is against its parent, which should be in our
577 log, the rest are against the previous delta.
578 """
504
579
505 # track the base of the current delta log
580 #track the base of the current delta log
506 r = self.count()
581 r = self.count()
General Comments 0
You need to be logged in to leave comments. Login now