##// END OF EJS Templates
revlog: move ancestor generation out to a new class...
Siddharth Agarwal -
r18090:9abc55ef default
parent child Browse files
Show More
@@ -1,173 +1,221 b''
1 # ancestor.py - generic DAG ancestor algorithm for mercurial
1 # ancestor.py - generic DAG ancestor algorithm for mercurial
2 #
2 #
3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import heapq
8 import heapq, util
9 from node import nullrev
9 from node import nullrev
10
10
11 def ancestor(a, b, pfunc):
11 def ancestor(a, b, pfunc):
12 """
12 """
13 Returns the common ancestor of a and b that is furthest from a
13 Returns the common ancestor of a and b that is furthest from a
14 root (as measured by longest path) or None if no ancestor is
14 root (as measured by longest path) or None if no ancestor is
15 found. If there are multiple common ancestors at the same
15 found. If there are multiple common ancestors at the same
16 distance, the first one found is returned.
16 distance, the first one found is returned.
17
17
18 pfunc must return a list of parent vertices for a given vertex
18 pfunc must return a list of parent vertices for a given vertex
19 """
19 """
20
20
21 if a == b:
21 if a == b:
22 return a
22 return a
23
23
24 a, b = sorted([a, b])
24 a, b = sorted([a, b])
25
25
26 # find depth from root of all ancestors
26 # find depth from root of all ancestors
27 # depth is stored as a negative for heapq
27 # depth is stored as a negative for heapq
28 parentcache = {}
28 parentcache = {}
29 visit = [a, b]
29 visit = [a, b]
30 depth = {}
30 depth = {}
31 while visit:
31 while visit:
32 vertex = visit[-1]
32 vertex = visit[-1]
33 pl = pfunc(vertex)
33 pl = pfunc(vertex)
34 parentcache[vertex] = pl
34 parentcache[vertex] = pl
35 if not pl:
35 if not pl:
36 depth[vertex] = 0
36 depth[vertex] = 0
37 visit.pop()
37 visit.pop()
38 else:
38 else:
39 for p in pl:
39 for p in pl:
40 if p == a or p == b: # did we find a or b as a parent?
40 if p == a or p == b: # did we find a or b as a parent?
41 return p # we're done
41 return p # we're done
42 if p not in depth:
42 if p not in depth:
43 visit.append(p)
43 visit.append(p)
44 if visit[-1] == vertex:
44 if visit[-1] == vertex:
45 # -(maximum distance of parents + 1)
45 # -(maximum distance of parents + 1)
46 depth[vertex] = min([depth[p] for p in pl]) - 1
46 depth[vertex] = min([depth[p] for p in pl]) - 1
47 visit.pop()
47 visit.pop()
48
48
49 # traverse ancestors in order of decreasing distance from root
49 # traverse ancestors in order of decreasing distance from root
50 def ancestors(vertex):
50 def ancestors(vertex):
51 h = [(depth[vertex], vertex)]
51 h = [(depth[vertex], vertex)]
52 seen = set()
52 seen = set()
53 while h:
53 while h:
54 d, n = heapq.heappop(h)
54 d, n = heapq.heappop(h)
55 if n not in seen:
55 if n not in seen:
56 seen.add(n)
56 seen.add(n)
57 yield (d, n)
57 yield (d, n)
58 for p in parentcache[n]:
58 for p in parentcache[n]:
59 heapq.heappush(h, (depth[p], p))
59 heapq.heappush(h, (depth[p], p))
60
60
61 def generations(vertex):
61 def generations(vertex):
62 sg, s = None, set()
62 sg, s = None, set()
63 for g, v in ancestors(vertex):
63 for g, v in ancestors(vertex):
64 if g != sg:
64 if g != sg:
65 if sg:
65 if sg:
66 yield sg, s
66 yield sg, s
67 sg, s = g, set((v,))
67 sg, s = g, set((v,))
68 else:
68 else:
69 s.add(v)
69 s.add(v)
70 yield sg, s
70 yield sg, s
71
71
72 x = generations(a)
72 x = generations(a)
73 y = generations(b)
73 y = generations(b)
74 gx = x.next()
74 gx = x.next()
75 gy = y.next()
75 gy = y.next()
76
76
77 # increment each ancestor list until it is closer to root than
77 # increment each ancestor list until it is closer to root than
78 # the other, or they match
78 # the other, or they match
79 try:
79 try:
80 while True:
80 while True:
81 if gx[0] == gy[0]:
81 if gx[0] == gy[0]:
82 for v in gx[1]:
82 for v in gx[1]:
83 if v in gy[1]:
83 if v in gy[1]:
84 return v
84 return v
85 gy = y.next()
85 gy = y.next()
86 gx = x.next()
86 gx = x.next()
87 elif gx[0] > gy[0]:
87 elif gx[0] > gy[0]:
88 gy = y.next()
88 gy = y.next()
89 else:
89 else:
90 gx = x.next()
90 gx = x.next()
91 except StopIteration:
91 except StopIteration:
92 return None
92 return None
93
93
94 def missingancestors(revs, bases, pfunc):
94 def missingancestors(revs, bases, pfunc):
95 """Return all the ancestors of revs that are not ancestors of bases.
95 """Return all the ancestors of revs that are not ancestors of bases.
96
96
97 This may include elements from revs.
97 This may include elements from revs.
98
98
99 Equivalent to the revset (::revs - ::bases). Revs are returned in
99 Equivalent to the revset (::revs - ::bases). Revs are returned in
100 revision number order, which is a topological order.
100 revision number order, which is a topological order.
101
101
102 revs and bases should both be iterables. pfunc must return a list of
102 revs and bases should both be iterables. pfunc must return a list of
103 parent revs for a given revs.
103 parent revs for a given revs.
104 """
104 """
105
105
106 revsvisit = set(revs)
106 revsvisit = set(revs)
107 basesvisit = set(bases)
107 basesvisit = set(bases)
108 if not revsvisit:
108 if not revsvisit:
109 return []
109 return []
110 if not basesvisit:
110 if not basesvisit:
111 basesvisit.add(nullrev)
111 basesvisit.add(nullrev)
112 start = max(max(revsvisit), max(basesvisit))
112 start = max(max(revsvisit), max(basesvisit))
113 bothvisit = revsvisit.intersection(basesvisit)
113 bothvisit = revsvisit.intersection(basesvisit)
114 revsvisit.difference_update(bothvisit)
114 revsvisit.difference_update(bothvisit)
115 basesvisit.difference_update(bothvisit)
115 basesvisit.difference_update(bothvisit)
116 # At this point, we hold the invariants that:
116 # At this point, we hold the invariants that:
117 # - revsvisit is the set of nodes we know are an ancestor of at least one
117 # - revsvisit is the set of nodes we know are an ancestor of at least one
118 # of the nodes in revs
118 # of the nodes in revs
119 # - basesvisit is the same for bases
119 # - basesvisit is the same for bases
120 # - bothvisit is the set of nodes we know are ancestors of at least one of
120 # - bothvisit is the set of nodes we know are ancestors of at least one of
121 # the nodes in revs and one of the nodes in bases
121 # the nodes in revs and one of the nodes in bases
122 # - a node may be in none or one, but not more, of revsvisit, basesvisit
122 # - a node may be in none or one, but not more, of revsvisit, basesvisit
123 # and bothvisit at any given time
123 # and bothvisit at any given time
124 # Now we walk down in reverse topo order, adding parents of nodes already
124 # Now we walk down in reverse topo order, adding parents of nodes already
125 # visited to the sets while maintaining the invariants. When a node is
125 # visited to the sets while maintaining the invariants. When a node is
126 # found in both revsvisit and basesvisit, it is removed from them and
126 # found in both revsvisit and basesvisit, it is removed from them and
127 # added to bothvisit instead. When revsvisit becomes empty, there are no
127 # added to bothvisit instead. When revsvisit becomes empty, there are no
128 # more ancestors of revs that aren't also ancestors of bases, so exit.
128 # more ancestors of revs that aren't also ancestors of bases, so exit.
129
129
130 missing = []
130 missing = []
131 for curr in xrange(start, nullrev, -1):
131 for curr in xrange(start, nullrev, -1):
132 if not revsvisit:
132 if not revsvisit:
133 break
133 break
134
134
135 if curr in bothvisit:
135 if curr in bothvisit:
136 bothvisit.remove(curr)
136 bothvisit.remove(curr)
137 # curr's parents might have made it into revsvisit or basesvisit
137 # curr's parents might have made it into revsvisit or basesvisit
138 # through another path
138 # through another path
139 for p in pfunc(curr):
139 for p in pfunc(curr):
140 revsvisit.discard(p)
140 revsvisit.discard(p)
141 basesvisit.discard(p)
141 basesvisit.discard(p)
142 bothvisit.add(p)
142 bothvisit.add(p)
143 continue
143 continue
144
144
145 # curr will never be in both revsvisit and basesvisit, since if it
145 # curr will never be in both revsvisit and basesvisit, since if it
146 # were it'd have been pushed to bothvisit
146 # were it'd have been pushed to bothvisit
147 if curr in revsvisit:
147 if curr in revsvisit:
148 missing.append(curr)
148 missing.append(curr)
149 thisvisit = revsvisit
149 thisvisit = revsvisit
150 othervisit = basesvisit
150 othervisit = basesvisit
151 elif curr in basesvisit:
151 elif curr in basesvisit:
152 thisvisit = basesvisit
152 thisvisit = basesvisit
153 othervisit = revsvisit
153 othervisit = revsvisit
154 else:
154 else:
155 # not an ancestor of revs or bases: ignore
155 # not an ancestor of revs or bases: ignore
156 continue
156 continue
157
157
158 thisvisit.remove(curr)
158 thisvisit.remove(curr)
159 for p in pfunc(curr):
159 for p in pfunc(curr):
160 if p == nullrev:
160 if p == nullrev:
161 pass
161 pass
162 elif p in othervisit or p in bothvisit:
162 elif p in othervisit or p in bothvisit:
163 # p is implicitly in thisvisit. This means p is or should be
163 # p is implicitly in thisvisit. This means p is or should be
164 # in bothvisit
164 # in bothvisit
165 revsvisit.discard(p)
165 revsvisit.discard(p)
166 basesvisit.discard(p)
166 basesvisit.discard(p)
167 bothvisit.add(p)
167 bothvisit.add(p)
168 else:
168 else:
169 # visit later
169 # visit later
170 thisvisit.add(p)
170 thisvisit.add(p)
171
171
172 missing.reverse()
172 missing.reverse()
173 return missing
173 return missing
174
175 class lazyancestors(object):
176 def __init__(self, cl, revs, stoprev=0, inclusive=False):
177 """Create a new object generating ancestors for the given revs. Does
178 not generate revs lower than stoprev.
179
180 This is computed lazily starting from revs. The object only supports
181 iteration.
182
183 cl should be a changelog and revs should be an iterable. inclusive is
184 a boolean that indicates whether revs should be included. Revs lower
185 than stoprev will not be generated.
186
187 Result does not include the null revision."""
188 self._parentrevs = cl.parentrevs
189 self._initrevs = revs
190 self._stoprev = stoprev
191 self._inclusive = inclusive
192
193 def __iter__(self):
194 """Generate the ancestors of _initrevs in reverse topological order.
195
196 If inclusive is False, yield a sequence of revision numbers starting
197 with the parents of each revision in revs, i.e., each revision is *not*
198 considered an ancestor of itself. Results are in breadth-first order:
199 parents of each rev in revs, then parents of those, etc.
200
201 If inclusive is True, yield all the revs first (ignoring stoprev),
202 then yield all the ancestors of revs as when inclusive is False.
203 If an element in revs is an ancestor of a different rev it is not
204 yielded again."""
205 seen = set()
206 revs = self._initrevs
207 if self._inclusive:
208 for rev in revs:
209 yield rev
210 seen.update(revs)
211
212 parentrevs = self._parentrevs
213 stoprev = self._stoprev
214 visit = util.deque(revs)
215
216 while visit:
217 for parent in parentrevs(visit.popleft()):
218 if parent >= stoprev and parent not in seen:
219 visit.append(parent)
220 seen.add(parent)
221 yield parent
@@ -1,1358 +1,1337 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from node import bin, hex, nullid, nullrev
15 from node import bin, hex, nullid, nullrev
16 from i18n import _
16 from i18n import _
17 import ancestor, mdiff, parsers, error, util, dagutil
17 import ancestor, mdiff, parsers, error, util, dagutil
18 import struct, zlib, errno
18 import struct, zlib, errno
19
19
20 _pack = struct.pack
20 _pack = struct.pack
21 _unpack = struct.unpack
21 _unpack = struct.unpack
22 _compress = zlib.compress
22 _compress = zlib.compress
23 _decompress = zlib.decompress
23 _decompress = zlib.decompress
24 _sha = util.sha1
24 _sha = util.sha1
25
25
26 # revlog header flags
26 # revlog header flags
27 REVLOGV0 = 0
27 REVLOGV0 = 0
28 REVLOGNG = 1
28 REVLOGNG = 1
29 REVLOGNGINLINEDATA = (1 << 16)
29 REVLOGNGINLINEDATA = (1 << 16)
30 REVLOGGENERALDELTA = (1 << 17)
30 REVLOGGENERALDELTA = (1 << 17)
31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 REVLOG_DEFAULT_FORMAT = REVLOGNG
32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35
35
36 # revlog index flags
36 # revlog index flags
37 REVIDX_KNOWN_FLAGS = 0
37 REVIDX_KNOWN_FLAGS = 0
38
38
39 # max size of revlog with inline data
39 # max size of revlog with inline data
40 _maxinline = 131072
40 _maxinline = 131072
41 _chunksize = 1048576
41 _chunksize = 1048576
42
42
43 RevlogError = error.RevlogError
43 RevlogError = error.RevlogError
44 LookupError = error.LookupError
44 LookupError = error.LookupError
45
45
46 def getoffset(q):
46 def getoffset(q):
47 return int(q >> 16)
47 return int(q >> 16)
48
48
49 def gettype(q):
49 def gettype(q):
50 return int(q & 0xFFFF)
50 return int(q & 0xFFFF)
51
51
52 def offset_type(offset, type):
52 def offset_type(offset, type):
53 return long(long(offset) << 16 | type)
53 return long(long(offset) << 16 | type)
54
54
55 nullhash = _sha(nullid)
55 nullhash = _sha(nullid)
56
56
57 def hash(text, p1, p2):
57 def hash(text, p1, p2):
58 """generate a hash from the given text and its parent hashes
58 """generate a hash from the given text and its parent hashes
59
59
60 This hash combines both the current file contents and its history
60 This hash combines both the current file contents and its history
61 in a manner that makes it easy to distinguish nodes with the same
61 in a manner that makes it easy to distinguish nodes with the same
62 content in the revision graph.
62 content in the revision graph.
63 """
63 """
64 # As of now, if one of the parent node is null, p2 is null
64 # As of now, if one of the parent node is null, p2 is null
65 if p2 == nullid:
65 if p2 == nullid:
66 # deep copy of a hash is faster than creating one
66 # deep copy of a hash is faster than creating one
67 s = nullhash.copy()
67 s = nullhash.copy()
68 s.update(p1)
68 s.update(p1)
69 else:
69 else:
70 # none of the parent nodes are nullid
70 # none of the parent nodes are nullid
71 l = [p1, p2]
71 l = [p1, p2]
72 l.sort()
72 l.sort()
73 s = _sha(l[0])
73 s = _sha(l[0])
74 s.update(l[1])
74 s.update(l[1])
75 s.update(text)
75 s.update(text)
76 return s.digest()
76 return s.digest()
77
77
78 def decompress(bin):
78 def decompress(bin):
79 """ decompress the given input """
79 """ decompress the given input """
80 if not bin:
80 if not bin:
81 return bin
81 return bin
82 t = bin[0]
82 t = bin[0]
83 if t == '\0':
83 if t == '\0':
84 return bin
84 return bin
85 if t == 'x':
85 if t == 'x':
86 try:
86 try:
87 return _decompress(bin)
87 return _decompress(bin)
88 except zlib.error, e:
88 except zlib.error, e:
89 raise RevlogError(_("revlog decompress error: %s") % str(e))
89 raise RevlogError(_("revlog decompress error: %s") % str(e))
90 if t == 'u':
90 if t == 'u':
91 return bin[1:]
91 return bin[1:]
92 raise RevlogError(_("unknown compression type %r") % t)
92 raise RevlogError(_("unknown compression type %r") % t)
93
93
94 indexformatv0 = ">4l20s20s20s"
94 indexformatv0 = ">4l20s20s20s"
95 v0shaoffset = 56
95 v0shaoffset = 56
96
96
97 class revlogoldio(object):
97 class revlogoldio(object):
98 def __init__(self):
98 def __init__(self):
99 self.size = struct.calcsize(indexformatv0)
99 self.size = struct.calcsize(indexformatv0)
100
100
101 def parseindex(self, data, inline):
101 def parseindex(self, data, inline):
102 s = self.size
102 s = self.size
103 index = []
103 index = []
104 nodemap = {nullid: nullrev}
104 nodemap = {nullid: nullrev}
105 n = off = 0
105 n = off = 0
106 l = len(data)
106 l = len(data)
107 while off + s <= l:
107 while off + s <= l:
108 cur = data[off:off + s]
108 cur = data[off:off + s]
109 off += s
109 off += s
110 e = _unpack(indexformatv0, cur)
110 e = _unpack(indexformatv0, cur)
111 # transform to revlogv1 format
111 # transform to revlogv1 format
112 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
112 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
113 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
113 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
114 index.append(e2)
114 index.append(e2)
115 nodemap[e[6]] = n
115 nodemap[e[6]] = n
116 n += 1
116 n += 1
117
117
118 # add the magic null revision at -1
118 # add the magic null revision at -1
119 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
119 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
120
120
121 return index, nodemap, None
121 return index, nodemap, None
122
122
123 def packentry(self, entry, node, version, rev):
123 def packentry(self, entry, node, version, rev):
124 if gettype(entry[0]):
124 if gettype(entry[0]):
125 raise RevlogError(_("index entry flags need RevlogNG"))
125 raise RevlogError(_("index entry flags need RevlogNG"))
126 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
126 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
127 node(entry[5]), node(entry[6]), entry[7])
127 node(entry[5]), node(entry[6]), entry[7])
128 return _pack(indexformatv0, *e2)
128 return _pack(indexformatv0, *e2)
129
129
130 # index ng:
130 # index ng:
131 # 6 bytes: offset
131 # 6 bytes: offset
132 # 2 bytes: flags
132 # 2 bytes: flags
133 # 4 bytes: compressed length
133 # 4 bytes: compressed length
134 # 4 bytes: uncompressed length
134 # 4 bytes: uncompressed length
135 # 4 bytes: base rev
135 # 4 bytes: base rev
136 # 4 bytes: link rev
136 # 4 bytes: link rev
137 # 4 bytes: parent 1 rev
137 # 4 bytes: parent 1 rev
138 # 4 bytes: parent 2 rev
138 # 4 bytes: parent 2 rev
139 # 32 bytes: nodeid
139 # 32 bytes: nodeid
140 indexformatng = ">Qiiiiii20s12x"
140 indexformatng = ">Qiiiiii20s12x"
141 ngshaoffset = 32
141 ngshaoffset = 32
142 versionformat = ">I"
142 versionformat = ">I"
143
143
144 class revlogio(object):
144 class revlogio(object):
145 def __init__(self):
145 def __init__(self):
146 self.size = struct.calcsize(indexformatng)
146 self.size = struct.calcsize(indexformatng)
147
147
148 def parseindex(self, data, inline):
148 def parseindex(self, data, inline):
149 # call the C implementation to parse the index data
149 # call the C implementation to parse the index data
150 index, cache = parsers.parse_index2(data, inline)
150 index, cache = parsers.parse_index2(data, inline)
151 return index, getattr(index, 'nodemap', None), cache
151 return index, getattr(index, 'nodemap', None), cache
152
152
153 def packentry(self, entry, node, version, rev):
153 def packentry(self, entry, node, version, rev):
154 p = _pack(indexformatng, *entry)
154 p = _pack(indexformatng, *entry)
155 if rev == 0:
155 if rev == 0:
156 p = _pack(versionformat, version) + p[4:]
156 p = _pack(versionformat, version) + p[4:]
157 return p
157 return p
158
158
159 class revlog(object):
159 class revlog(object):
160 """
160 """
161 the underlying revision storage object
161 the underlying revision storage object
162
162
163 A revlog consists of two parts, an index and the revision data.
163 A revlog consists of two parts, an index and the revision data.
164
164
165 The index is a file with a fixed record size containing
165 The index is a file with a fixed record size containing
166 information on each revision, including its nodeid (hash), the
166 information on each revision, including its nodeid (hash), the
167 nodeids of its parents, the position and offset of its data within
167 nodeids of its parents, the position and offset of its data within
168 the data file, and the revision it's based on. Finally, each entry
168 the data file, and the revision it's based on. Finally, each entry
169 contains a linkrev entry that can serve as a pointer to external
169 contains a linkrev entry that can serve as a pointer to external
170 data.
170 data.
171
171
172 The revision data itself is a linear collection of data chunks.
172 The revision data itself is a linear collection of data chunks.
173 Each chunk represents a revision and is usually represented as a
173 Each chunk represents a revision and is usually represented as a
174 delta against the previous chunk. To bound lookup time, runs of
174 delta against the previous chunk. To bound lookup time, runs of
175 deltas are limited to about 2 times the length of the original
175 deltas are limited to about 2 times the length of the original
176 version data. This makes retrieval of a version proportional to
176 version data. This makes retrieval of a version proportional to
177 its size, or O(1) relative to the number of revisions.
177 its size, or O(1) relative to the number of revisions.
178
178
179 Both pieces of the revlog are written to in an append-only
179 Both pieces of the revlog are written to in an append-only
180 fashion, which means we never need to rewrite a file to insert or
180 fashion, which means we never need to rewrite a file to insert or
181 remove data, and can use some simple techniques to avoid the need
181 remove data, and can use some simple techniques to avoid the need
182 for locking while reading.
182 for locking while reading.
183 """
183 """
184 def __init__(self, opener, indexfile):
184 def __init__(self, opener, indexfile):
185 """
185 """
186 create a revlog object
186 create a revlog object
187
187
188 opener is a function that abstracts the file opening operation
188 opener is a function that abstracts the file opening operation
189 and can be used to implement COW semantics or the like.
189 and can be used to implement COW semantics or the like.
190 """
190 """
191 self.indexfile = indexfile
191 self.indexfile = indexfile
192 self.datafile = indexfile[:-2] + ".d"
192 self.datafile = indexfile[:-2] + ".d"
193 self.opener = opener
193 self.opener = opener
194 self._cache = None
194 self._cache = None
195 self._basecache = (0, 0)
195 self._basecache = (0, 0)
196 self._chunkcache = (0, '')
196 self._chunkcache = (0, '')
197 self.index = []
197 self.index = []
198 self._pcache = {}
198 self._pcache = {}
199 self._nodecache = {nullid: nullrev}
199 self._nodecache = {nullid: nullrev}
200 self._nodepos = None
200 self._nodepos = None
201
201
202 v = REVLOG_DEFAULT_VERSION
202 v = REVLOG_DEFAULT_VERSION
203 opts = getattr(opener, 'options', None)
203 opts = getattr(opener, 'options', None)
204 if opts is not None:
204 if opts is not None:
205 if 'revlogv1' in opts:
205 if 'revlogv1' in opts:
206 if 'generaldelta' in opts:
206 if 'generaldelta' in opts:
207 v |= REVLOGGENERALDELTA
207 v |= REVLOGGENERALDELTA
208 else:
208 else:
209 v = 0
209 v = 0
210
210
211 i = ''
211 i = ''
212 self._initempty = True
212 self._initempty = True
213 try:
213 try:
214 f = self.opener(self.indexfile)
214 f = self.opener(self.indexfile)
215 i = f.read()
215 i = f.read()
216 f.close()
216 f.close()
217 if len(i) > 0:
217 if len(i) > 0:
218 v = struct.unpack(versionformat, i[:4])[0]
218 v = struct.unpack(versionformat, i[:4])[0]
219 self._initempty = False
219 self._initempty = False
220 except IOError, inst:
220 except IOError, inst:
221 if inst.errno != errno.ENOENT:
221 if inst.errno != errno.ENOENT:
222 raise
222 raise
223
223
224 self.version = v
224 self.version = v
225 self._inline = v & REVLOGNGINLINEDATA
225 self._inline = v & REVLOGNGINLINEDATA
226 self._generaldelta = v & REVLOGGENERALDELTA
226 self._generaldelta = v & REVLOGGENERALDELTA
227 flags = v & ~0xFFFF
227 flags = v & ~0xFFFF
228 fmt = v & 0xFFFF
228 fmt = v & 0xFFFF
229 if fmt == REVLOGV0 and flags:
229 if fmt == REVLOGV0 and flags:
230 raise RevlogError(_("index %s unknown flags %#04x for format v0")
230 raise RevlogError(_("index %s unknown flags %#04x for format v0")
231 % (self.indexfile, flags >> 16))
231 % (self.indexfile, flags >> 16))
232 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
232 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
233 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
233 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
234 % (self.indexfile, flags >> 16))
234 % (self.indexfile, flags >> 16))
235 elif fmt > REVLOGNG:
235 elif fmt > REVLOGNG:
236 raise RevlogError(_("index %s unknown format %d")
236 raise RevlogError(_("index %s unknown format %d")
237 % (self.indexfile, fmt))
237 % (self.indexfile, fmt))
238
238
239 self._io = revlogio()
239 self._io = revlogio()
240 if self.version == REVLOGV0:
240 if self.version == REVLOGV0:
241 self._io = revlogoldio()
241 self._io = revlogoldio()
242 try:
242 try:
243 d = self._io.parseindex(i, self._inline)
243 d = self._io.parseindex(i, self._inline)
244 except (ValueError, IndexError):
244 except (ValueError, IndexError):
245 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
245 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
246 self.index, nodemap, self._chunkcache = d
246 self.index, nodemap, self._chunkcache = d
247 if nodemap is not None:
247 if nodemap is not None:
248 self.nodemap = self._nodecache = nodemap
248 self.nodemap = self._nodecache = nodemap
249 if not self._chunkcache:
249 if not self._chunkcache:
250 self._chunkclear()
250 self._chunkclear()
251
251
252 def tip(self):
252 def tip(self):
253 return self.node(len(self.index) - 2)
253 return self.node(len(self.index) - 2)
254 def __len__(self):
254 def __len__(self):
255 return len(self.index) - 1
255 return len(self.index) - 1
256 def __iter__(self):
256 def __iter__(self):
257 return iter(xrange(len(self)))
257 return iter(xrange(len(self)))
258 def revs(self, start=0, stop=None):
258 def revs(self, start=0, stop=None):
259 """iterate over all rev in this revlog (from start to stop)"""
259 """iterate over all rev in this revlog (from start to stop)"""
260 step = 1
260 step = 1
261 if stop is not None:
261 if stop is not None:
262 if start > stop:
262 if start > stop:
263 step = -1
263 step = -1
264 stop += step
264 stop += step
265 else:
265 else:
266 stop = len(self)
266 stop = len(self)
267 return xrange(start, stop, step)
267 return xrange(start, stop, step)
268
268
269 @util.propertycache
269 @util.propertycache
270 def nodemap(self):
270 def nodemap(self):
271 self.rev(self.node(0))
271 self.rev(self.node(0))
272 return self._nodecache
272 return self._nodecache
273
273
274 def hasnode(self, node):
274 def hasnode(self, node):
275 try:
275 try:
276 self.rev(node)
276 self.rev(node)
277 return True
277 return True
278 except KeyError:
278 except KeyError:
279 return False
279 return False
280
280
281 def clearcaches(self):
281 def clearcaches(self):
282 try:
282 try:
283 self._nodecache.clearcaches()
283 self._nodecache.clearcaches()
284 except AttributeError:
284 except AttributeError:
285 self._nodecache = {nullid: nullrev}
285 self._nodecache = {nullid: nullrev}
286 self._nodepos = None
286 self._nodepos = None
287
287
288 def rev(self, node):
288 def rev(self, node):
289 try:
289 try:
290 return self._nodecache[node]
290 return self._nodecache[node]
291 except RevlogError:
291 except RevlogError:
292 # parsers.c radix tree lookup failed
292 # parsers.c radix tree lookup failed
293 raise LookupError(node, self.indexfile, _('no node'))
293 raise LookupError(node, self.indexfile, _('no node'))
294 except KeyError:
294 except KeyError:
295 # pure python cache lookup failed
295 # pure python cache lookup failed
296 n = self._nodecache
296 n = self._nodecache
297 i = self.index
297 i = self.index
298 p = self._nodepos
298 p = self._nodepos
299 if p is None:
299 if p is None:
300 p = len(i) - 2
300 p = len(i) - 2
301 for r in xrange(p, -1, -1):
301 for r in xrange(p, -1, -1):
302 v = i[r][7]
302 v = i[r][7]
303 n[v] = r
303 n[v] = r
304 if v == node:
304 if v == node:
305 self._nodepos = r - 1
305 self._nodepos = r - 1
306 return r
306 return r
307 raise LookupError(node, self.indexfile, _('no node'))
307 raise LookupError(node, self.indexfile, _('no node'))
308
308
309 def node(self, rev):
309 def node(self, rev):
310 return self.index[rev][7]
310 return self.index[rev][7]
311 def linkrev(self, rev):
311 def linkrev(self, rev):
312 return self.index[rev][4]
312 return self.index[rev][4]
313 def parents(self, node):
313 def parents(self, node):
314 i = self.index
314 i = self.index
315 d = i[self.rev(node)]
315 d = i[self.rev(node)]
316 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
316 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
317 def parentrevs(self, rev):
317 def parentrevs(self, rev):
318 return self.index[rev][5:7]
318 return self.index[rev][5:7]
319 def start(self, rev):
319 def start(self, rev):
320 return int(self.index[rev][0] >> 16)
320 return int(self.index[rev][0] >> 16)
321 def end(self, rev):
321 def end(self, rev):
322 return self.start(rev) + self.length(rev)
322 return self.start(rev) + self.length(rev)
323 def length(self, rev):
323 def length(self, rev):
324 return self.index[rev][1]
324 return self.index[rev][1]
325 def chainbase(self, rev):
325 def chainbase(self, rev):
326 index = self.index
326 index = self.index
327 base = index[rev][3]
327 base = index[rev][3]
328 while base != rev:
328 while base != rev:
329 rev = base
329 rev = base
330 base = index[rev][3]
330 base = index[rev][3]
331 return base
331 return base
332 def flags(self, rev):
332 def flags(self, rev):
333 return self.index[rev][0] & 0xFFFF
333 return self.index[rev][0] & 0xFFFF
334 def rawsize(self, rev):
334 def rawsize(self, rev):
335 """return the length of the uncompressed text for a given revision"""
335 """return the length of the uncompressed text for a given revision"""
336 l = self.index[rev][2]
336 l = self.index[rev][2]
337 if l >= 0:
337 if l >= 0:
338 return l
338 return l
339
339
340 t = self.revision(self.node(rev))
340 t = self.revision(self.node(rev))
341 return len(t)
341 return len(t)
342 size = rawsize
342 size = rawsize
343
343
344 def ancestors(self, revs, stoprev=0, inclusive=False):
344 def ancestors(self, revs, stoprev=0, inclusive=False):
345 """Generate the ancestors of 'revs' in reverse topological order.
345 """Generate the ancestors of 'revs' in reverse topological order.
346 Does not generate revs lower than stoprev.
346 Does not generate revs lower than stoprev.
347
347
348 If inclusive is False, yield a sequence of revision numbers starting
348 See the documentation for ancestor.lazyancestors for more details."""
349 with the parents of each revision in revs, i.e., each revision is *not*
350 considered an ancestor of itself. Results are in breadth-first order:
351 parents of each rev in revs, then parents of those, etc.
352
353 If inclusive is True, yield all the revs first (ignoring stoprev),
354 then yield all the ancestors of revs as when inclusive is False.
355 If an element in revs is an ancestor of a different rev it is not
356 yielded again.
357
349
358 Result does not include the null revision."""
350 return ancestor.lazyancestors(self, revs, stoprev=stoprev,
359 visit = util.deque(revs)
351 inclusive=inclusive)
360 seen = set([nullrev])
361 if inclusive:
362 for rev in revs:
363 yield rev
364 seen.update(revs)
365 while visit:
366 for parent in self.parentrevs(visit.popleft()):
367 if parent < stoprev:
368 continue
369 if parent not in seen:
370 visit.append(parent)
371 seen.add(parent)
372 yield parent
373
352
374 def descendants(self, revs):
353 def descendants(self, revs):
375 """Generate the descendants of 'revs' in revision order.
354 """Generate the descendants of 'revs' in revision order.
376
355
377 Yield a sequence of revision numbers starting with a child of
356 Yield a sequence of revision numbers starting with a child of
378 some rev in revs, i.e., each revision is *not* considered a
357 some rev in revs, i.e., each revision is *not* considered a
379 descendant of itself. Results are ordered by revision number (a
358 descendant of itself. Results are ordered by revision number (a
380 topological sort)."""
359 topological sort)."""
381 first = min(revs)
360 first = min(revs)
382 if first == nullrev:
361 if first == nullrev:
383 for i in self:
362 for i in self:
384 yield i
363 yield i
385 return
364 return
386
365
387 seen = set(revs)
366 seen = set(revs)
388 for i in self.revs(start=first + 1):
367 for i in self.revs(start=first + 1):
389 for x in self.parentrevs(i):
368 for x in self.parentrevs(i):
390 if x != nullrev and x in seen:
369 if x != nullrev and x in seen:
391 seen.add(i)
370 seen.add(i)
392 yield i
371 yield i
393 break
372 break
394
373
395 def findcommonmissing(self, common=None, heads=None):
374 def findcommonmissing(self, common=None, heads=None):
396 """Return a tuple of the ancestors of common and the ancestors of heads
375 """Return a tuple of the ancestors of common and the ancestors of heads
397 that are not ancestors of common. In revset terminology, we return the
376 that are not ancestors of common. In revset terminology, we return the
398 tuple:
377 tuple:
399
378
400 ::common, (::heads) - (::common)
379 ::common, (::heads) - (::common)
401
380
402 The list is sorted by revision number, meaning it is
381 The list is sorted by revision number, meaning it is
403 topologically sorted.
382 topologically sorted.
404
383
405 'heads' and 'common' are both lists of node IDs. If heads is
384 'heads' and 'common' are both lists of node IDs. If heads is
406 not supplied, uses all of the revlog's heads. If common is not
385 not supplied, uses all of the revlog's heads. If common is not
407 supplied, uses nullid."""
386 supplied, uses nullid."""
408 if common is None:
387 if common is None:
409 common = [nullid]
388 common = [nullid]
410 if heads is None:
389 if heads is None:
411 heads = self.heads()
390 heads = self.heads()
412
391
413 common = [self.rev(n) for n in common]
392 common = [self.rev(n) for n in common]
414 heads = [self.rev(n) for n in heads]
393 heads = [self.rev(n) for n in heads]
415
394
416 # we want the ancestors, but inclusive
395 # we want the ancestors, but inclusive
417 has = set(self.ancestors(common))
396 has = set(self.ancestors(common))
418 has.add(nullrev)
397 has.add(nullrev)
419 has.update(common)
398 has.update(common)
420
399
421 # take all ancestors from heads that aren't in has
400 # take all ancestors from heads that aren't in has
422 missing = set()
401 missing = set()
423 visit = util.deque(r for r in heads if r not in has)
402 visit = util.deque(r for r in heads if r not in has)
424 while visit:
403 while visit:
425 r = visit.popleft()
404 r = visit.popleft()
426 if r in missing:
405 if r in missing:
427 continue
406 continue
428 else:
407 else:
429 missing.add(r)
408 missing.add(r)
430 for p in self.parentrevs(r):
409 for p in self.parentrevs(r):
431 if p not in has:
410 if p not in has:
432 visit.append(p)
411 visit.append(p)
433 missing = list(missing)
412 missing = list(missing)
434 missing.sort()
413 missing.sort()
435 return has, [self.node(r) for r in missing]
414 return has, [self.node(r) for r in missing]
436
415
437 def findmissingrevs(self, common=None, heads=None):
416 def findmissingrevs(self, common=None, heads=None):
438 """Return the revision numbers of the ancestors of heads that
417 """Return the revision numbers of the ancestors of heads that
439 are not ancestors of common.
418 are not ancestors of common.
440
419
441 More specifically, return a list of revision numbers corresponding to
420 More specifically, return a list of revision numbers corresponding to
442 nodes N such that every N satisfies the following constraints:
421 nodes N such that every N satisfies the following constraints:
443
422
444 1. N is an ancestor of some node in 'heads'
423 1. N is an ancestor of some node in 'heads'
445 2. N is not an ancestor of any node in 'common'
424 2. N is not an ancestor of any node in 'common'
446
425
447 The list is sorted by revision number, meaning it is
426 The list is sorted by revision number, meaning it is
448 topologically sorted.
427 topologically sorted.
449
428
450 'heads' and 'common' are both lists of revision numbers. If heads is
429 'heads' and 'common' are both lists of revision numbers. If heads is
451 not supplied, uses all of the revlog's heads. If common is not
430 not supplied, uses all of the revlog's heads. If common is not
452 supplied, uses nullid."""
431 supplied, uses nullid."""
453 if common is None:
432 if common is None:
454 common = [nullrev]
433 common = [nullrev]
455 if heads is None:
434 if heads is None:
456 heads = self.headrevs()
435 heads = self.headrevs()
457
436
458 return ancestor.missingancestors(heads, common, self.parentrevs)
437 return ancestor.missingancestors(heads, common, self.parentrevs)
459
438
460 def findmissing(self, common=None, heads=None):
439 def findmissing(self, common=None, heads=None):
461 """Return the ancestors of heads that are not ancestors of common.
440 """Return the ancestors of heads that are not ancestors of common.
462
441
463 More specifically, return a list of nodes N such that every N
442 More specifically, return a list of nodes N such that every N
464 satisfies the following constraints:
443 satisfies the following constraints:
465
444
466 1. N is an ancestor of some node in 'heads'
445 1. N is an ancestor of some node in 'heads'
467 2. N is not an ancestor of any node in 'common'
446 2. N is not an ancestor of any node in 'common'
468
447
469 The list is sorted by revision number, meaning it is
448 The list is sorted by revision number, meaning it is
470 topologically sorted.
449 topologically sorted.
471
450
472 'heads' and 'common' are both lists of node IDs. If heads is
451 'heads' and 'common' are both lists of node IDs. If heads is
473 not supplied, uses all of the revlog's heads. If common is not
452 not supplied, uses all of the revlog's heads. If common is not
474 supplied, uses nullid."""
453 supplied, uses nullid."""
475 if common is None:
454 if common is None:
476 common = [nullid]
455 common = [nullid]
477 if heads is None:
456 if heads is None:
478 heads = self.heads()
457 heads = self.heads()
479
458
480 common = [self.rev(n) for n in common]
459 common = [self.rev(n) for n in common]
481 heads = [self.rev(n) for n in heads]
460 heads = [self.rev(n) for n in heads]
482
461
483 return [self.node(r) for r in
462 return [self.node(r) for r in
484 ancestor.missingancestors(heads, common, self.parentrevs)]
463 ancestor.missingancestors(heads, common, self.parentrevs)]
485
464
486 def nodesbetween(self, roots=None, heads=None):
465 def nodesbetween(self, roots=None, heads=None):
487 """Return a topological path from 'roots' to 'heads'.
466 """Return a topological path from 'roots' to 'heads'.
488
467
489 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
468 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
490 topologically sorted list of all nodes N that satisfy both of
469 topologically sorted list of all nodes N that satisfy both of
491 these constraints:
470 these constraints:
492
471
493 1. N is a descendant of some node in 'roots'
472 1. N is a descendant of some node in 'roots'
494 2. N is an ancestor of some node in 'heads'
473 2. N is an ancestor of some node in 'heads'
495
474
496 Every node is considered to be both a descendant and an ancestor
475 Every node is considered to be both a descendant and an ancestor
497 of itself, so every reachable node in 'roots' and 'heads' will be
476 of itself, so every reachable node in 'roots' and 'heads' will be
498 included in 'nodes'.
477 included in 'nodes'.
499
478
500 'outroots' is the list of reachable nodes in 'roots', i.e., the
479 'outroots' is the list of reachable nodes in 'roots', i.e., the
501 subset of 'roots' that is returned in 'nodes'. Likewise,
480 subset of 'roots' that is returned in 'nodes'. Likewise,
502 'outheads' is the subset of 'heads' that is also in 'nodes'.
481 'outheads' is the subset of 'heads' that is also in 'nodes'.
503
482
504 'roots' and 'heads' are both lists of node IDs. If 'roots' is
483 'roots' and 'heads' are both lists of node IDs. If 'roots' is
505 unspecified, uses nullid as the only root. If 'heads' is
484 unspecified, uses nullid as the only root. If 'heads' is
506 unspecified, uses list of all of the revlog's heads."""
485 unspecified, uses list of all of the revlog's heads."""
507 nonodes = ([], [], [])
486 nonodes = ([], [], [])
508 if roots is not None:
487 if roots is not None:
509 roots = list(roots)
488 roots = list(roots)
510 if not roots:
489 if not roots:
511 return nonodes
490 return nonodes
512 lowestrev = min([self.rev(n) for n in roots])
491 lowestrev = min([self.rev(n) for n in roots])
513 else:
492 else:
514 roots = [nullid] # Everybody's a descendant of nullid
493 roots = [nullid] # Everybody's a descendant of nullid
515 lowestrev = nullrev
494 lowestrev = nullrev
516 if (lowestrev == nullrev) and (heads is None):
495 if (lowestrev == nullrev) and (heads is None):
517 # We want _all_ the nodes!
496 # We want _all_ the nodes!
518 return ([self.node(r) for r in self], [nullid], list(self.heads()))
497 return ([self.node(r) for r in self], [nullid], list(self.heads()))
519 if heads is None:
498 if heads is None:
520 # All nodes are ancestors, so the latest ancestor is the last
499 # All nodes are ancestors, so the latest ancestor is the last
521 # node.
500 # node.
522 highestrev = len(self) - 1
501 highestrev = len(self) - 1
523 # Set ancestors to None to signal that every node is an ancestor.
502 # Set ancestors to None to signal that every node is an ancestor.
524 ancestors = None
503 ancestors = None
525 # Set heads to an empty dictionary for later discovery of heads
504 # Set heads to an empty dictionary for later discovery of heads
526 heads = {}
505 heads = {}
527 else:
506 else:
528 heads = list(heads)
507 heads = list(heads)
529 if not heads:
508 if not heads:
530 return nonodes
509 return nonodes
531 ancestors = set()
510 ancestors = set()
532 # Turn heads into a dictionary so we can remove 'fake' heads.
511 # Turn heads into a dictionary so we can remove 'fake' heads.
533 # Also, later we will be using it to filter out the heads we can't
512 # Also, later we will be using it to filter out the heads we can't
534 # find from roots.
513 # find from roots.
535 heads = dict.fromkeys(heads, False)
514 heads = dict.fromkeys(heads, False)
536 # Start at the top and keep marking parents until we're done.
515 # Start at the top and keep marking parents until we're done.
537 nodestotag = set(heads)
516 nodestotag = set(heads)
538 # Remember where the top was so we can use it as a limit later.
517 # Remember where the top was so we can use it as a limit later.
539 highestrev = max([self.rev(n) for n in nodestotag])
518 highestrev = max([self.rev(n) for n in nodestotag])
540 while nodestotag:
519 while nodestotag:
541 # grab a node to tag
520 # grab a node to tag
542 n = nodestotag.pop()
521 n = nodestotag.pop()
543 # Never tag nullid
522 # Never tag nullid
544 if n == nullid:
523 if n == nullid:
545 continue
524 continue
546 # A node's revision number represents its place in a
525 # A node's revision number represents its place in a
547 # topologically sorted list of nodes.
526 # topologically sorted list of nodes.
548 r = self.rev(n)
527 r = self.rev(n)
549 if r >= lowestrev:
528 if r >= lowestrev:
550 if n not in ancestors:
529 if n not in ancestors:
551 # If we are possibly a descendant of one of the roots
530 # If we are possibly a descendant of one of the roots
552 # and we haven't already been marked as an ancestor
531 # and we haven't already been marked as an ancestor
553 ancestors.add(n) # Mark as ancestor
532 ancestors.add(n) # Mark as ancestor
554 # Add non-nullid parents to list of nodes to tag.
533 # Add non-nullid parents to list of nodes to tag.
555 nodestotag.update([p for p in self.parents(n) if
534 nodestotag.update([p for p in self.parents(n) if
556 p != nullid])
535 p != nullid])
557 elif n in heads: # We've seen it before, is it a fake head?
536 elif n in heads: # We've seen it before, is it a fake head?
558 # So it is, real heads should not be the ancestors of
537 # So it is, real heads should not be the ancestors of
559 # any other heads.
538 # any other heads.
560 heads.pop(n)
539 heads.pop(n)
561 if not ancestors:
540 if not ancestors:
562 return nonodes
541 return nonodes
563 # Now that we have our set of ancestors, we want to remove any
542 # Now that we have our set of ancestors, we want to remove any
564 # roots that are not ancestors.
543 # roots that are not ancestors.
565
544
566 # If one of the roots was nullid, everything is included anyway.
545 # If one of the roots was nullid, everything is included anyway.
567 if lowestrev > nullrev:
546 if lowestrev > nullrev:
568 # But, since we weren't, let's recompute the lowest rev to not
547 # But, since we weren't, let's recompute the lowest rev to not
569 # include roots that aren't ancestors.
548 # include roots that aren't ancestors.
570
549
571 # Filter out roots that aren't ancestors of heads
550 # Filter out roots that aren't ancestors of heads
572 roots = [n for n in roots if n in ancestors]
551 roots = [n for n in roots if n in ancestors]
573 # Recompute the lowest revision
552 # Recompute the lowest revision
574 if roots:
553 if roots:
575 lowestrev = min([self.rev(n) for n in roots])
554 lowestrev = min([self.rev(n) for n in roots])
576 else:
555 else:
577 # No more roots? Return empty list
556 # No more roots? Return empty list
578 return nonodes
557 return nonodes
579 else:
558 else:
580 # We are descending from nullid, and don't need to care about
559 # We are descending from nullid, and don't need to care about
581 # any other roots.
560 # any other roots.
582 lowestrev = nullrev
561 lowestrev = nullrev
583 roots = [nullid]
562 roots = [nullid]
584 # Transform our roots list into a set.
563 # Transform our roots list into a set.
585 descendants = set(roots)
564 descendants = set(roots)
586 # Also, keep the original roots so we can filter out roots that aren't
565 # Also, keep the original roots so we can filter out roots that aren't
587 # 'real' roots (i.e. are descended from other roots).
566 # 'real' roots (i.e. are descended from other roots).
588 roots = descendants.copy()
567 roots = descendants.copy()
589 # Our topologically sorted list of output nodes.
568 # Our topologically sorted list of output nodes.
590 orderedout = []
569 orderedout = []
591 # Don't start at nullid since we don't want nullid in our output list,
570 # Don't start at nullid since we don't want nullid in our output list,
592 # and if nullid shows up in descendants, empty parents will look like
571 # and if nullid shows up in descendants, empty parents will look like
593 # they're descendants.
572 # they're descendants.
594 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
573 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
595 n = self.node(r)
574 n = self.node(r)
596 isdescendant = False
575 isdescendant = False
597 if lowestrev == nullrev: # Everybody is a descendant of nullid
576 if lowestrev == nullrev: # Everybody is a descendant of nullid
598 isdescendant = True
577 isdescendant = True
599 elif n in descendants:
578 elif n in descendants:
600 # n is already a descendant
579 # n is already a descendant
601 isdescendant = True
580 isdescendant = True
602 # This check only needs to be done here because all the roots
581 # This check only needs to be done here because all the roots
603 # will start being marked is descendants before the loop.
582 # will start being marked is descendants before the loop.
604 if n in roots:
583 if n in roots:
605 # If n was a root, check if it's a 'real' root.
584 # If n was a root, check if it's a 'real' root.
606 p = tuple(self.parents(n))
585 p = tuple(self.parents(n))
607 # If any of its parents are descendants, it's not a root.
586 # If any of its parents are descendants, it's not a root.
608 if (p[0] in descendants) or (p[1] in descendants):
587 if (p[0] in descendants) or (p[1] in descendants):
609 roots.remove(n)
588 roots.remove(n)
610 else:
589 else:
611 p = tuple(self.parents(n))
590 p = tuple(self.parents(n))
612 # A node is a descendant if either of its parents are
591 # A node is a descendant if either of its parents are
613 # descendants. (We seeded the dependents list with the roots
592 # descendants. (We seeded the dependents list with the roots
614 # up there, remember?)
593 # up there, remember?)
615 if (p[0] in descendants) or (p[1] in descendants):
594 if (p[0] in descendants) or (p[1] in descendants):
616 descendants.add(n)
595 descendants.add(n)
617 isdescendant = True
596 isdescendant = True
618 if isdescendant and ((ancestors is None) or (n in ancestors)):
597 if isdescendant and ((ancestors is None) or (n in ancestors)):
619 # Only include nodes that are both descendants and ancestors.
598 # Only include nodes that are both descendants and ancestors.
620 orderedout.append(n)
599 orderedout.append(n)
621 if (ancestors is not None) and (n in heads):
600 if (ancestors is not None) and (n in heads):
622 # We're trying to figure out which heads are reachable
601 # We're trying to figure out which heads are reachable
623 # from roots.
602 # from roots.
624 # Mark this head as having been reached
603 # Mark this head as having been reached
625 heads[n] = True
604 heads[n] = True
626 elif ancestors is None:
605 elif ancestors is None:
627 # Otherwise, we're trying to discover the heads.
606 # Otherwise, we're trying to discover the heads.
628 # Assume this is a head because if it isn't, the next step
607 # Assume this is a head because if it isn't, the next step
629 # will eventually remove it.
608 # will eventually remove it.
630 heads[n] = True
609 heads[n] = True
631 # But, obviously its parents aren't.
610 # But, obviously its parents aren't.
632 for p in self.parents(n):
611 for p in self.parents(n):
633 heads.pop(p, None)
612 heads.pop(p, None)
634 heads = [n for n, flag in heads.iteritems() if flag]
613 heads = [n for n, flag in heads.iteritems() if flag]
635 roots = list(roots)
614 roots = list(roots)
636 assert orderedout
615 assert orderedout
637 assert roots
616 assert roots
638 assert heads
617 assert heads
639 return (orderedout, roots, heads)
618 return (orderedout, roots, heads)
640
619
641 def headrevs(self):
620 def headrevs(self):
642 try:
621 try:
643 return self.index.headrevs()
622 return self.index.headrevs()
644 except AttributeError:
623 except AttributeError:
645 return self._headrevs()
624 return self._headrevs()
646
625
647 def _headrevs(self):
626 def _headrevs(self):
648 count = len(self)
627 count = len(self)
649 if not count:
628 if not count:
650 return [nullrev]
629 return [nullrev]
651 # we won't iter over filtered rev so nobody is a head at start
630 # we won't iter over filtered rev so nobody is a head at start
652 ishead = [0] * (count + 1)
631 ishead = [0] * (count + 1)
653 index = self.index
632 index = self.index
654 for r in self:
633 for r in self:
655 ishead[r] = 1 # I may be an head
634 ishead[r] = 1 # I may be an head
656 e = index[r]
635 e = index[r]
657 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
636 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
658 return [r for r, val in enumerate(ishead) if val]
637 return [r for r, val in enumerate(ishead) if val]
659
638
660 def heads(self, start=None, stop=None):
639 def heads(self, start=None, stop=None):
661 """return the list of all nodes that have no children
640 """return the list of all nodes that have no children
662
641
663 if start is specified, only heads that are descendants of
642 if start is specified, only heads that are descendants of
664 start will be returned
643 start will be returned
665 if stop is specified, it will consider all the revs from stop
644 if stop is specified, it will consider all the revs from stop
666 as if they had no children
645 as if they had no children
667 """
646 """
668 if start is None and stop is None:
647 if start is None and stop is None:
669 if not len(self):
648 if not len(self):
670 return [nullid]
649 return [nullid]
671 return [self.node(r) for r in self.headrevs()]
650 return [self.node(r) for r in self.headrevs()]
672
651
673 if start is None:
652 if start is None:
674 start = nullid
653 start = nullid
675 if stop is None:
654 if stop is None:
676 stop = []
655 stop = []
677 stoprevs = set([self.rev(n) for n in stop])
656 stoprevs = set([self.rev(n) for n in stop])
678 startrev = self.rev(start)
657 startrev = self.rev(start)
679 reachable = set((startrev,))
658 reachable = set((startrev,))
680 heads = set((startrev,))
659 heads = set((startrev,))
681
660
682 parentrevs = self.parentrevs
661 parentrevs = self.parentrevs
683 for r in self.revs(start=startrev + 1):
662 for r in self.revs(start=startrev + 1):
684 for p in parentrevs(r):
663 for p in parentrevs(r):
685 if p in reachable:
664 if p in reachable:
686 if r not in stoprevs:
665 if r not in stoprevs:
687 reachable.add(r)
666 reachable.add(r)
688 heads.add(r)
667 heads.add(r)
689 if p in heads and p not in stoprevs:
668 if p in heads and p not in stoprevs:
690 heads.remove(p)
669 heads.remove(p)
691
670
692 return [self.node(r) for r in heads]
671 return [self.node(r) for r in heads]
693
672
694 def children(self, node):
673 def children(self, node):
695 """find the children of a given node"""
674 """find the children of a given node"""
696 c = []
675 c = []
697 p = self.rev(node)
676 p = self.rev(node)
698 for r in self.revs(start=p + 1):
677 for r in self.revs(start=p + 1):
699 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
678 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
700 if prevs:
679 if prevs:
701 for pr in prevs:
680 for pr in prevs:
702 if pr == p:
681 if pr == p:
703 c.append(self.node(r))
682 c.append(self.node(r))
704 elif p == nullrev:
683 elif p == nullrev:
705 c.append(self.node(r))
684 c.append(self.node(r))
706 return c
685 return c
707
686
708 def descendant(self, start, end):
687 def descendant(self, start, end):
709 if start == nullrev:
688 if start == nullrev:
710 return True
689 return True
711 for i in self.descendants([start]):
690 for i in self.descendants([start]):
712 if i == end:
691 if i == end:
713 return True
692 return True
714 elif i > end:
693 elif i > end:
715 break
694 break
716 return False
695 return False
717
696
718 def ancestor(self, a, b):
697 def ancestor(self, a, b):
719 """calculate the least common ancestor of nodes a and b"""
698 """calculate the least common ancestor of nodes a and b"""
720
699
721 # fast path, check if it is a descendant
700 # fast path, check if it is a descendant
722 a, b = self.rev(a), self.rev(b)
701 a, b = self.rev(a), self.rev(b)
723 start, end = sorted((a, b))
702 start, end = sorted((a, b))
724 if self.descendant(start, end):
703 if self.descendant(start, end):
725 return self.node(start)
704 return self.node(start)
726
705
727 def parents(rev):
706 def parents(rev):
728 return [p for p in self.parentrevs(rev) if p != nullrev]
707 return [p for p in self.parentrevs(rev) if p != nullrev]
729
708
730 c = ancestor.ancestor(a, b, parents)
709 c = ancestor.ancestor(a, b, parents)
731 if c is None:
710 if c is None:
732 return nullid
711 return nullid
733
712
734 return self.node(c)
713 return self.node(c)
735
714
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        try:
            return self.index.partialmatch(id)
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            raise LookupError(id, self.indexfile, _("ambiguous identifier"))
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

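    # Illustrative only: for a revlog r, r.lookup(0), r.lookup('0'),
    # r.lookup(node) and r.lookup(hex(node)[:6]) all return the same
    # binary node, provided the hex prefix is unambiguous.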
    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

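    # The chunk cache is a single (offset, data) window over the on-disk
    # file; _addchunk only grows it for contiguous reads that stay below
    # _chunksize, otherwise the window is replaced outright.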
    def _addchunk(self, offset, data):
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length):
        if self._inline:
            df = self.opener(self.indexfile)
        else:
            df = self.opener(self.datafile)

        readahead = max(65536, length)
        df.seek(offset)
        d = df.read(readahead)
        df.close()
        self._addchunk(offset, d)
        if readahead > length:
            return util.buffer(d, 0, length)
        return d

    def _getchunk(self, offset, length):
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._loadchunk(offset, length)

    def _chunkraw(self, startrev, endrev):
        start = self.start(startrev)
        length = self.end(endrev) - start
        if self._inline:
            start += (startrev + 1) * self._io.size
        return self._getchunk(start, length)

    def _chunk(self, rev):
        return decompress(self._chunkraw(rev, rev))

    def _chunkbase(self, rev):
        return self._chunk(rev)

    def _chunkclear(self):
        self._chunkcache = (0, '')

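    # Index entry field 3 is the delta base: with generaldelta it names
    # an arbitrary earlier revision, otherwise the delta is implicitly
    # against rev - 1. A base equal to rev marks a full snapshot.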
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return str(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1),
                              self.revision(rev2))

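    # Reconstruction strategy: walk the index back to a full snapshot (or
    # to the cached revision), then apply the accumulated deltas in order
    # with mdiff.patches and verify the result against the node hash.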
    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                return self._cache[2]
            cachedrev = self._cache[1]

        # look up what we need to read
        text = None
        if rev is None:
            rev = self.rev(node)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        # build delta chain
        chain = []
        index = self.index # for performance
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != cachedrev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]
        chain.reverse()
        base = iterrev

        if iterrev == cachedrev:
            # cache hit
            text = self._cache[2]

        # drop cache to save memory
        self._cache = None

        self._chunkraw(base, rev)
        if text is None:
            text = str(self._chunkbase(base))

        bins = [self._chunk(r) for r in chain]
        text = mdiff.patches(text, bins)

        text = self._checkhash(text, node, rev)

        self._cache = (node, rev, text)
        return text

    def _checkhash(self, text, node, rev):
        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.indexfile, rev))
        return text

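    # Inline revlogs interleave index entries and data in a single file;
    # once the data outgrows _maxinline this migrates everything to the
    # usual separate index/data layout inside the current transaction.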
    def checkinlinesize(self, tr, fp=None):
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        """
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

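    # compress returns a (header, data) pair: 'u' marks text stored
    # uncompressed because compression did not pay off, while an empty
    # header means zlib output (or text that already starts with '\0').
    # Texts under 44 bytes are never compressed, and texts over 1000000
    # bytes are fed to zlib in 1MB pieces to avoid doubling memory usage.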
    def compress(self, text):
        """ generate a possibly-compressed representation of text """
        if not text:
            return ("", text)
        l = len(text)
        bin = None
        if l < 44:
            pass
        elif l > 1000000:
            # zlib makes an internal copy, thus doubling memory usage for
            # large files, so let's do this in pieces
            z = zlib.compressobj()
            p = []
            pos = 0
            while pos < l:
                pos2 = pos + 2**20
                p.append(z.compress(text[pos:pos2]))
                pos = pos2
            p.append(z.flush())
            if sum(map(len, p)) < l:
                bin = "".join(p)
        else:
            bin = _compress(text)
        if bin is None or len(bin) > l:
            if text[0] == '\0':
                return ("", text)
            return ('u', text)
        return ("", bin)

    def _addrevision(self, node, text, transaction, link, p1, p2,
                     cachedelta, ifh, dfh):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.
        invariants:
        - text is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            # flush any pending writes here so we can read it in revision
            if dfh:
                dfh.flush()
            ifh.flush()
            basetext = self.revision(self.node(cachedelta[0]))
            btext[0] = mdiff.patch(basetext, cachedelta[1])
            chk = hash(btext[0], p1, p2)
            if chk != node:
                raise RevlogError(_("consistency error in delta"))
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                ptext = self.revision(self.node(rev))
                delta = mdiff.textdiff(ptext, t)
            data = self.compress(delta)
            l = len(data[1]) + len(data[0])
            if basecache[0] == rev:
                chainbase = basecache[1]
            else:
                chainbase = self.chainbase(rev)
            dist = l + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            return dist, l, data, base, chainbase

        curr = len(self)
        prev = curr - 1
        base = chainbase = curr
        offset = self.end(prev)
        flags = 0
        d = None
        basecache = self._basecache
        p1r, p2r = self.rev(p1), self.rev(p2)

        # should we try to build a delta?
        if prev != nullrev:
            if self._generaldelta:
                if p1r >= basecache[1]:
                    d = builddelta(p1r)
                elif p2r >= basecache[1]:
                    d = builddelta(p2r)
                else:
                    d = builddelta(prev)
            else:
                d = builddelta(prev)
            dist, l, data, base, chainbase = d

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)
        if d is None or dist > textlen * 2:
            text = buildtext()
            data = self.compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._basecache = (curr, chainbase)
        return node

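    # reorder=None means "decide per revlog": generaldelta logs are
    # linearized for smaller bundles, others are emitted in sorted
    # revision order. Callers can force either behavior with True/False.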
    def group(self, nodelist, bundler, reorder=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0]; the receiver is
        guaranteed to have this parent, as it has all history before
        these changesets. If the first parent is nullrev, the
        changegroup starts with a full revision.
        """

        # if we don't have any revisions touched by these changesets, bail
        if len(nodelist) == 0:
            yield bundler.close()
            return

        # for generaldelta revlogs, we linearize the revs; this will both be
        # much quicker and generate a much smaller bundle
        if (self._generaldelta and reorder is not False) or reorder:
            dag = dagutil.revlogdag(self)
            revs = set(self.rev(n) for n in nodelist)
            revs = dag.linearize(revs)
        else:
            revs = sorted([self.rev(n) for n in nodelist])

        # add the parent of the first rev
        p = self.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        for r in xrange(len(revs) - 1):
            prev, curr = revs[r], revs[r + 1]
            for c in bundler.revchunk(self, curr, prev):
                yield c

        yield bundler.close()

    def addgroup(self, bundle, linkmapper, transaction):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first
        delta is against its parent, which should be in our log; the
        rest are against the previous delta.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            while True:
                chunkdata = bundle.deltachunk(chain)
                if not chunkdata:
                    break
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, (baserev, delta), ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

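    # checksize reports how far the files on disk diverge from what the
    # index promises: it returns (dd, di), the byte discrepancies in the
    # data file and the index file respectively (0 means consistent).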
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res