##// END OF EJS Templates
Abstract ancestor algorithm into generic function...
Matt Mackall -
r3135:b1db258e default
parent child Browse files
Show More
@@ -0,0 +1,83 b''
1 # ancestor.py - generic DAG ancestor algorithm for mercurial
2 #
3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
7
8 import heapq
9
10 def ancestor(a, b, pfunc):
11 """
12 return the least common ancestor of nodes a and b or None if there
13 is no such ancestor.
14
15 pfunc must return a list of parent vertices
16 """
17
18 if a == b:
19 return a
20
21 # find depth from root of all ancestors
22 visit = [a, b]
23 depth = {}
24 while visit:
25 vertex = visit[-1]
26 pl = pfunc(vertex)
27 if not pl:
28 depth[vertex] = 0
29 visit.pop()
30 else:
31 for p in pl:
32 if p == a or p == b: # did we find a or b as a parent?
33 return p # we're done
34 if p not in depth:
35 visit.append(p)
36 if visit[-1] == vertex:
37 depth[vertex] = min([depth[p] for p in pl]) - 1
38 visit.pop()
39
40 # traverse ancestors in order of decreasing distance from root
41 def ancestors(vertex):
42 h = [(depth[vertex], vertex)]
43 seen = {}
44 while h:
45 d, n = heapq.heappop(h)
46 if n not in seen:
47 seen[n] = 1
48 yield (d, n)
49 for p in pfunc(n):
50 heapq.heappush(h, (depth[p], p))
51
52 def generations(vertex):
53 sg, s = None, {}
54 for g,v in ancestors(vertex):
55 if g != sg:
56 if sg:
57 yield sg, s
58 sg, s = g, {v:1}
59 else:
60 s[v] = 1
61 yield sg, s
62
63 x = generations(a)
64 y = generations(b)
65 gx = x.next()
66 gy = y.next()
67
68 # increment each ancestor list until it is closer to root than
69 # the other, or they match
70 try:
71 while 1:
72 if gx[0] == gy[0]:
73 for v in gx[1]:
74 if v in gy[1]:
75 return v
76 gy = y.next()
77 gx = x.next()
78 elif gx[0] > gy[0]:
79 gy = y.next()
80 else:
81 gx = x.next()
82 except StopIteration:
83 return None
@@ -1,263 +1,186 b''
1 # context.py - changeset and file context objects for mercurial
1 # context.py - changeset and file context objects for mercurial
2 #
2 #
3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
6 # of the GNU General Public License, incorporated herein by reference.
7
7
8 from node import *
8 from node import *
9 from demandload import demandload
9 from demandload import demandload
10 demandload(globals(), "heapq")
10 demandload(globals(), "ancestor")
11
11
12 class changectx(object):
12 class changectx(object):
13 """A changecontext object makes access to data related to a particular
13 """A changecontext object makes access to data related to a particular
14 changeset convenient."""
14 changeset convenient."""
15 def __init__(self, repo, changeid=None):
15 def __init__(self, repo, changeid=None):
16 """changeid is a revision number, node, or tag"""
16 """changeid is a revision number, node, or tag"""
17 self._repo = repo
17 self._repo = repo
18
18
19 if not changeid:
19 if not changeid:
20 p1, p2 = self._repo.dirstate.parents()
20 p1, p2 = self._repo.dirstate.parents()
21 self._rev = self._repo.changelog.rev(p1)
21 self._rev = self._repo.changelog.rev(p1)
22 if self._rev == -1:
22 if self._rev == -1:
23 changeid = 'tip'
23 changeid = 'tip'
24 else:
24 else:
25 self._node = p1
25 self._node = p1
26 return
26 return
27
27
28 self._node = self._repo.lookup(changeid)
28 self._node = self._repo.lookup(changeid)
29 self._rev = self._repo.changelog.rev(self._node)
29 self._rev = self._repo.changelog.rev(self._node)
30
30
31 def changeset(self):
31 def changeset(self):
32 try:
32 try:
33 return self._changeset
33 return self._changeset
34 except AttributeError:
34 except AttributeError:
35 self._changeset = self._repo.changelog.read(self.node())
35 self._changeset = self._repo.changelog.read(self.node())
36 return self._changeset
36 return self._changeset
37
37
38 def manifest(self):
38 def manifest(self):
39 try:
39 try:
40 return self._manifest
40 return self._manifest
41 except AttributeError:
41 except AttributeError:
42 self._manifest = self._repo.manifest.read(self.changeset()[0])
42 self._manifest = self._repo.manifest.read(self.changeset()[0])
43 return self._manifest
43 return self._manifest
44
44
45 def rev(self): return self._rev
45 def rev(self): return self._rev
46 def node(self): return self._node
46 def node(self): return self._node
47 def user(self): return self.changeset()[1]
47 def user(self): return self.changeset()[1]
48 def date(self): return self.changeset()[2]
48 def date(self): return self.changeset()[2]
49 def files(self): return self.changeset()[3]
49 def files(self): return self.changeset()[3]
50 def description(self): return self.changeset()[4]
50 def description(self): return self.changeset()[4]
51
51
52 def parents(self):
52 def parents(self):
53 """return contexts for each parent changeset"""
53 """return contexts for each parent changeset"""
54 p = self._repo.changelog.parents(self._node)
54 p = self._repo.changelog.parents(self._node)
55 return [ changectx(self._repo, x) for x in p ]
55 return [ changectx(self._repo, x) for x in p ]
56
56
57 def children(self):
57 def children(self):
58 """return contexts for each child changeset"""
58 """return contexts for each child changeset"""
59 c = self._repo.changelog.children(self._node)
59 c = self._repo.changelog.children(self._node)
60 return [ changectx(self._repo, x) for x in c ]
60 return [ changectx(self._repo, x) for x in c ]
61
61
62 def filenode(self, path):
62 def filenode(self, path):
63 node, flag = self._repo.manifest.find(self.changeset()[0], path)
63 node, flag = self._repo.manifest.find(self.changeset()[0], path)
64 return node
64 return node
65
65
66 def filectx(self, path, fileid=None):
66 def filectx(self, path, fileid=None):
67 """get a file context from this changeset"""
67 """get a file context from this changeset"""
68 if fileid is None:
68 if fileid is None:
69 fileid = self.filenode(path)
69 fileid = self.filenode(path)
70 return filectx(self._repo, path, fileid=fileid)
70 return filectx(self._repo, path, fileid=fileid)
71
71
72 def filectxs(self):
72 def filectxs(self):
73 """generate a file context for each file in this changeset's
73 """generate a file context for each file in this changeset's
74 manifest"""
74 manifest"""
75 mf = self.manifest()
75 mf = self.manifest()
76 m = mf.keys()
76 m = mf.keys()
77 m.sort()
77 m.sort()
78 for f in m:
78 for f in m:
79 yield self.filectx(f, fileid=mf[f])
79 yield self.filectx(f, fileid=mf[f])
80
80
81 def ancestor(self, c2):
81 def ancestor(self, c2):
82 """
82 """
83 return the ancestor context of self and c2
83 return the ancestor context of self and c2
84 """
84 """
85 n = self._repo.changelog.ancestor(self._node, c2._node)
85 n = self._repo.changelog.ancestor(self._node, c2._node)
86 return changectx(self._repo, n)
86 return changectx(self._repo, n)
87
87
88 class filectx(object):
88 class filectx(object):
89 """A filecontext object makes access to data related to a particular
89 """A filecontext object makes access to data related to a particular
90 filerevision convenient."""
90 filerevision convenient."""
91 def __init__(self, repo, path, changeid=None, fileid=None, filelog=None):
91 def __init__(self, repo, path, changeid=None, fileid=None, filelog=None):
92 """changeid can be a changeset revision, node, or tag.
92 """changeid can be a changeset revision, node, or tag.
93 fileid can be a file revision or node."""
93 fileid can be a file revision or node."""
94 self._repo = repo
94 self._repo = repo
95 self._path = path
95 self._path = path
96
96
97 assert changeid or fileid
97 assert changeid or fileid
98
98
99 if filelog:
99 if filelog:
100 self._filelog = filelog
100 self._filelog = filelog
101 else:
101 else:
102 self._filelog = self._repo.file(self._path)
102 self._filelog = self._repo.file(self._path)
103
103
104 if not fileid:
104 if not fileid:
105 # if given a changeset id, go ahead and look up the file
105 # if given a changeset id, go ahead and look up the file
106 self._changeid = changeid
106 self._changeid = changeid
107 self._changectx = self.changectx()
107 self._changectx = self.changectx()
108 self._filenode = self._changectx.filenode(self._path)
108 self._filenode = self._changectx.filenode(self._path)
109 else:
109 else:
110 # else delay changectx creation
110 # else delay changectx creation
111 self._filenode = self._filelog.lookup(fileid)
111 self._filenode = self._filelog.lookup(fileid)
112 self._changeid = self._filelog.linkrev(self._filenode)
112 self._changeid = self._filelog.linkrev(self._filenode)
113 self._filerev = self._filelog.rev(self._filenode)
113 self._filerev = self._filelog.rev(self._filenode)
114
114
115 def changectx(self):
115 def changectx(self):
116 try:
116 try:
117 return self._changectx
117 return self._changectx
118 except AttributeError:
118 except AttributeError:
119 self._changectx = changectx(self._repo, self._changeid)
119 self._changectx = changectx(self._repo, self._changeid)
120 return self._changectx
120 return self._changectx
121
121
122 def filerev(self): return self._filerev
122 def filerev(self): return self._filerev
123 def filenode(self): return self._filenode
123 def filenode(self): return self._filenode
124 def filelog(self): return self._filelog
124 def filelog(self): return self._filelog
125
125
126 def rev(self): return self.changectx().rev()
126 def rev(self): return self.changectx().rev()
127 def node(self): return self.changectx().node()
127 def node(self): return self.changectx().node()
128 def user(self): return self.changectx().user()
128 def user(self): return self.changectx().user()
129 def date(self): return self.changectx().date()
129 def date(self): return self.changectx().date()
130 def files(self): return self.changectx().files()
130 def files(self): return self.changectx().files()
131 def description(self): return self.changectx().description()
131 def description(self): return self.changectx().description()
132 def manifest(self): return self.changectx().manifest()
132 def manifest(self): return self.changectx().manifest()
133
133
134 def data(self): return self._filelog.read(self._filenode)
134 def data(self): return self._filelog.read(self._filenode)
135 def renamed(self): return self._filelog.renamed(self._filenode)
135 def renamed(self): return self._filelog.renamed(self._filenode)
136 def path(self): return self._path
136 def path(self): return self._path
137
137
138 def parents(self):
138 def parents(self):
139 p = self._path
139 p = self._path
140 fl = self._filelog
140 fl = self._filelog
141 pl = [ (p, n, fl) for n in self._filelog.parents(self._filenode) ]
141 pl = [ (p, n, fl) for n in self._filelog.parents(self._filenode) ]
142
142
143 r = self.renamed()
143 r = self.renamed()
144 if r:
144 if r:
145 pl[0] = (r[0], r[1], None)
145 pl[0] = (r[0], r[1], None)
146
146
147 return [ filectx(self._repo, p, fileid=n, filelog=l)
147 return [ filectx(self._repo, p, fileid=n, filelog=l)
148 for p,n,l in pl if n != nullid ]
148 for p,n,l in pl if n != nullid ]
149
149
150 def children(self):
150 def children(self):
151 # hard for renames
151 # hard for renames
152 c = self._filelog.children(self._filenode)
152 c = self._filelog.children(self._filenode)
153 return [ filectx(self._repo, self._path, fileid=x,
153 return [ filectx(self._repo, self._path, fileid=x,
154 filelog=self._filelog) for x in c ]
154 filelog=self._filelog) for x in c ]
155
155
156 def annotate(self):
156 def annotate(self):
157 return self._filelog.annotate(self._filenode)
157 return self._filelog.annotate(self._filenode)
158
158
159 def ancestor(self, fc2):
159 def ancestor(self, fc2):
160 """
160 """
161 find the common ancestor file context, if any, of self, and fc2
161 find the common ancestor file context, if any, of self, and fc2
162 """
162 """
163
163
164 a, b = (self._path, self._filenode), (fc2._path, fc2._filenode)
164 acache = {}
165
166 if a == b:
167 return self
168
169 if a[0] == b[0]:
170 n = self._filelog.ancestor(a[1], b[1])
171 if n != nullid:
172 return filectx(self._repo, self._path,
173 fileid=n, filelog=self._filelog)
174
175 # build a graph of all ancestors, crossing renames
176 ag = {}
177 fv = [a, b]
178 flcache = {self._path:self._filelog, fc2._path:fc2._filelog}
165 flcache = {self._path:self._filelog, fc2._path:fc2._filelog}
179
166 def parents(vertex):
180 while fv:
167 if vertex in acache:
181 f,n = fv.pop()
168 return acache[vertex]
182 try:
169 f, n = vertex
183 fl = flcache[f]
170 if f not in flcache:
184 except KeyError:
185 flcache[f] = self._repo.file(f)
171 flcache[f] = self._repo.file(f)
186 fl = flcache[f]
172 fl = flcache[f]
187 v = [n]
173 pl = [ (f,p) for p in fl.parents(n) if p != nullid ]
188 while v:
174 re = fl.renamed(n)
189 n = v.pop()
175 if re:
190 c = (f, n)
176 pl.append(re)
191 if c in ag:
177 acache[vertex]=pl
192 continue
178 return pl
193
194 pl = [ n for n in fl.parents(n) if n != nullid ]
195 v += pl
196 pl = [ (f, n) for n in pl ]
197 re = fl.renamed(n)
198 if re:
199 pl.append(re)
200 if re not in ag:
201 fv.append(re)
202 ag[c] = pl
203
179
204 dist = {}
180 a, b = (self._path, self._filenode), (fc2._path, fc2._filenode)
205 def depth(node):
181 v = ancestor.ancestor(a, b, parents)
206 try:
182 if v:
207 return dist[node]
183 f,n = v
208 except KeyError:
184 return filectx(self._repo, f, fileid=n, filelog=flcache[f])
209 pl = ag[node]
210 if not pl:
211 dist[node] = 0
212 else:
213 dist[node] = max([depth(p) for p in pl]) + 1
214 return dist[node]
215
216 # traverse ancestors in order of decreasing distance from root
217 def ancestors(vertex):
218 h = [(-depth(vertex), vertex)]
219 seen = {}
220 while h:
221 d, v = heapq.heappop(h)
222 if v not in seen:
223 seen[v] = 1
224 yield (-d, v)
225 for p in ag[v]:
226 heapq.heappush(h, (-depth(p), p))
227
185
228 def generations(vertex):
186 return None
229 sg, s = None, {}
230 for g,v in ancestors(vertex):
231 if g != sg:
232 if sg:
233 yield sg, s
234 sg, s = g, {v:1}
235 else:
236 s[v] = 1
237 yield sg, s
238
239 x = generations(a)
240 y = generations(b)
241 gx = x.next()
242 gy = y.next()
243
244 # increment each ancestor list until it is closer to root than
245 # the other, or they match
246 try:
247 while 1:
248 if gx[0] == gy[0]:
249 # find the intersection
250 i = [ n for n in gx[1] if n in gy[1] ]
251 if i:
252 fp,fn = i[0]
253 fl = flcache[fp]
254 return filectx(self._repo, fp, fileid=fn, filelog=fl)
255 else:
256 gy = y.next()
257 gx = x.next()
258 elif gx[0] < gy[0]:
259 gy = y.next()
260 else:
261 gx = x.next()
262 except StopIteration:
263 return None
@@ -1,1302 +1,1234 b''
1 """
1 """
2 revlog.py - storage back-end for mercurial
2 revlog.py - storage back-end for mercurial
3
3
4 This provides efficient delta storage with O(1) retrieve and append
4 This provides efficient delta storage with O(1) retrieve and append
5 and O(changes) merge between branches
5 and O(changes) merge between branches
6
6
7 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
7 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
8
8
9 This software may be used and distributed according to the terms
9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
10 of the GNU General Public License, incorporated herein by reference.
11 """
11 """
12
12
13 from node import *
13 from node import *
14 from i18n import gettext as _
14 from i18n import gettext as _
15 from demandload import demandload
15 from demandload import demandload
16 demandload(globals(), "binascii changegroup errno heapq mdiff os")
16 demandload(globals(), "binascii changegroup errno ancestor mdiff os")
17 demandload(globals(), "sha struct util zlib")
17 demandload(globals(), "sha struct util zlib")
18
18
19 # revlog version strings
19 # revlog version strings
20 REVLOGV0 = 0
20 REVLOGV0 = 0
21 REVLOGNG = 1
21 REVLOGNG = 1
22
22
23 # revlog flags
23 # revlog flags
24 REVLOGNGINLINEDATA = (1 << 16)
24 REVLOGNGINLINEDATA = (1 << 16)
25 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
25 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
26
26
27 REVLOG_DEFAULT_FORMAT = REVLOGNG
27 REVLOG_DEFAULT_FORMAT = REVLOGNG
28 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
28 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
29
29
30 def flagstr(flag):
30 def flagstr(flag):
31 if flag == "inline":
31 if flag == "inline":
32 return REVLOGNGINLINEDATA
32 return REVLOGNGINLINEDATA
33 raise RevlogError(_("unknown revlog flag %s" % flag))
33 raise RevlogError(_("unknown revlog flag %s" % flag))
34
34
35 def hash(text, p1, p2):
35 def hash(text, p1, p2):
36 """generate a hash from the given text and its parent hashes
36 """generate a hash from the given text and its parent hashes
37
37
38 This hash combines both the current file contents and its history
38 This hash combines both the current file contents and its history
39 in a manner that makes it easy to distinguish nodes with the same
39 in a manner that makes it easy to distinguish nodes with the same
40 content in the revision graph.
40 content in the revision graph.
41 """
41 """
42 l = [p1, p2]
42 l = [p1, p2]
43 l.sort()
43 l.sort()
44 s = sha.new(l[0])
44 s = sha.new(l[0])
45 s.update(l[1])
45 s.update(l[1])
46 s.update(text)
46 s.update(text)
47 return s.digest()
47 return s.digest()
48
48
49 def compress(text):
49 def compress(text):
50 """ generate a possibly-compressed representation of text """
50 """ generate a possibly-compressed representation of text """
51 if not text: return ("", text)
51 if not text: return ("", text)
52 if len(text) < 44:
52 if len(text) < 44:
53 if text[0] == '\0': return ("", text)
53 if text[0] == '\0': return ("", text)
54 return ('u', text)
54 return ('u', text)
55 bin = zlib.compress(text)
55 bin = zlib.compress(text)
56 if len(bin) > len(text):
56 if len(bin) > len(text):
57 if text[0] == '\0': return ("", text)
57 if text[0] == '\0': return ("", text)
58 return ('u', text)
58 return ('u', text)
59 return ("", bin)
59 return ("", bin)
60
60
61 def decompress(bin):
61 def decompress(bin):
62 """ decompress the given input """
62 """ decompress the given input """
63 if not bin: return bin
63 if not bin: return bin
64 t = bin[0]
64 t = bin[0]
65 if t == '\0': return bin
65 if t == '\0': return bin
66 if t == 'x': return zlib.decompress(bin)
66 if t == 'x': return zlib.decompress(bin)
67 if t == 'u': return bin[1:]
67 if t == 'u': return bin[1:]
68 raise RevlogError(_("unknown compression type %r") % t)
68 raise RevlogError(_("unknown compression type %r") % t)
69
69
70 indexformatv0 = ">4l20s20s20s"
70 indexformatv0 = ">4l20s20s20s"
71 v0shaoffset = 56
71 v0shaoffset = 56
72 # index ng:
72 # index ng:
73 # 6 bytes offset
73 # 6 bytes offset
74 # 2 bytes flags
74 # 2 bytes flags
75 # 4 bytes compressed length
75 # 4 bytes compressed length
76 # 4 bytes uncompressed length
76 # 4 bytes uncompressed length
77 # 4 bytes: base rev
77 # 4 bytes: base rev
78 # 4 bytes link rev
78 # 4 bytes link rev
79 # 4 bytes parent 1 rev
79 # 4 bytes parent 1 rev
80 # 4 bytes parent 2 rev
80 # 4 bytes parent 2 rev
81 # 32 bytes: nodeid
81 # 32 bytes: nodeid
82 indexformatng = ">Qiiiiii20s12x"
82 indexformatng = ">Qiiiiii20s12x"
83 ngshaoffset = 32
83 ngshaoffset = 32
84 versionformat = ">i"
84 versionformat = ">i"
85
85
86 class lazyparser(object):
86 class lazyparser(object):
87 """
87 """
88 this class avoids the need to parse the entirety of large indices
88 this class avoids the need to parse the entirety of large indices
89 """
89 """
90
90
91 # lazyparser is not safe to use on windows if win32 extensions not
91 # lazyparser is not safe to use on windows if win32 extensions not
92 # available. it keeps file handle open, which make it not possible
92 # available. it keeps file handle open, which make it not possible
93 # to break hardlinks on local cloned repos.
93 # to break hardlinks on local cloned repos.
94 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
94 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
95 hasattr(util, 'win32api'))
95 hasattr(util, 'win32api'))
96
96
97 def __init__(self, dataf, size, indexformat, shaoffset):
97 def __init__(self, dataf, size, indexformat, shaoffset):
98 self.dataf = dataf
98 self.dataf = dataf
99 self.format = indexformat
99 self.format = indexformat
100 self.s = struct.calcsize(indexformat)
100 self.s = struct.calcsize(indexformat)
101 self.indexformat = indexformat
101 self.indexformat = indexformat
102 self.datasize = size
102 self.datasize = size
103 self.l = size/self.s
103 self.l = size/self.s
104 self.index = [None] * self.l
104 self.index = [None] * self.l
105 self.map = {nullid: -1}
105 self.map = {nullid: -1}
106 self.allmap = 0
106 self.allmap = 0
107 self.all = 0
107 self.all = 0
108 self.mapfind_count = 0
108 self.mapfind_count = 0
109 self.shaoffset = shaoffset
109 self.shaoffset = shaoffset
110
110
111 def loadmap(self):
111 def loadmap(self):
112 """
112 """
113 during a commit, we need to make sure the rev being added is
113 during a commit, we need to make sure the rev being added is
114 not a duplicate. This requires loading the entire index,
114 not a duplicate. This requires loading the entire index,
115 which is fairly slow. loadmap can load up just the node map,
115 which is fairly slow. loadmap can load up just the node map,
116 which takes much less time.
116 which takes much less time.
117 """
117 """
118 if self.allmap: return
118 if self.allmap: return
119 end = self.datasize
119 end = self.datasize
120 self.allmap = 1
120 self.allmap = 1
121 cur = 0
121 cur = 0
122 count = 0
122 count = 0
123 blocksize = self.s * 256
123 blocksize = self.s * 256
124 self.dataf.seek(0)
124 self.dataf.seek(0)
125 while cur < end:
125 while cur < end:
126 data = self.dataf.read(blocksize)
126 data = self.dataf.read(blocksize)
127 off = 0
127 off = 0
128 for x in xrange(256):
128 for x in xrange(256):
129 n = data[off + self.shaoffset:off + self.shaoffset + 20]
129 n = data[off + self.shaoffset:off + self.shaoffset + 20]
130 self.map[n] = count
130 self.map[n] = count
131 count += 1
131 count += 1
132 if count >= self.l:
132 if count >= self.l:
133 break
133 break
134 off += self.s
134 off += self.s
135 cur += blocksize
135 cur += blocksize
136
136
137 def loadblock(self, blockstart, blocksize, data=None):
137 def loadblock(self, blockstart, blocksize, data=None):
138 if self.all: return
138 if self.all: return
139 if data is None:
139 if data is None:
140 self.dataf.seek(blockstart)
140 self.dataf.seek(blockstart)
141 if blockstart + blocksize > self.datasize:
141 if blockstart + blocksize > self.datasize:
142 # the revlog may have grown since we've started running,
142 # the revlog may have grown since we've started running,
143 # but we don't have space in self.index for more entries.
143 # but we don't have space in self.index for more entries.
144 # limit blocksize so that we don't get too much data.
144 # limit blocksize so that we don't get too much data.
145 blocksize = max(self.datasize - blockstart, 0)
145 blocksize = max(self.datasize - blockstart, 0)
146 data = self.dataf.read(blocksize)
146 data = self.dataf.read(blocksize)
147 lend = len(data) / self.s
147 lend = len(data) / self.s
148 i = blockstart / self.s
148 i = blockstart / self.s
149 off = 0
149 off = 0
150 for x in xrange(lend):
150 for x in xrange(lend):
151 if self.index[i + x] == None:
151 if self.index[i + x] == None:
152 b = data[off : off + self.s]
152 b = data[off : off + self.s]
153 self.index[i + x] = b
153 self.index[i + x] = b
154 n = b[self.shaoffset:self.shaoffset + 20]
154 n = b[self.shaoffset:self.shaoffset + 20]
155 self.map[n] = i + x
155 self.map[n] = i + x
156 off += self.s
156 off += self.s
157
157
158 def findnode(self, node):
158 def findnode(self, node):
159 """search backwards through the index file for a specific node"""
159 """search backwards through the index file for a specific node"""
160 if self.allmap: return None
160 if self.allmap: return None
161
161
162 # hg log will cause many many searches for the manifest
162 # hg log will cause many many searches for the manifest
163 # nodes. After we get called a few times, just load the whole
163 # nodes. After we get called a few times, just load the whole
164 # thing.
164 # thing.
165 if self.mapfind_count > 8:
165 if self.mapfind_count > 8:
166 self.loadmap()
166 self.loadmap()
167 if node in self.map:
167 if node in self.map:
168 return node
168 return node
169 return None
169 return None
170 self.mapfind_count += 1
170 self.mapfind_count += 1
171 last = self.l - 1
171 last = self.l - 1
172 while self.index[last] != None:
172 while self.index[last] != None:
173 if last == 0:
173 if last == 0:
174 self.all = 1
174 self.all = 1
175 self.allmap = 1
175 self.allmap = 1
176 return None
176 return None
177 last -= 1
177 last -= 1
178 end = (last + 1) * self.s
178 end = (last + 1) * self.s
179 blocksize = self.s * 256
179 blocksize = self.s * 256
180 while end >= 0:
180 while end >= 0:
181 start = max(end - blocksize, 0)
181 start = max(end - blocksize, 0)
182 self.dataf.seek(start)
182 self.dataf.seek(start)
183 data = self.dataf.read(end - start)
183 data = self.dataf.read(end - start)
184 findend = end - start
184 findend = end - start
185 while True:
185 while True:
186 # we're searching backwards, so weh have to make sure
186 # we're searching backwards, so weh have to make sure
187 # we don't find a changeset where this node is a parent
187 # we don't find a changeset where this node is a parent
188 off = data.rfind(node, 0, findend)
188 off = data.rfind(node, 0, findend)
189 findend = off
189 findend = off
190 if off >= 0:
190 if off >= 0:
191 i = off / self.s
191 i = off / self.s
192 off = i * self.s
192 off = i * self.s
193 n = data[off + self.shaoffset:off + self.shaoffset + 20]
193 n = data[off + self.shaoffset:off + self.shaoffset + 20]
194 if n == node:
194 if n == node:
195 self.map[n] = i + start / self.s
195 self.map[n] = i + start / self.s
196 return node
196 return node
197 else:
197 else:
198 break
198 break
199 end -= blocksize
199 end -= blocksize
200 return None
200 return None
201
201
202 def loadindex(self, i=None, end=None):
202 def loadindex(self, i=None, end=None):
203 if self.all: return
203 if self.all: return
204 all = False
204 all = False
205 if i == None:
205 if i == None:
206 blockstart = 0
206 blockstart = 0
207 blocksize = (512 / self.s) * self.s
207 blocksize = (512 / self.s) * self.s
208 end = self.datasize
208 end = self.datasize
209 all = True
209 all = True
210 else:
210 else:
211 if end:
211 if end:
212 blockstart = i * self.s
212 blockstart = i * self.s
213 end = end * self.s
213 end = end * self.s
214 blocksize = end - blockstart
214 blocksize = end - blockstart
215 else:
215 else:
216 blockstart = (i & ~(32)) * self.s
216 blockstart = (i & ~(32)) * self.s
217 blocksize = self.s * 64
217 blocksize = self.s * 64
218 end = blockstart + blocksize
218 end = blockstart + blocksize
219 while blockstart < end:
219 while blockstart < end:
220 self.loadblock(blockstart, blocksize)
220 self.loadblock(blockstart, blocksize)
221 blockstart += blocksize
221 blockstart += blocksize
222 if all: self.all = True
222 if all: self.all = True
223
223
224 class lazyindex(object):
224 class lazyindex(object):
225 """a lazy version of the index array"""
225 """a lazy version of the index array"""
226 def __init__(self, parser):
226 def __init__(self, parser):
227 self.p = parser
227 self.p = parser
228 def __len__(self):
228 def __len__(self):
229 return len(self.p.index)
229 return len(self.p.index)
230 def load(self, pos):
230 def load(self, pos):
231 if pos < 0:
231 if pos < 0:
232 pos += len(self.p.index)
232 pos += len(self.p.index)
233 self.p.loadindex(pos)
233 self.p.loadindex(pos)
234 return self.p.index[pos]
234 return self.p.index[pos]
235 def __getitem__(self, pos):
235 def __getitem__(self, pos):
236 ret = self.p.index[pos] or self.load(pos)
236 ret = self.p.index[pos] or self.load(pos)
237 if isinstance(ret, str):
237 if isinstance(ret, str):
238 ret = struct.unpack(self.p.indexformat, ret)
238 ret = struct.unpack(self.p.indexformat, ret)
239 return ret
239 return ret
240 def __setitem__(self, pos, item):
240 def __setitem__(self, pos, item):
241 self.p.index[pos] = item
241 self.p.index[pos] = item
242 def __delitem__(self, pos):
242 def __delitem__(self, pos):
243 del self.p.index[pos]
243 del self.p.index[pos]
244 def append(self, e):
244 def append(self, e):
245 self.p.index.append(e)
245 self.p.index.append(e)
246
246
247 class lazymap(object):
247 class lazymap(object):
248 """a lazy version of the node map"""
248 """a lazy version of the node map"""
249 def __init__(self, parser):
249 def __init__(self, parser):
250 self.p = parser
250 self.p = parser
251 def load(self, key):
251 def load(self, key):
252 n = self.p.findnode(key)
252 n = self.p.findnode(key)
253 if n == None:
253 if n == None:
254 raise KeyError(key)
254 raise KeyError(key)
255 def __contains__(self, key):
255 def __contains__(self, key):
256 if key in self.p.map:
256 if key in self.p.map:
257 return True
257 return True
258 self.p.loadmap()
258 self.p.loadmap()
259 return key in self.p.map
259 return key in self.p.map
260 def __iter__(self):
260 def __iter__(self):
261 yield nullid
261 yield nullid
262 for i in xrange(self.p.l):
262 for i in xrange(self.p.l):
263 ret = self.p.index[i]
263 ret = self.p.index[i]
264 if not ret:
264 if not ret:
265 self.p.loadindex(i)
265 self.p.loadindex(i)
266 ret = self.p.index[i]
266 ret = self.p.index[i]
267 if isinstance(ret, str):
267 if isinstance(ret, str):
268 ret = struct.unpack(self.p.indexformat, ret)
268 ret = struct.unpack(self.p.indexformat, ret)
269 yield ret[-1]
269 yield ret[-1]
270 def __getitem__(self, key):
270 def __getitem__(self, key):
271 try:
271 try:
272 return self.p.map[key]
272 return self.p.map[key]
273 except KeyError:
273 except KeyError:
274 try:
274 try:
275 self.load(key)
275 self.load(key)
276 return self.p.map[key]
276 return self.p.map[key]
277 except KeyError:
277 except KeyError:
278 raise KeyError("node " + hex(key))
278 raise KeyError("node " + hex(key))
279 def __setitem__(self, key, val):
279 def __setitem__(self, key, val):
280 self.p.map[key] = val
280 self.p.map[key] = val
281 def __delitem__(self, key):
281 def __delitem__(self, key):
282 del self.p.map[key]
282 del self.p.map[key]
283
283
284 class RevlogError(Exception): pass
284 class RevlogError(Exception): pass
285
285
286 class revlog(object):
286 class revlog(object):
287 """
287 """
288 the underlying revision storage object
288 the underlying revision storage object
289
289
290 A revlog consists of two parts, an index and the revision data.
290 A revlog consists of two parts, an index and the revision data.
291
291
292 The index is a file with a fixed record size containing
292 The index is a file with a fixed record size containing
293 information on each revision, includings its nodeid (hash), the
293 information on each revision, includings its nodeid (hash), the
294 nodeids of its parents, the position and offset of its data within
294 nodeids of its parents, the position and offset of its data within
295 the data file, and the revision it's based on. Finally, each entry
295 the data file, and the revision it's based on. Finally, each entry
296 contains a linkrev entry that can serve as a pointer to external
296 contains a linkrev entry that can serve as a pointer to external
297 data.
297 data.
298
298
299 The revision data itself is a linear collection of data chunks.
299 The revision data itself is a linear collection of data chunks.
300 Each chunk represents a revision and is usually represented as a
300 Each chunk represents a revision and is usually represented as a
301 delta against the previous chunk. To bound lookup time, runs of
301 delta against the previous chunk. To bound lookup time, runs of
302 deltas are limited to about 2 times the length of the original
302 deltas are limited to about 2 times the length of the original
303 version data. This makes retrieval of a version proportional to
303 version data. This makes retrieval of a version proportional to
304 its size, or O(1) relative to the number of revisions.
304 its size, or O(1) relative to the number of revisions.
305
305
306 Both pieces of the revlog are written to in an append-only
306 Both pieces of the revlog are written to in an append-only
307 fashion, which means we never need to rewrite a file to insert or
307 fashion, which means we never need to rewrite a file to insert or
308 remove data, and can use some simple techniques to avoid the need
308 remove data, and can use some simple techniques to avoid the need
309 for locking while reading.
309 for locking while reading.
310 """
310 """
311 def __init__(self, opener, indexfile, datafile,
311 def __init__(self, opener, indexfile, datafile,
312 defversion=REVLOG_DEFAULT_VERSION):
312 defversion=REVLOG_DEFAULT_VERSION):
313 """
313 """
314 create a revlog object
314 create a revlog object
315
315
316 opener is a function that abstracts the file opening operation
316 opener is a function that abstracts the file opening operation
317 and can be used to implement COW semantics or the like.
317 and can be used to implement COW semantics or the like.
318 """
318 """
319 self.indexfile = indexfile
319 self.indexfile = indexfile
320 self.datafile = datafile
320 self.datafile = datafile
321 self.opener = opener
321 self.opener = opener
322
322
323 self.indexstat = None
323 self.indexstat = None
324 self.cache = None
324 self.cache = None
325 self.chunkcache = None
325 self.chunkcache = None
326 self.defversion = defversion
326 self.defversion = defversion
327 self.load()
327 self.load()
328
328
329 def load(self):
329 def load(self):
330 v = self.defversion
330 v = self.defversion
331 try:
331 try:
332 f = self.opener(self.indexfile)
332 f = self.opener(self.indexfile)
333 i = f.read(4)
333 i = f.read(4)
334 f.seek(0)
334 f.seek(0)
335 except IOError, inst:
335 except IOError, inst:
336 if inst.errno != errno.ENOENT:
336 if inst.errno != errno.ENOENT:
337 raise
337 raise
338 i = ""
338 i = ""
339 else:
339 else:
340 try:
340 try:
341 st = util.fstat(f)
341 st = util.fstat(f)
342 except AttributeError, inst:
342 except AttributeError, inst:
343 st = None
343 st = None
344 else:
344 else:
345 oldst = self.indexstat
345 oldst = self.indexstat
346 if (oldst and st.st_dev == oldst.st_dev
346 if (oldst and st.st_dev == oldst.st_dev
347 and st.st_ino == oldst.st_ino
347 and st.st_ino == oldst.st_ino
348 and st.st_mtime == oldst.st_mtime
348 and st.st_mtime == oldst.st_mtime
349 and st.st_ctime == oldst.st_ctime):
349 and st.st_ctime == oldst.st_ctime):
350 return
350 return
351 self.indexstat = st
351 self.indexstat = st
352 if len(i) > 0:
352 if len(i) > 0:
353 v = struct.unpack(versionformat, i)[0]
353 v = struct.unpack(versionformat, i)[0]
354 flags = v & ~0xFFFF
354 flags = v & ~0xFFFF
355 fmt = v & 0xFFFF
355 fmt = v & 0xFFFF
356 if fmt == REVLOGV0:
356 if fmt == REVLOGV0:
357 if flags:
357 if flags:
358 raise RevlogError(_("index %s invalid flags %x for format v0" %
358 raise RevlogError(_("index %s invalid flags %x for format v0" %
359 (self.indexfile, flags)))
359 (self.indexfile, flags)))
360 elif fmt == REVLOGNG:
360 elif fmt == REVLOGNG:
361 if flags & ~REVLOGNGINLINEDATA:
361 if flags & ~REVLOGNGINLINEDATA:
362 raise RevlogError(_("index %s invalid flags %x for revlogng" %
362 raise RevlogError(_("index %s invalid flags %x for revlogng" %
363 (self.indexfile, flags)))
363 (self.indexfile, flags)))
364 else:
364 else:
365 raise RevlogError(_("index %s invalid format %d" %
365 raise RevlogError(_("index %s invalid format %d" %
366 (self.indexfile, fmt)))
366 (self.indexfile, fmt)))
367 self.version = v
367 self.version = v
368 if v == REVLOGV0:
368 if v == REVLOGV0:
369 self.indexformat = indexformatv0
369 self.indexformat = indexformatv0
370 shaoffset = v0shaoffset
370 shaoffset = v0shaoffset
371 else:
371 else:
372 self.indexformat = indexformatng
372 self.indexformat = indexformatng
373 shaoffset = ngshaoffset
373 shaoffset = ngshaoffset
374
374
375 if i:
375 if i:
376 if (lazyparser.safe_to_use and not self.inlinedata() and
376 if (lazyparser.safe_to_use and not self.inlinedata() and
377 st and st.st_size > 10000):
377 st and st.st_size > 10000):
378 # big index, let's parse it on demand
378 # big index, let's parse it on demand
379 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
379 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
380 self.index = lazyindex(parser)
380 self.index = lazyindex(parser)
381 self.nodemap = lazymap(parser)
381 self.nodemap = lazymap(parser)
382 else:
382 else:
383 self.parseindex(f, st)
383 self.parseindex(f, st)
384 if self.version != REVLOGV0:
384 if self.version != REVLOGV0:
385 e = list(self.index[0])
385 e = list(self.index[0])
386 type = self.ngtype(e[0])
386 type = self.ngtype(e[0])
387 e[0] = self.offset_type(0, type)
387 e[0] = self.offset_type(0, type)
388 self.index[0] = e
388 self.index[0] = e
389 else:
389 else:
390 self.nodemap = { nullid: -1}
390 self.nodemap = { nullid: -1}
391 self.index = []
391 self.index = []
392
392
393
393
394 def parseindex(self, fp, st):
394 def parseindex(self, fp, st):
395 s = struct.calcsize(self.indexformat)
395 s = struct.calcsize(self.indexformat)
396 self.index = []
396 self.index = []
397 self.nodemap = {nullid: -1}
397 self.nodemap = {nullid: -1}
398 inline = self.inlinedata()
398 inline = self.inlinedata()
399 n = 0
399 n = 0
400 leftover = None
400 leftover = None
401 while True:
401 while True:
402 if st:
402 if st:
403 data = fp.read(65536)
403 data = fp.read(65536)
404 else:
404 else:
405 # hack for httprangereader, it doesn't do partial reads well
405 # hack for httprangereader, it doesn't do partial reads well
406 data = fp.read()
406 data = fp.read()
407 if not data:
407 if not data:
408 break
408 break
409 if n == 0 and self.inlinedata():
409 if n == 0 and self.inlinedata():
410 # cache the first chunk
410 # cache the first chunk
411 self.chunkcache = (0, data)
411 self.chunkcache = (0, data)
412 if leftover:
412 if leftover:
413 data = leftover + data
413 data = leftover + data
414 leftover = None
414 leftover = None
415 off = 0
415 off = 0
416 l = len(data)
416 l = len(data)
417 while off < l:
417 while off < l:
418 if l - off < s:
418 if l - off < s:
419 leftover = data[off:]
419 leftover = data[off:]
420 break
420 break
421 cur = data[off:off + s]
421 cur = data[off:off + s]
422 off += s
422 off += s
423 e = struct.unpack(self.indexformat, cur)
423 e = struct.unpack(self.indexformat, cur)
424 self.index.append(e)
424 self.index.append(e)
425 self.nodemap[e[-1]] = n
425 self.nodemap[e[-1]] = n
426 n += 1
426 n += 1
427 if inline:
427 if inline:
428 off += e[1]
428 off += e[1]
429 if off > l:
429 if off > l:
430 # some things don't seek well, just read it
430 # some things don't seek well, just read it
431 fp.read(off - l)
431 fp.read(off - l)
432 if not st:
432 if not st:
433 break
433 break
434
434
435
435
436 def ngoffset(self, q):
436 def ngoffset(self, q):
437 if q & 0xFFFF:
437 if q & 0xFFFF:
438 raise RevlogError(_('%s: incompatible revision flag %x') %
438 raise RevlogError(_('%s: incompatible revision flag %x') %
439 (self.indexfile, q))
439 (self.indexfile, q))
440 return long(q >> 16)
440 return long(q >> 16)
441
441
442 def ngtype(self, q):
442 def ngtype(self, q):
443 return int(q & 0xFFFF)
443 return int(q & 0xFFFF)
444
444
445 def offset_type(self, offset, type):
445 def offset_type(self, offset, type):
446 return long(long(offset) << 16 | type)
446 return long(long(offset) << 16 | type)
447
447
448 def loadindex(self, start, end):
448 def loadindex(self, start, end):
449 """load a block of indexes all at once from the lazy parser"""
449 """load a block of indexes all at once from the lazy parser"""
450 if isinstance(self.index, lazyindex):
450 if isinstance(self.index, lazyindex):
451 self.index.p.loadindex(start, end)
451 self.index.p.loadindex(start, end)
452
452
453 def loadindexmap(self):
453 def loadindexmap(self):
454 """loads both the map and the index from the lazy parser"""
454 """loads both the map and the index from the lazy parser"""
455 if isinstance(self.index, lazyindex):
455 if isinstance(self.index, lazyindex):
456 p = self.index.p
456 p = self.index.p
457 p.loadindex()
457 p.loadindex()
458 self.nodemap = p.map
458 self.nodemap = p.map
459
459
460 def loadmap(self):
460 def loadmap(self):
461 """loads the map from the lazy parser"""
461 """loads the map from the lazy parser"""
462 if isinstance(self.nodemap, lazymap):
462 if isinstance(self.nodemap, lazymap):
463 self.nodemap.p.loadmap()
463 self.nodemap.p.loadmap()
464 self.nodemap = self.nodemap.p.map
464 self.nodemap = self.nodemap.p.map
465
465
466 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
466 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
467 def tip(self): return self.node(len(self.index) - 1)
467 def tip(self): return self.node(len(self.index) - 1)
468 def count(self): return len(self.index)
468 def count(self): return len(self.index)
469 def node(self, rev):
469 def node(self, rev):
470 return (rev < 0) and nullid or self.index[rev][-1]
470 return (rev < 0) and nullid or self.index[rev][-1]
471 def rev(self, node):
471 def rev(self, node):
472 try:
472 try:
473 return self.nodemap[node]
473 return self.nodemap[node]
474 except KeyError:
474 except KeyError:
475 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
475 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
476 def linkrev(self, node):
476 def linkrev(self, node):
477 return (node == nullid) and -1 or self.index[self.rev(node)][-4]
477 return (node == nullid) and -1 or self.index[self.rev(node)][-4]
478 def parents(self, node):
478 def parents(self, node):
479 if node == nullid: return (nullid, nullid)
479 if node == nullid: return (nullid, nullid)
480 r = self.rev(node)
480 r = self.rev(node)
481 d = self.index[r][-3:-1]
481 d = self.index[r][-3:-1]
482 if self.version == REVLOGV0:
482 if self.version == REVLOGV0:
483 return d
483 return d
484 return [ self.node(x) for x in d ]
484 return [ self.node(x) for x in d ]
485 def parentrevs(self, rev):
485 def parentrevs(self, rev):
486 if rev == -1:
486 if rev == -1:
487 return (-1, -1)
487 return (-1, -1)
488 d = self.index[rev][-3:-1]
488 d = self.index[rev][-3:-1]
489 if self.version == REVLOGV0:
489 if self.version == REVLOGV0:
490 return [ self.rev(x) for x in d ]
490 return [ self.rev(x) for x in d ]
491 return d
491 return d
492 def start(self, rev):
492 def start(self, rev):
493 if rev < 0:
493 if rev < 0:
494 return -1
494 return -1
495 if self.version != REVLOGV0:
495 if self.version != REVLOGV0:
496 return self.ngoffset(self.index[rev][0])
496 return self.ngoffset(self.index[rev][0])
497 return self.index[rev][0]
497 return self.index[rev][0]
498
498
499 def end(self, rev): return self.start(rev) + self.length(rev)
499 def end(self, rev): return self.start(rev) + self.length(rev)
500
500
501 def size(self, rev):
501 def size(self, rev):
502 """return the length of the uncompressed text for a given revision"""
502 """return the length of the uncompressed text for a given revision"""
503 l = -1
503 l = -1
504 if self.version != REVLOGV0:
504 if self.version != REVLOGV0:
505 l = self.index[rev][2]
505 l = self.index[rev][2]
506 if l >= 0:
506 if l >= 0:
507 return l
507 return l
508
508
509 t = self.revision(self.node(rev))
509 t = self.revision(self.node(rev))
510 return len(t)
510 return len(t)
511
511
512 # alternate implementation, The advantage to this code is it
512 # alternate implementation, The advantage to this code is it
513 # will be faster for a single revision. But, the results are not
513 # will be faster for a single revision. But, the results are not
514 # cached, so finding the size of every revision will be slower.
514 # cached, so finding the size of every revision will be slower.
515 """
515 """
516 if self.cache and self.cache[1] == rev:
516 if self.cache and self.cache[1] == rev:
517 return len(self.cache[2])
517 return len(self.cache[2])
518
518
519 base = self.base(rev)
519 base = self.base(rev)
520 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
520 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
521 base = self.cache[1]
521 base = self.cache[1]
522 text = self.cache[2]
522 text = self.cache[2]
523 else:
523 else:
524 text = self.revision(self.node(base))
524 text = self.revision(self.node(base))
525
525
526 l = len(text)
526 l = len(text)
527 for x in xrange(base + 1, rev + 1):
527 for x in xrange(base + 1, rev + 1):
528 l = mdiff.patchedsize(l, self.chunk(x))
528 l = mdiff.patchedsize(l, self.chunk(x))
529 return l
529 return l
530 """
530 """
531
531
532 def length(self, rev):
532 def length(self, rev):
533 if rev < 0:
533 if rev < 0:
534 return 0
534 return 0
535 else:
535 else:
536 return self.index[rev][1]
536 return self.index[rev][1]
537 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
537 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
538
538
539 def reachable(self, rev, stop=None):
539 def reachable(self, rev, stop=None):
540 reachable = {}
540 reachable = {}
541 visit = [rev]
541 visit = [rev]
542 reachable[rev] = 1
542 reachable[rev] = 1
543 if stop:
543 if stop:
544 stopn = self.rev(stop)
544 stopn = self.rev(stop)
545 else:
545 else:
546 stopn = 0
546 stopn = 0
547 while visit:
547 while visit:
548 n = visit.pop(0)
548 n = visit.pop(0)
549 if n == stop:
549 if n == stop:
550 continue
550 continue
551 if n == nullid:
551 if n == nullid:
552 continue
552 continue
553 for p in self.parents(n):
553 for p in self.parents(n):
554 if self.rev(p) < stopn:
554 if self.rev(p) < stopn:
555 continue
555 continue
556 if p not in reachable:
556 if p not in reachable:
557 reachable[p] = 1
557 reachable[p] = 1
558 visit.append(p)
558 visit.append(p)
559 return reachable
559 return reachable
560
560
561 def nodesbetween(self, roots=None, heads=None):
561 def nodesbetween(self, roots=None, heads=None):
562 """Return a tuple containing three elements. Elements 1 and 2 contain
562 """Return a tuple containing three elements. Elements 1 and 2 contain
563 a final list bases and heads after all the unreachable ones have been
563 a final list bases and heads after all the unreachable ones have been
564 pruned. Element 0 contains a topologically sorted list of all
564 pruned. Element 0 contains a topologically sorted list of all
565
565
566 nodes that satisfy these constraints:
566 nodes that satisfy these constraints:
567 1. All nodes must be descended from a node in roots (the nodes on
567 1. All nodes must be descended from a node in roots (the nodes on
568 roots are considered descended from themselves).
568 roots are considered descended from themselves).
569 2. All nodes must also be ancestors of a node in heads (the nodes in
569 2. All nodes must also be ancestors of a node in heads (the nodes in
570 heads are considered to be their own ancestors).
570 heads are considered to be their own ancestors).
571
571
572 If roots is unspecified, nullid is assumed as the only root.
572 If roots is unspecified, nullid is assumed as the only root.
573 If heads is unspecified, it is taken to be the output of the
573 If heads is unspecified, it is taken to be the output of the
574 heads method (i.e. a list of all nodes in the repository that
574 heads method (i.e. a list of all nodes in the repository that
575 have no children)."""
575 have no children)."""
576 nonodes = ([], [], [])
576 nonodes = ([], [], [])
577 if roots is not None:
577 if roots is not None:
578 roots = list(roots)
578 roots = list(roots)
579 if not roots:
579 if not roots:
580 return nonodes
580 return nonodes
581 lowestrev = min([self.rev(n) for n in roots])
581 lowestrev = min([self.rev(n) for n in roots])
582 else:
582 else:
583 roots = [nullid] # Everybody's a descendent of nullid
583 roots = [nullid] # Everybody's a descendent of nullid
584 lowestrev = -1
584 lowestrev = -1
585 if (lowestrev == -1) and (heads is None):
585 if (lowestrev == -1) and (heads is None):
586 # We want _all_ the nodes!
586 # We want _all_ the nodes!
587 return ([self.node(r) for r in xrange(0, self.count())],
587 return ([self.node(r) for r in xrange(0, self.count())],
588 [nullid], list(self.heads()))
588 [nullid], list(self.heads()))
589 if heads is None:
589 if heads is None:
590 # All nodes are ancestors, so the latest ancestor is the last
590 # All nodes are ancestors, so the latest ancestor is the last
591 # node.
591 # node.
592 highestrev = self.count() - 1
592 highestrev = self.count() - 1
593 # Set ancestors to None to signal that every node is an ancestor.
593 # Set ancestors to None to signal that every node is an ancestor.
594 ancestors = None
594 ancestors = None
595 # Set heads to an empty dictionary for later discovery of heads
595 # Set heads to an empty dictionary for later discovery of heads
596 heads = {}
596 heads = {}
597 else:
597 else:
598 heads = list(heads)
598 heads = list(heads)
599 if not heads:
599 if not heads:
600 return nonodes
600 return nonodes
601 ancestors = {}
601 ancestors = {}
602 # Start at the top and keep marking parents until we're done.
602 # Start at the top and keep marking parents until we're done.
603 nodestotag = heads[:]
603 nodestotag = heads[:]
604 # Turn heads into a dictionary so we can remove 'fake' heads.
604 # Turn heads into a dictionary so we can remove 'fake' heads.
605 # Also, later we will be using it to filter out the heads we can't
605 # Also, later we will be using it to filter out the heads we can't
606 # find from roots.
606 # find from roots.
607 heads = dict.fromkeys(heads, 0)
607 heads = dict.fromkeys(heads, 0)
608 # Remember where the top was so we can use it as a limit later.
608 # Remember where the top was so we can use it as a limit later.
609 highestrev = max([self.rev(n) for n in nodestotag])
609 highestrev = max([self.rev(n) for n in nodestotag])
610 while nodestotag:
610 while nodestotag:
611 # grab a node to tag
611 # grab a node to tag
612 n = nodestotag.pop()
612 n = nodestotag.pop()
613 # Never tag nullid
613 # Never tag nullid
614 if n == nullid:
614 if n == nullid:
615 continue
615 continue
616 # A node's revision number represents its place in a
616 # A node's revision number represents its place in a
617 # topologically sorted list of nodes.
617 # topologically sorted list of nodes.
618 r = self.rev(n)
618 r = self.rev(n)
619 if r >= lowestrev:
619 if r >= lowestrev:
620 if n not in ancestors:
620 if n not in ancestors:
621 # If we are possibly a descendent of one of the roots
621 # If we are possibly a descendent of one of the roots
622 # and we haven't already been marked as an ancestor
622 # and we haven't already been marked as an ancestor
623 ancestors[n] = 1 # Mark as ancestor
623 ancestors[n] = 1 # Mark as ancestor
624 # Add non-nullid parents to list of nodes to tag.
624 # Add non-nullid parents to list of nodes to tag.
625 nodestotag.extend([p for p in self.parents(n) if
625 nodestotag.extend([p for p in self.parents(n) if
626 p != nullid])
626 p != nullid])
627 elif n in heads: # We've seen it before, is it a fake head?
627 elif n in heads: # We've seen it before, is it a fake head?
628 # So it is, real heads should not be the ancestors of
628 # So it is, real heads should not be the ancestors of
629 # any other heads.
629 # any other heads.
630 heads.pop(n)
630 heads.pop(n)
631 if not ancestors:
631 if not ancestors:
632 return nonodes
632 return nonodes
633 # Now that we have our set of ancestors, we want to remove any
633 # Now that we have our set of ancestors, we want to remove any
634 # roots that are not ancestors.
634 # roots that are not ancestors.
635
635
636 # If one of the roots was nullid, everything is included anyway.
636 # If one of the roots was nullid, everything is included anyway.
637 if lowestrev > -1:
637 if lowestrev > -1:
638 # But, since we weren't, let's recompute the lowest rev to not
638 # But, since we weren't, let's recompute the lowest rev to not
639 # include roots that aren't ancestors.
639 # include roots that aren't ancestors.
640
640
641 # Filter out roots that aren't ancestors of heads
641 # Filter out roots that aren't ancestors of heads
642 roots = [n for n in roots if n in ancestors]
642 roots = [n for n in roots if n in ancestors]
643 # Recompute the lowest revision
643 # Recompute the lowest revision
644 if roots:
644 if roots:
645 lowestrev = min([self.rev(n) for n in roots])
645 lowestrev = min([self.rev(n) for n in roots])
646 else:
646 else:
647 # No more roots? Return empty list
647 # No more roots? Return empty list
648 return nonodes
648 return nonodes
649 else:
649 else:
650 # We are descending from nullid, and don't need to care about
650 # We are descending from nullid, and don't need to care about
651 # any other roots.
651 # any other roots.
652 lowestrev = -1
652 lowestrev = -1
653 roots = [nullid]
653 roots = [nullid]
654 # Transform our roots list into a 'set' (i.e. a dictionary where the
654 # Transform our roots list into a 'set' (i.e. a dictionary where the
655 # values don't matter.
655 # values don't matter.
656 descendents = dict.fromkeys(roots, 1)
656 descendents = dict.fromkeys(roots, 1)
657 # Also, keep the original roots so we can filter out roots that aren't
657 # Also, keep the original roots so we can filter out roots that aren't
658 # 'real' roots (i.e. are descended from other roots).
658 # 'real' roots (i.e. are descended from other roots).
659 roots = descendents.copy()
659 roots = descendents.copy()
660 # Our topologically sorted list of output nodes.
660 # Our topologically sorted list of output nodes.
661 orderedout = []
661 orderedout = []
662 # Don't start at nullid since we don't want nullid in our output list,
662 # Don't start at nullid since we don't want nullid in our output list,
663 # and if nullid shows up in descedents, empty parents will look like
663 # and if nullid shows up in descedents, empty parents will look like
664 # they're descendents.
664 # they're descendents.
665 for r in xrange(max(lowestrev, 0), highestrev + 1):
665 for r in xrange(max(lowestrev, 0), highestrev + 1):
666 n = self.node(r)
666 n = self.node(r)
667 isdescendent = False
667 isdescendent = False
668 if lowestrev == -1: # Everybody is a descendent of nullid
668 if lowestrev == -1: # Everybody is a descendent of nullid
669 isdescendent = True
669 isdescendent = True
670 elif n in descendents:
670 elif n in descendents:
671 # n is already a descendent
671 # n is already a descendent
672 isdescendent = True
672 isdescendent = True
673 # This check only needs to be done here because all the roots
673 # This check only needs to be done here because all the roots
674 # will start being marked is descendents before the loop.
674 # will start being marked is descendents before the loop.
675 if n in roots:
675 if n in roots:
676 # If n was a root, check if it's a 'real' root.
676 # If n was a root, check if it's a 'real' root.
677 p = tuple(self.parents(n))
677 p = tuple(self.parents(n))
678 # If any of its parents are descendents, it's not a root.
678 # If any of its parents are descendents, it's not a root.
679 if (p[0] in descendents) or (p[1] in descendents):
679 if (p[0] in descendents) or (p[1] in descendents):
680 roots.pop(n)
680 roots.pop(n)
681 else:
681 else:
682 p = tuple(self.parents(n))
682 p = tuple(self.parents(n))
683 # A node is a descendent if either of its parents are
683 # A node is a descendent if either of its parents are
684 # descendents. (We seeded the dependents list with the roots
684 # descendents. (We seeded the dependents list with the roots
685 # up there, remember?)
685 # up there, remember?)
686 if (p[0] in descendents) or (p[1] in descendents):
686 if (p[0] in descendents) or (p[1] in descendents):
687 descendents[n] = 1
687 descendents[n] = 1
688 isdescendent = True
688 isdescendent = True
689 if isdescendent and ((ancestors is None) or (n in ancestors)):
689 if isdescendent and ((ancestors is None) or (n in ancestors)):
690 # Only include nodes that are both descendents and ancestors.
690 # Only include nodes that are both descendents and ancestors.
691 orderedout.append(n)
691 orderedout.append(n)
692 if (ancestors is not None) and (n in heads):
692 if (ancestors is not None) and (n in heads):
693 # We're trying to figure out which heads are reachable
693 # We're trying to figure out which heads are reachable
694 # from roots.
694 # from roots.
695 # Mark this head as having been reached
695 # Mark this head as having been reached
696 heads[n] = 1
696 heads[n] = 1
697 elif ancestors is None:
697 elif ancestors is None:
698 # Otherwise, we're trying to discover the heads.
698 # Otherwise, we're trying to discover the heads.
699 # Assume this is a head because if it isn't, the next step
699 # Assume this is a head because if it isn't, the next step
700 # will eventually remove it.
700 # will eventually remove it.
701 heads[n] = 1
701 heads[n] = 1
702 # But, obviously its parents aren't.
702 # But, obviously its parents aren't.
703 for p in self.parents(n):
703 for p in self.parents(n):
704 heads.pop(p, None)
704 heads.pop(p, None)
705 heads = [n for n in heads.iterkeys() if heads[n] != 0]
705 heads = [n for n in heads.iterkeys() if heads[n] != 0]
706 roots = roots.keys()
706 roots = roots.keys()
707 assert orderedout
707 assert orderedout
708 assert roots
708 assert roots
709 assert heads
709 assert heads
710 return (orderedout, roots, heads)
710 return (orderedout, roots, heads)
711
711
712 def heads(self, start=None):
712 def heads(self, start=None):
713 """return the list of all nodes that have no children
713 """return the list of all nodes that have no children
714
714
715 if start is specified, only heads that are descendants of
715 if start is specified, only heads that are descendants of
716 start will be returned
716 start will be returned
717
717
718 """
718 """
719 if start is None:
719 if start is None:
720 start = nullid
720 start = nullid
721 startrev = self.rev(start)
721 startrev = self.rev(start)
722 reachable = {startrev: 1}
722 reachable = {startrev: 1}
723 heads = {startrev: 1}
723 heads = {startrev: 1}
724
724
725 parentrevs = self.parentrevs
725 parentrevs = self.parentrevs
726 for r in xrange(startrev + 1, self.count()):
726 for r in xrange(startrev + 1, self.count()):
727 for p in parentrevs(r):
727 for p in parentrevs(r):
728 if p in reachable:
728 if p in reachable:
729 reachable[r] = 1
729 reachable[r] = 1
730 heads[r] = 1
730 heads[r] = 1
731 if p in heads:
731 if p in heads:
732 del heads[p]
732 del heads[p]
733 return [self.node(r) for r in heads]
733 return [self.node(r) for r in heads]
734
734
735 def children(self, node):
735 def children(self, node):
736 """find the children of a given node"""
736 """find the children of a given node"""
737 c = []
737 c = []
738 p = self.rev(node)
738 p = self.rev(node)
739 for r in range(p + 1, self.count()):
739 for r in range(p + 1, self.count()):
740 n = self.node(r)
740 n = self.node(r)
741 for pn in self.parents(n):
741 for pn in self.parents(n):
742 if pn == node:
742 if pn == node:
743 c.append(n)
743 c.append(n)
744 continue
744 continue
745 elif pn == nullid:
745 elif pn == nullid:
746 continue
746 continue
747 return c
747 return c
748
748
749 def lookup(self, id):
749 def lookup(self, id):
750 """locate a node based on revision number or subset of hex nodeid"""
750 """locate a node based on revision number or subset of hex nodeid"""
751 if type(id) == type(0):
751 if type(id) == type(0):
752 return self.node(id)
752 return self.node(id)
753 try:
753 try:
754 rev = int(id)
754 rev = int(id)
755 if str(rev) != id: raise ValueError
755 if str(rev) != id: raise ValueError
756 if rev < 0: rev = self.count() + rev
756 if rev < 0: rev = self.count() + rev
757 if rev < 0 or rev >= self.count(): raise ValueError
757 if rev < 0 or rev >= self.count(): raise ValueError
758 return self.node(rev)
758 return self.node(rev)
759 except (ValueError, OverflowError):
759 except (ValueError, OverflowError):
760 c = []
760 c = []
761 for n in self.nodemap:
761 for n in self.nodemap:
762 if hex(n).startswith(id):
762 if hex(n).startswith(id):
763 c.append(n)
763 c.append(n)
764 if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
764 if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
765 if len(c) == 1: return c[0]
765 if len(c) == 1: return c[0]
766
766
767 # might need fixing if we change hash lengths
767 # might need fixing if we change hash lengths
768 if len(id) == 20 and id in self.nodemap:
768 if len(id) == 20 and id in self.nodemap:
769 return id
769 return id
770
770
771 raise RevlogError(_("No match found"))
771 raise RevlogError(_("No match found"))
772
772
773 def cmp(self, node, text):
773 def cmp(self, node, text):
774 """compare text with a given file revision"""
774 """compare text with a given file revision"""
775 p1, p2 = self.parents(node)
775 p1, p2 = self.parents(node)
776 return hash(text, p1, p2) != node
776 return hash(text, p1, p2) != node
777
777
778 def makenode(self, node, text):
778 def makenode(self, node, text):
779 """calculate a file nodeid for text, descended or possibly
779 """calculate a file nodeid for text, descended or possibly
780 unchanged from node"""
780 unchanged from node"""
781
781
782 if self.cmp(node, text):
782 if self.cmp(node, text):
783 return hash(text, node, nullid)
783 return hash(text, node, nullid)
784 return node
784 return node
785
785
786 def diff(self, a, b):
786 def diff(self, a, b):
787 """return a delta between two revisions"""
787 """return a delta between two revisions"""
788 return mdiff.textdiff(a, b)
788 return mdiff.textdiff(a, b)
789
789
790 def patches(self, t, pl):
790 def patches(self, t, pl):
791 """apply a list of patches to a string"""
791 """apply a list of patches to a string"""
792 return mdiff.patches(t, pl)
792 return mdiff.patches(t, pl)
793
793
794 def chunk(self, rev, df=None, cachelen=4096):
794 def chunk(self, rev, df=None, cachelen=4096):
795 start, length = self.start(rev), self.length(rev)
795 start, length = self.start(rev), self.length(rev)
796 inline = self.inlinedata()
796 inline = self.inlinedata()
797 if inline:
797 if inline:
798 start += (rev + 1) * struct.calcsize(self.indexformat)
798 start += (rev + 1) * struct.calcsize(self.indexformat)
799 end = start + length
799 end = start + length
800 def loadcache(df):
800 def loadcache(df):
801 cache_length = max(cachelen, length) # 4k
801 cache_length = max(cachelen, length) # 4k
802 if not df:
802 if not df:
803 if inline:
803 if inline:
804 df = self.opener(self.indexfile)
804 df = self.opener(self.indexfile)
805 else:
805 else:
806 df = self.opener(self.datafile)
806 df = self.opener(self.datafile)
807 df.seek(start)
807 df.seek(start)
808 self.chunkcache = (start, df.read(cache_length))
808 self.chunkcache = (start, df.read(cache_length))
809
809
810 if not self.chunkcache:
810 if not self.chunkcache:
811 loadcache(df)
811 loadcache(df)
812
812
813 cache_start = self.chunkcache[0]
813 cache_start = self.chunkcache[0]
814 cache_end = cache_start + len(self.chunkcache[1])
814 cache_end = cache_start + len(self.chunkcache[1])
815 if start >= cache_start and end <= cache_end:
815 if start >= cache_start and end <= cache_end:
816 # it is cached
816 # it is cached
817 offset = start - cache_start
817 offset = start - cache_start
818 else:
818 else:
819 loadcache(df)
819 loadcache(df)
820 offset = 0
820 offset = 0
821
821
822 #def checkchunk():
822 #def checkchunk():
823 # df = self.opener(self.datafile)
823 # df = self.opener(self.datafile)
824 # df.seek(start)
824 # df.seek(start)
825 # return df.read(length)
825 # return df.read(length)
826 #assert s == checkchunk()
826 #assert s == checkchunk()
827 return decompress(self.chunkcache[1][offset:offset + length])
827 return decompress(self.chunkcache[1][offset:offset + length])
828
828
829 def delta(self, node):
829 def delta(self, node):
830 """return or calculate a delta between a node and its predecessor"""
830 """return or calculate a delta between a node and its predecessor"""
831 r = self.rev(node)
831 r = self.rev(node)
832 return self.revdiff(r - 1, r)
832 return self.revdiff(r - 1, r)
833
833
834 def revdiff(self, rev1, rev2):
834 def revdiff(self, rev1, rev2):
835 """return or calculate a delta between two revisions"""
835 """return or calculate a delta between two revisions"""
836 b1 = self.base(rev1)
836 b1 = self.base(rev1)
837 b2 = self.base(rev2)
837 b2 = self.base(rev2)
838 if b1 == b2 and rev1 + 1 == rev2:
838 if b1 == b2 and rev1 + 1 == rev2:
839 return self.chunk(rev2)
839 return self.chunk(rev2)
840 else:
840 else:
841 return self.diff(self.revision(self.node(rev1)),
841 return self.diff(self.revision(self.node(rev1)),
842 self.revision(self.node(rev2)))
842 self.revision(self.node(rev2)))
843
843
844 def revision(self, node):
844 def revision(self, node):
845 """return an uncompressed revision of a given"""
845 """return an uncompressed revision of a given"""
846 if node == nullid: return ""
846 if node == nullid: return ""
847 if self.cache and self.cache[0] == node: return self.cache[2]
847 if self.cache and self.cache[0] == node: return self.cache[2]
848
848
849 # look up what we need to read
849 # look up what we need to read
850 text = None
850 text = None
851 rev = self.rev(node)
851 rev = self.rev(node)
852 base = self.base(rev)
852 base = self.base(rev)
853
853
854 if self.inlinedata():
854 if self.inlinedata():
855 # we probably have the whole chunk cached
855 # we probably have the whole chunk cached
856 df = None
856 df = None
857 else:
857 else:
858 df = self.opener(self.datafile)
858 df = self.opener(self.datafile)
859
859
860 # do we have useful data cached?
860 # do we have useful data cached?
861 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
861 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
862 base = self.cache[1]
862 base = self.cache[1]
863 text = self.cache[2]
863 text = self.cache[2]
864 self.loadindex(base, rev + 1)
864 self.loadindex(base, rev + 1)
865 else:
865 else:
866 self.loadindex(base, rev + 1)
866 self.loadindex(base, rev + 1)
867 text = self.chunk(base, df=df)
867 text = self.chunk(base, df=df)
868
868
869 bins = []
869 bins = []
870 for r in xrange(base + 1, rev + 1):
870 for r in xrange(base + 1, rev + 1):
871 bins.append(self.chunk(r, df=df))
871 bins.append(self.chunk(r, df=df))
872
872
873 text = self.patches(text, bins)
873 text = self.patches(text, bins)
874
874
875 p1, p2 = self.parents(node)
875 p1, p2 = self.parents(node)
876 if node != hash(text, p1, p2):
876 if node != hash(text, p1, p2):
877 raise RevlogError(_("integrity check failed on %s:%d")
877 raise RevlogError(_("integrity check failed on %s:%d")
878 % (self.datafile, rev))
878 % (self.datafile, rev))
879
879
880 self.cache = (node, rev, text)
880 self.cache = (node, rev, text)
881 return text
881 return text
882
882
883 def checkinlinesize(self, tr, fp=None):
883 def checkinlinesize(self, tr, fp=None):
884 if not self.inlinedata():
884 if not self.inlinedata():
885 return
885 return
886 if not fp:
886 if not fp:
887 fp = self.opener(self.indexfile, 'r')
887 fp = self.opener(self.indexfile, 'r')
888 fp.seek(0, 2)
888 fp.seek(0, 2)
889 size = fp.tell()
889 size = fp.tell()
890 if size < 131072:
890 if size < 131072:
891 return
891 return
892 trinfo = tr.find(self.indexfile)
892 trinfo = tr.find(self.indexfile)
893 if trinfo == None:
893 if trinfo == None:
894 raise RevlogError(_("%s not found in the transaction" %
894 raise RevlogError(_("%s not found in the transaction" %
895 self.indexfile))
895 self.indexfile))
896
896
897 trindex = trinfo[2]
897 trindex = trinfo[2]
898 dataoff = self.start(trindex)
898 dataoff = self.start(trindex)
899
899
900 tr.add(self.datafile, dataoff)
900 tr.add(self.datafile, dataoff)
901 df = self.opener(self.datafile, 'w')
901 df = self.opener(self.datafile, 'w')
902 calc = struct.calcsize(self.indexformat)
902 calc = struct.calcsize(self.indexformat)
903 for r in xrange(self.count()):
903 for r in xrange(self.count()):
904 start = self.start(r) + (r + 1) * calc
904 start = self.start(r) + (r + 1) * calc
905 length = self.length(r)
905 length = self.length(r)
906 fp.seek(start)
906 fp.seek(start)
907 d = fp.read(length)
907 d = fp.read(length)
908 df.write(d)
908 df.write(d)
909 fp.close()
909 fp.close()
910 df.close()
910 df.close()
911 fp = self.opener(self.indexfile, 'w', atomictemp=True)
911 fp = self.opener(self.indexfile, 'w', atomictemp=True)
912 self.version &= ~(REVLOGNGINLINEDATA)
912 self.version &= ~(REVLOGNGINLINEDATA)
913 if self.count():
913 if self.count():
914 x = self.index[0]
914 x = self.index[0]
915 e = struct.pack(self.indexformat, *x)[4:]
915 e = struct.pack(self.indexformat, *x)[4:]
916 l = struct.pack(versionformat, self.version)
916 l = struct.pack(versionformat, self.version)
917 fp.write(l)
917 fp.write(l)
918 fp.write(e)
918 fp.write(e)
919
919
920 for i in xrange(1, self.count()):
920 for i in xrange(1, self.count()):
921 x = self.index[i]
921 x = self.index[i]
922 e = struct.pack(self.indexformat, *x)
922 e = struct.pack(self.indexformat, *x)
923 fp.write(e)
923 fp.write(e)
924
924
925 # if we don't call rename, the temp file will never replace the
925 # if we don't call rename, the temp file will never replace the
926 # real index
926 # real index
927 fp.rename()
927 fp.rename()
928
928
929 tr.replace(self.indexfile, trindex * calc)
929 tr.replace(self.indexfile, trindex * calc)
930 self.chunkcache = None
930 self.chunkcache = None
931
931
932 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
932 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
933 """add a revision to the log
933 """add a revision to the log
934
934
935 text - the revision data to add
935 text - the revision data to add
936 transaction - the transaction object used for rollback
936 transaction - the transaction object used for rollback
937 link - the linkrev data to add
937 link - the linkrev data to add
938 p1, p2 - the parent nodeids of the revision
938 p1, p2 - the parent nodeids of the revision
939 d - an optional precomputed delta
939 d - an optional precomputed delta
940 """
940 """
941 if text is None: text = ""
941 if text is None: text = ""
942 if p1 is None: p1 = self.tip()
942 if p1 is None: p1 = self.tip()
943 if p2 is None: p2 = nullid
943 if p2 is None: p2 = nullid
944
944
945 node = hash(text, p1, p2)
945 node = hash(text, p1, p2)
946
946
947 if node in self.nodemap:
947 if node in self.nodemap:
948 return node
948 return node
949
949
950 n = self.count()
950 n = self.count()
951 t = n - 1
951 t = n - 1
952
952
953 if n:
953 if n:
954 base = self.base(t)
954 base = self.base(t)
955 start = self.start(base)
955 start = self.start(base)
956 end = self.end(t)
956 end = self.end(t)
957 if not d:
957 if not d:
958 prev = self.revision(self.tip())
958 prev = self.revision(self.tip())
959 d = self.diff(prev, str(text))
959 d = self.diff(prev, str(text))
960 data = compress(d)
960 data = compress(d)
961 l = len(data[1]) + len(data[0])
961 l = len(data[1]) + len(data[0])
962 dist = end - start + l
962 dist = end - start + l
963
963
964 # full versions are inserted when the needed deltas
964 # full versions are inserted when the needed deltas
965 # become comparable to the uncompressed text
965 # become comparable to the uncompressed text
966 if not n or dist > len(text) * 2:
966 if not n or dist > len(text) * 2:
967 data = compress(text)
967 data = compress(text)
968 l = len(data[1]) + len(data[0])
968 l = len(data[1]) + len(data[0])
969 base = n
969 base = n
970 else:
970 else:
971 base = self.base(t)
971 base = self.base(t)
972
972
973 offset = 0
973 offset = 0
974 if t >= 0:
974 if t >= 0:
975 offset = self.end(t)
975 offset = self.end(t)
976
976
977 if self.version == REVLOGV0:
977 if self.version == REVLOGV0:
978 e = (offset, l, base, link, p1, p2, node)
978 e = (offset, l, base, link, p1, p2, node)
979 else:
979 else:
980 e = (self.offset_type(offset, 0), l, len(text),
980 e = (self.offset_type(offset, 0), l, len(text),
981 base, link, self.rev(p1), self.rev(p2), node)
981 base, link, self.rev(p1), self.rev(p2), node)
982
982
983 self.index.append(e)
983 self.index.append(e)
984 self.nodemap[node] = n
984 self.nodemap[node] = n
985 entry = struct.pack(self.indexformat, *e)
985 entry = struct.pack(self.indexformat, *e)
986
986
987 if not self.inlinedata():
987 if not self.inlinedata():
988 transaction.add(self.datafile, offset)
988 transaction.add(self.datafile, offset)
989 transaction.add(self.indexfile, n * len(entry))
989 transaction.add(self.indexfile, n * len(entry))
990 f = self.opener(self.datafile, "a")
990 f = self.opener(self.datafile, "a")
991 if data[0]:
991 if data[0]:
992 f.write(data[0])
992 f.write(data[0])
993 f.write(data[1])
993 f.write(data[1])
994 f.close()
994 f.close()
995 f = self.opener(self.indexfile, "a")
995 f = self.opener(self.indexfile, "a")
996 else:
996 else:
997 f = self.opener(self.indexfile, "a+")
997 f = self.opener(self.indexfile, "a+")
998 f.seek(0, 2)
998 f.seek(0, 2)
999 transaction.add(self.indexfile, f.tell(), self.count() - 1)
999 transaction.add(self.indexfile, f.tell(), self.count() - 1)
1000
1000
1001 if len(self.index) == 1 and self.version != REVLOGV0:
1001 if len(self.index) == 1 and self.version != REVLOGV0:
1002 l = struct.pack(versionformat, self.version)
1002 l = struct.pack(versionformat, self.version)
1003 f.write(l)
1003 f.write(l)
1004 entry = entry[4:]
1004 entry = entry[4:]
1005
1005
1006 f.write(entry)
1006 f.write(entry)
1007
1007
1008 if self.inlinedata():
1008 if self.inlinedata():
1009 f.write(data[0])
1009 f.write(data[0])
1010 f.write(data[1])
1010 f.write(data[1])
1011 self.checkinlinesize(transaction, f)
1011 self.checkinlinesize(transaction, f)
1012
1012
1013 self.cache = (node, n, text)
1013 self.cache = (node, n, text)
1014 return node
1014 return node
1015
1015
1016 def ancestor(self, a, b):
1016 def ancestor(self, a, b):
1017 """calculate the least common ancestor of nodes a and b"""
1017 """calculate the least common ancestor of nodes a and b"""
1018
1018
1019 # start with some short cuts for the linear cases
1019 def parents(node):
1020 if a == b:
1020 return [p for p in self.parents(node) if p != nullid]
1021 return a
1022 ra = self.rev(a)
1023 rb = self.rev(b)
1024 if ra < rb:
1025 last = b
1026 first = a
1027 else:
1028 last = a
1029 first = b
1030
1031 # reachable won't include stop in the list, so we have to use a parent
1032 reachable = self.reachable(last, stop=self.parents(first)[0])
1033 if first in reachable:
1034 return first
1035
1036 # calculate the distance of every node from root
1037 dist = {nullid: 0}
1038 for i in xrange(self.count()):
1039 n = self.node(i)
1040 p1, p2 = self.parents(n)
1041 dist[n] = max(dist[p1], dist[p2]) + 1
1042
1021
1043 # traverse ancestors in order of decreasing distance from root
1022 return ancestor.ancestor(a, b, parents) or nullid
1044 def ancestors(node):
1045 # we store negative distances because heap returns smallest member
1046 h = [(-dist[node], node)]
1047 seen = {}
1048 while h:
1049 d, n = heapq.heappop(h)
1050 if n not in seen:
1051 seen[n] = 1
1052 yield (-d, n)
1053 for p in self.parents(n):
1054 heapq.heappush(h, (-dist[p], p))
1055
1056 def generations(node):
1057 sg, s = None, {}
1058 for g,n in ancestors(node):
1059 if g != sg:
1060 if sg:
1061 yield sg, s
1062 sg, s = g, {n:1}
1063 else:
1064 s[n] = 1
1065 yield sg, s
1066
1067 x = generations(a)
1068 y = generations(b)
1069 gx = x.next()
1070 gy = y.next()
1071
1072 # increment each ancestor list until it is closer to root than
1073 # the other, or they match
1074 while 1:
1075 #print "ancestor gen %s %s" % (gx[0], gy[0])
1076 if gx[0] == gy[0]:
1077 # find the intersection
1078 i = [ n for n in gx[1] if n in gy[1] ]
1079 if i:
1080 return i[0]
1081 else:
1082 #print "next"
1083 gy = y.next()
1084 gx = x.next()
1085 elif gx[0] < gy[0]:
1086 #print "next y"
1087 gy = y.next()
1088 else:
1089 #print "next x"
1090 gx = x.next()
1091
1023
1092 def group(self, nodelist, lookup, infocollect=None):
1024 def group(self, nodelist, lookup, infocollect=None):
1093 """calculate a delta group
1025 """calculate a delta group
1094
1026
1095 Given a list of changeset revs, return a set of deltas and
1027 Given a list of changeset revs, return a set of deltas and
1096 metadata corresponding to nodes. the first delta is
1028 metadata corresponding to nodes. the first delta is
1097 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1029 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1098 have this parent as it has all history before these
1030 have this parent as it has all history before these
1099 changesets. parent is parent[0]
1031 changesets. parent is parent[0]
1100 """
1032 """
1101 revs = [self.rev(n) for n in nodelist]
1033 revs = [self.rev(n) for n in nodelist]
1102
1034
1103 # if we don't have any revisions touched by these changesets, bail
1035 # if we don't have any revisions touched by these changesets, bail
1104 if not revs:
1036 if not revs:
1105 yield changegroup.closechunk()
1037 yield changegroup.closechunk()
1106 return
1038 return
1107
1039
1108 # add the parent of the first rev
1040 # add the parent of the first rev
1109 p = self.parents(self.node(revs[0]))[0]
1041 p = self.parents(self.node(revs[0]))[0]
1110 revs.insert(0, self.rev(p))
1042 revs.insert(0, self.rev(p))
1111
1043
1112 # build deltas
1044 # build deltas
1113 for d in xrange(0, len(revs) - 1):
1045 for d in xrange(0, len(revs) - 1):
1114 a, b = revs[d], revs[d + 1]
1046 a, b = revs[d], revs[d + 1]
1115 nb = self.node(b)
1047 nb = self.node(b)
1116
1048
1117 if infocollect is not None:
1049 if infocollect is not None:
1118 infocollect(nb)
1050 infocollect(nb)
1119
1051
1120 d = self.revdiff(a, b)
1052 d = self.revdiff(a, b)
1121 p = self.parents(nb)
1053 p = self.parents(nb)
1122 meta = nb + p[0] + p[1] + lookup(nb)
1054 meta = nb + p[0] + p[1] + lookup(nb)
1123 yield changegroup.genchunk("%s%s" % (meta, d))
1055 yield changegroup.genchunk("%s%s" % (meta, d))
1124
1056
1125 yield changegroup.closechunk()
1057 yield changegroup.closechunk()
1126
1058
1127 def addgroup(self, revs, linkmapper, transaction, unique=0):
1059 def addgroup(self, revs, linkmapper, transaction, unique=0):
1128 """
1060 """
1129 add a delta group
1061 add a delta group
1130
1062
1131 given a set of deltas, add them to the revision log. the
1063 given a set of deltas, add them to the revision log. the
1132 first delta is against its parent, which should be in our
1064 first delta is against its parent, which should be in our
1133 log, the rest are against the previous delta.
1065 log, the rest are against the previous delta.
1134 """
1066 """
1135
1067
1136 #track the base of the current delta log
1068 #track the base of the current delta log
1137 r = self.count()
1069 r = self.count()
1138 t = r - 1
1070 t = r - 1
1139 node = None
1071 node = None
1140
1072
1141 base = prev = -1
1073 base = prev = -1
1142 start = end = textlen = 0
1074 start = end = textlen = 0
1143 if r:
1075 if r:
1144 end = self.end(t)
1076 end = self.end(t)
1145
1077
1146 ifh = self.opener(self.indexfile, "a+")
1078 ifh = self.opener(self.indexfile, "a+")
1147 ifh.seek(0, 2)
1079 ifh.seek(0, 2)
1148 transaction.add(self.indexfile, ifh.tell(), self.count())
1080 transaction.add(self.indexfile, ifh.tell(), self.count())
1149 if self.inlinedata():
1081 if self.inlinedata():
1150 dfh = None
1082 dfh = None
1151 else:
1083 else:
1152 transaction.add(self.datafile, end)
1084 transaction.add(self.datafile, end)
1153 dfh = self.opener(self.datafile, "a")
1085 dfh = self.opener(self.datafile, "a")
1154
1086
1155 # loop through our set of deltas
1087 # loop through our set of deltas
1156 chain = None
1088 chain = None
1157 for chunk in revs:
1089 for chunk in revs:
1158 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1090 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1159 link = linkmapper(cs)
1091 link = linkmapper(cs)
1160 if node in self.nodemap:
1092 if node in self.nodemap:
1161 # this can happen if two branches make the same change
1093 # this can happen if two branches make the same change
1162 # if unique:
1094 # if unique:
1163 # raise RevlogError(_("already have %s") % hex(node[:4]))
1095 # raise RevlogError(_("already have %s") % hex(node[:4]))
1164 chain = node
1096 chain = node
1165 continue
1097 continue
1166 delta = chunk[80:]
1098 delta = chunk[80:]
1167
1099
1168 for p in (p1, p2):
1100 for p in (p1, p2):
1169 if not p in self.nodemap:
1101 if not p in self.nodemap:
1170 raise RevlogError(_("unknown parent %s") % short(p))
1102 raise RevlogError(_("unknown parent %s") % short(p))
1171
1103
1172 if not chain:
1104 if not chain:
1173 # retrieve the parent revision of the delta chain
1105 # retrieve the parent revision of the delta chain
1174 chain = p1
1106 chain = p1
1175 if not chain in self.nodemap:
1107 if not chain in self.nodemap:
1176 raise RevlogError(_("unknown base %s") % short(chain[:4]))
1108 raise RevlogError(_("unknown base %s") % short(chain[:4]))
1177
1109
1178 # full versions are inserted when the needed deltas become
1110 # full versions are inserted when the needed deltas become
1179 # comparable to the uncompressed text or when the previous
1111 # comparable to the uncompressed text or when the previous
1180 # version is not the one we have a delta against. We use
1112 # version is not the one we have a delta against. We use
1181 # the size of the previous full rev as a proxy for the
1113 # the size of the previous full rev as a proxy for the
1182 # current size.
1114 # current size.
1183
1115
1184 if chain == prev:
1116 if chain == prev:
1185 tempd = compress(delta)
1117 tempd = compress(delta)
1186 cdelta = tempd[0] + tempd[1]
1118 cdelta = tempd[0] + tempd[1]
1187 textlen = mdiff.patchedsize(textlen, delta)
1119 textlen = mdiff.patchedsize(textlen, delta)
1188
1120
1189 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1121 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1190 # flush our writes here so we can read it in revision
1122 # flush our writes here so we can read it in revision
1191 if dfh:
1123 if dfh:
1192 dfh.flush()
1124 dfh.flush()
1193 ifh.flush()
1125 ifh.flush()
1194 text = self.revision(chain)
1126 text = self.revision(chain)
1195 text = self.patches(text, [delta])
1127 text = self.patches(text, [delta])
1196 chk = self.addrevision(text, transaction, link, p1, p2)
1128 chk = self.addrevision(text, transaction, link, p1, p2)
1197 if chk != node:
1129 if chk != node:
1198 raise RevlogError(_("consistency error adding group"))
1130 raise RevlogError(_("consistency error adding group"))
1199 textlen = len(text)
1131 textlen = len(text)
1200 else:
1132 else:
1201 if self.version == REVLOGV0:
1133 if self.version == REVLOGV0:
1202 e = (end, len(cdelta), base, link, p1, p2, node)
1134 e = (end, len(cdelta), base, link, p1, p2, node)
1203 else:
1135 else:
1204 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
1136 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
1205 link, self.rev(p1), self.rev(p2), node)
1137 link, self.rev(p1), self.rev(p2), node)
1206 self.index.append(e)
1138 self.index.append(e)
1207 self.nodemap[node] = r
1139 self.nodemap[node] = r
1208 if self.inlinedata():
1140 if self.inlinedata():
1209 ifh.write(struct.pack(self.indexformat, *e))
1141 ifh.write(struct.pack(self.indexformat, *e))
1210 ifh.write(cdelta)
1142 ifh.write(cdelta)
1211 self.checkinlinesize(transaction, ifh)
1143 self.checkinlinesize(transaction, ifh)
1212 if not self.inlinedata():
1144 if not self.inlinedata():
1213 dfh = self.opener(self.datafile, "a")
1145 dfh = self.opener(self.datafile, "a")
1214 ifh = self.opener(self.indexfile, "a")
1146 ifh = self.opener(self.indexfile, "a")
1215 else:
1147 else:
1216 if not dfh:
1148 if not dfh:
1217 # addrevision switched from inline to conventional
1149 # addrevision switched from inline to conventional
1218 # reopen the index
1150 # reopen the index
1219 dfh = self.opener(self.datafile, "a")
1151 dfh = self.opener(self.datafile, "a")
1220 ifh = self.opener(self.indexfile, "a")
1152 ifh = self.opener(self.indexfile, "a")
1221 dfh.write(cdelta)
1153 dfh.write(cdelta)
1222 ifh.write(struct.pack(self.indexformat, *e))
1154 ifh.write(struct.pack(self.indexformat, *e))
1223
1155
1224 t, r, chain, prev = r, r + 1, node, node
1156 t, r, chain, prev = r, r + 1, node, node
1225 base = self.base(t)
1157 base = self.base(t)
1226 start = self.start(base)
1158 start = self.start(base)
1227 end = self.end(t)
1159 end = self.end(t)
1228
1160
1229 return node
1161 return node
1230
1162
1231 def strip(self, rev, minlink):
1163 def strip(self, rev, minlink):
1232 if self.count() == 0 or rev >= self.count():
1164 if self.count() == 0 or rev >= self.count():
1233 return
1165 return
1234
1166
1235 if isinstance(self.index, lazyindex):
1167 if isinstance(self.index, lazyindex):
1236 self.loadindexmap()
1168 self.loadindexmap()
1237
1169
1238 # When stripping away a revision, we need to make sure it
1170 # When stripping away a revision, we need to make sure it
1239 # does not actually belong to an older changeset.
1171 # does not actually belong to an older changeset.
1240 # The minlink parameter defines the oldest revision
1172 # The minlink parameter defines the oldest revision
1241 # we're allowed to strip away.
1173 # we're allowed to strip away.
1242 while minlink > self.index[rev][-4]:
1174 while minlink > self.index[rev][-4]:
1243 rev += 1
1175 rev += 1
1244 if rev >= self.count():
1176 if rev >= self.count():
1245 return
1177 return
1246
1178
1247 # first truncate the files on disk
1179 # first truncate the files on disk
1248 end = self.start(rev)
1180 end = self.start(rev)
1249 if not self.inlinedata():
1181 if not self.inlinedata():
1250 df = self.opener(self.datafile, "a")
1182 df = self.opener(self.datafile, "a")
1251 df.truncate(end)
1183 df.truncate(end)
1252 end = rev * struct.calcsize(self.indexformat)
1184 end = rev * struct.calcsize(self.indexformat)
1253 else:
1185 else:
1254 end += rev * struct.calcsize(self.indexformat)
1186 end += rev * struct.calcsize(self.indexformat)
1255
1187
1256 indexf = self.opener(self.indexfile, "a")
1188 indexf = self.opener(self.indexfile, "a")
1257 indexf.truncate(end)
1189 indexf.truncate(end)
1258
1190
1259 # then reset internal state in memory to forget those revisions
1191 # then reset internal state in memory to forget those revisions
1260 self.cache = None
1192 self.cache = None
1261 self.chunkcache = None
1193 self.chunkcache = None
1262 for x in xrange(rev, self.count()):
1194 for x in xrange(rev, self.count()):
1263 del self.nodemap[self.node(x)]
1195 del self.nodemap[self.node(x)]
1264
1196
1265 del self.index[rev:]
1197 del self.index[rev:]
1266
1198
1267 def checksize(self):
1199 def checksize(self):
1268 expected = 0
1200 expected = 0
1269 if self.count():
1201 if self.count():
1270 expected = self.end(self.count() - 1)
1202 expected = self.end(self.count() - 1)
1271
1203
1272 try:
1204 try:
1273 f = self.opener(self.datafile)
1205 f = self.opener(self.datafile)
1274 f.seek(0, 2)
1206 f.seek(0, 2)
1275 actual = f.tell()
1207 actual = f.tell()
1276 dd = actual - expected
1208 dd = actual - expected
1277 except IOError, inst:
1209 except IOError, inst:
1278 if inst.errno != errno.ENOENT:
1210 if inst.errno != errno.ENOENT:
1279 raise
1211 raise
1280 dd = 0
1212 dd = 0
1281
1213
1282 try:
1214 try:
1283 f = self.opener(self.indexfile)
1215 f = self.opener(self.indexfile)
1284 f.seek(0, 2)
1216 f.seek(0, 2)
1285 actual = f.tell()
1217 actual = f.tell()
1286 s = struct.calcsize(self.indexformat)
1218 s = struct.calcsize(self.indexformat)
1287 i = actual / s
1219 i = actual / s
1288 di = actual - (i * s)
1220 di = actual - (i * s)
1289 if self.inlinedata():
1221 if self.inlinedata():
1290 databytes = 0
1222 databytes = 0
1291 for r in xrange(self.count()):
1223 for r in xrange(self.count()):
1292 databytes += self.length(r)
1224 databytes += self.length(r)
1293 dd = 0
1225 dd = 0
1294 di = actual - self.count() * s - databytes
1226 di = actual - self.count() * s - databytes
1295 except IOError, inst:
1227 except IOError, inst:
1296 if inst.errno != errno.ENOENT:
1228 if inst.errno != errno.ENOENT:
1297 raise
1229 raise
1298 di = 0
1230 di = 0
1299
1231
1300 return (dd, di)
1232 return (dd, di)
1301
1233
1302
1234
General Comments 0
You need to be logged in to leave comments. Login now