##// END OF EJS Templates
Move cmp bits from filelog to revlog
Matt Mackall -
r2890:5df3e5cf default
parent child Browse files
Show More
@@ -1,128 +1,117 b''
1 # filelog.py - file history class for mercurial
1 # filelog.py - file history class for mercurial
2 #
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
6 # of the GNU General Public License, incorporated herein by reference.
7
7
8 from revlog import *
8 from revlog import *
9 from demandload import *
9 from demandload import *
10 demandload(globals(), "bdiff os")
10 demandload(globals(), "bdiff os")
11
11
class filelog(revlog):
    """Per-file history storage: a revlog holding one tracked file's
    revisions.

    Revision text may carry an embedded metadata header delimited by
    the two-byte marker "\\1\\n" (SOH + newline); read/readmeta/add
    pack and unpack that header.
    """

    def __init__(self, opener, path, defversion=REVLOG_DEFAULT_VERSION):
        revlog.__init__(self, opener,
                        os.path.join("data", self.encodedir(path + ".i")),
                        os.path.join("data", self.encodedir(path + ".d")),
                        defversion)

    # This avoids a collision between a file named foo and a dir named
    # foo.i or foo.d
    def encodedir(self, path):
        return (path
                .replace(".hg/", ".hg.hg/")
                .replace(".i/", ".i.hg/")
                .replace(".d/", ".d.hg/"))

    def decodedir(self, path):
        # inverse of encodedir; note the replacements run in the
        # opposite order so round-tripping is exact
        return (path
                .replace(".d.hg/", ".d/")
                .replace(".i.hg/", ".i/")
                .replace(".hg.hg/", ".hg/"))

    def read(self, node):
        """return the file contents at node, with any metadata header
        stripped"""
        t = self.revision(node)
        if not t.startswith('\1\n'):
            return t
        # skip past the closing marker of the metadata header
        s = t.index('\1\n', 2)
        return t[s+2:]

    def readmeta(self, node):
        """return the metadata dict embedded at node, or {} if there
        is no metadata header"""
        t = self.revision(node)
        if not t.startswith('\1\n'):
            return {}
        s = t.index('\1\n', 2)
        mt = t[2:s]
        m = {}
        for l in mt.splitlines():
            # each header line has the form "key: value"
            k, v = l.split(": ", 1)
            m[k] = v
        return m

    def add(self, text, meta, transaction, link, p1=None, p2=None):
        """add a revision; if meta is given (or the text itself starts
        with the metadata marker) prepend a metadata header so read()
        can strip it back out"""
        if meta or text.startswith('\1\n'):
            mt = ""
            if meta:
                mt = [ "%s: %s\n" % (k, v) for k, v in meta.items() ]
            text = "\1\n%s\1\n%s" % ("".join(mt), text)
        return self.addrevision(text, transaction, link, p1, p2)

    def renamed(self, node):
        """return (source path, source node) if node records a
        copy/rename, else False"""
        # a copy/rename revision stores nullid as its first parent;
        # anything else cannot be a rename, so skip the metadata read
        if self.parents(node)[0] != nullid:
            return False
        m = self.readmeta(node)
        # 'in' test instead of the deprecated dict.has_key()
        if m and "copy" in m:
            return (m["copy"], bin(m["copyrev"]))
        return False

    def cmp(self, node, text):
        """compare text with a given file revision"""

        # for renames, we have to go the slow way: the nodeid covers
        # the metadata header too, so compare the stripped text
        if self.renamed(node):
            t2 = self.read(node)
            return t2 == text

        return revlog.cmp(self, node, text)

    def annotate(self, node):
        """return pairs of (linkrev, line) attributing each line of the
        file at node to the changeset that introduced it"""

        def decorate(text, rev):
            # tag every line of text with rev
            return ([rev] * len(text.splitlines()), text)

        def pair(parent, child):
            # lines unchanged from parent keep the parent's attribution
            for a1, a2, b1, b2 in bdiff.blocks(parent[1], child[1]):
                child[0][b1:b2] = parent[0][a1:a2]
            return child

        # find all ancestors
        needed = {node: 1}
        visit = [node]
        while visit:
            n = visit.pop(0)
            for p in self.parents(n):
                if p not in needed:
                    needed[p] = 1
                    visit.append(p)
                else:
                    # count how many times we'll use this
                    needed[p] += 1

        # sort by revision which is a topological order
        visit = [ (self.rev(n), n) for n in needed.keys() ]
        visit.sort()
        hist = {}

        for r, n in visit:
            curr = decorate(self.read(n), self.linkrev(n))
            for p in self.parents(n):
                if p != nullid:
                    curr = pair(hist[p], curr)
                    # trim the history of unneeded revs
                    needed[p] -= 1
                    if not needed[p]:
                        del hist[p]
            hist[n] = curr

        # n is the last node visited, i.e. the requested node
        return zip(hist[n][0], hist[n][1].splitlines(1))
@@ -1,1285 +1,1298 b''
1 """
1 """
2 revlog.py - storage back-end for mercurial
2 revlog.py - storage back-end for mercurial
3
3
4 This provides efficient delta storage with O(1) retrieve and append
4 This provides efficient delta storage with O(1) retrieve and append
5 and O(changes) merge between branches
5 and O(changes) merge between branches
6
6
7 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
7 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
8
8
9 This software may be used and distributed according to the terms
9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
10 of the GNU General Public License, incorporated herein by reference.
11 """
11 """
12
12
13 from node import *
13 from node import *
14 from i18n import gettext as _
14 from i18n import gettext as _
15 from demandload import demandload
15 from demandload import demandload
16 demandload(globals(), "binascii changegroup errno heapq mdiff os")
16 demandload(globals(), "binascii changegroup errno heapq mdiff os")
17 demandload(globals(), "sha struct util zlib")
17 demandload(globals(), "sha struct util zlib")
18
18
# revlog version strings
REVLOGV0 = 0     # original revlog format
REVLOGNG = 1     # "next generation" revlog format

# revlog flags
# flags live in the high 16 bits of the version word (see load())
REVLOGNGINLINEDATA = (1 << 16)   # revision data stored inline in the index file
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA

REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
29
29
def flagstr(flag):
    """Map a symbolic revlog flag name to its bit value.

    Only "inline" is currently recognized; any other name raises
    RevlogError.
    """
    if flag == "inline":
        return REVLOGNGINLINEDATA
    # interpolate AFTER the gettext lookup: _("... %s" % flag) would
    # look up the already-formatted string, which is never in the
    # translation catalog
    raise RevlogError(_("unknown revlog flag %s") % flag)
34
34
def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.

    The parents are sorted first, so the result does not depend on
    their order.
    """
    # prefer hashlib (Python >= 2.5); fall back to the legacy sha
    # module loaded via demandload at the top of this file
    try:
        from hashlib import sha1 as _new_sha
    except ImportError:
        _new_sha = sha.new
    l = [p1, p2]
    l.sort()
    s = _new_sha(l[0])
    s.update(l[1])
    s.update(text)
    return s.digest()
48
48
def compress(text):
    """ generate a possibly-compressed representation of text """
    # empty input: nothing to store
    if not text:
        return ("", text)
    # only bother compressing when the input is big enough to win
    if len(text) >= 44:
        comp = zlib.compress(text)
        if len(comp) <= len(text):
            return ("", comp)
    # too small or incompressible: store literally; a leading NUL
    # needs no type marker, everything else is tagged 'u'
    if text[0] == '\0':
        return ("", text)
    return ('u', text)
60
60
def decompress(bin):
    """ decompress the given input """
    # empty chunk decompresses to itself
    if not bin:
        return bin
    # the first byte tags how the chunk was stored by compress()
    t = bin[0]
    if t == '\0':
        # literal data starting with NUL, stored untagged
        return bin
    elif t == 'x':
        # zlib stream ('x' is the zlib header byte)
        return zlib.decompress(bin)
    elif t == 'u':
        # uncompressed, marker-prefixed
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)
69
69
# v0 index record: offset, compressed len, base rev, link rev,
# then three 20-byte ids (p1, p2, nodeid)
indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56    # byte offset of the nodeid within a v0 record
# index ng:
# 6 bytes offset
# 2 bytes flags
# 4 bytes compressed length
# 4 bytes uncompressed length
# 4 bytes: base rev
# 4 bytes link rev
# 4 bytes parent 1 rev
# 4 bytes parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32    # byte offset of the nodeid within an ng record
versionformat = ">i"    # the version word at the start of the index file
85
85
class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if win32 extensions not
    # available. it keeps file handle open, which make it not possible
    # to break hardlinks on local cloned repos.
    safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
                                      hasattr(util, 'win32api'))

    def __init__(self, dataf, size, indexformat, shaoffset):
        # dataf: open file object for the index; kept open for lazy reads
        self.dataf = dataf
        self.format = indexformat
        # s: size in bytes of one fixed-width index record
        self.s = struct.calcsize(indexformat)
        self.indexformat = indexformat
        self.datasize = size
        # l: total number of records (integer division)
        self.l = size/self.s
        # index entries start as None and are filled in on demand;
        # entries may be raw record strings until unpacked by callers
        self.index = [None] * self.l
        # map: nodeid -> rev; nullid is always rev -1
        self.map = {nullid: -1}
        self.allmap = 0     # nonzero once every node is in self.map
        self.all = 0        # nonzero once every index entry is loaded
        self.mapfind_count = 0  # how many findnode searches we've done
        self.shaoffset = shaoffset  # offset of the nodeid in a record

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap: return
        start = 0
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        # read 256 records per pass to amortize I/O
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                # pull just the nodeid out of each raw record
                n = data[off + self.shaoffset:off + self.shaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        """parse the records covering [blockstart, blockstart+blocksize)
        into self.index/self.map; data, if given, is the pre-read bytes"""
        if self.all: return
        if data is None:
            self.dataf.seek(blockstart)
            data = self.dataf.read(blocksize)
        lend = len(data) / self.s
        i = blockstart / self.s
        off = 0
        for x in xrange(lend):
            if self.index[i + x] == None:
                # store the raw record string; callers unpack lazily
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[self.shaoffset:self.shaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap: return None

        # hg log will cause many many searches for the manifest
        # nodes. After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        # skip past the already-loaded tail of the index
        while self.index[last] != None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.rfind(node, 0, findend)
                findend = off
                if off >= 0:
                    # round down to the start of the containing record
                    i = off / self.s
                    off = i * self.s
                    n = data[off + self.shaoffset:off + self.shaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        """load index entries: all of them (i is None), the range
        [i, end), or a 64-record window around i"""
        if self.all: return
        all = False
        if i == None:
            blockstart = 0
            blocksize = (512 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                # NOTE(review): (i & ~(32)) only clears bit 5; possibly
                # (i & ~63) block alignment was intended -- confirm
                blockstart = (i & ~(32)) * self.s
                blocksize = self.s * 64
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all: self.all = True
219
219
class lazyindex(object):
    """a lazy version of the index array"""

    def __init__(self, parser):
        self.p = parser

    def __len__(self):
        return len(self.p.index)

    def load(self, pos):
        # support negative indices, like a real list
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]

    def __getitem__(self, pos):
        # a None/empty slot means the parser hasn't read it yet
        entry = self.p.index[pos] or self.load(pos)
        if isinstance(entry, str):
            # still a raw record string: unpack it now
            entry = struct.unpack(self.p.indexformat, entry)
        return entry

    def __setitem__(self, pos, item):
        self.p.index[pos] = item

    def __delitem__(self, pos):
        del self.p.index[pos]

    def append(self, e):
        self.p.index.append(e)
242
242
class lazymap(object):
    """a lazy version of the node map"""

    def __init__(self, parser):
        self.p = parser

    def load(self, key):
        # force a backwards search of the on-disk index for this node;
        # findnode populates self.p.map as a side effect
        n = self.p.findnode(key)
        if n == None:
            raise KeyError(key)

    def __contains__(self, key):
        if key in self.p.map:
            return True
        # not seen yet: load the whole map and check once more
        self.p.loadmap()
        return key in self.p.map

    def __iter__(self):
        yield nullid
        for i in xrange(self.p.l):
            entry = self.p.index[i]
            if not entry:
                self.p.loadindex(i)
                entry = self.p.index[i]
            if isinstance(entry, str):
                entry = struct.unpack(self.p.indexformat, entry)
            # the nodeid is the last field of every record
            yield entry[-1]

    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))

    def __setitem__(self, key, val):
        self.p.map[key] = val

    def __delitem__(self, key):
        del self.p.map[key]
279
279
class RevlogError(Exception):
    """Generic exception raised for revlog-level errors."""
281
281
282 class revlog(object):
282 class revlog(object):
283 """
283 """
284 the underlying revision storage object
284 the underlying revision storage object
285
285
286 A revlog consists of two parts, an index and the revision data.
286 A revlog consists of two parts, an index and the revision data.
287
287
288 The index is a file with a fixed record size containing
288 The index is a file with a fixed record size containing
289 information on each revision, includings its nodeid (hash), the
289 information on each revision, includings its nodeid (hash), the
290 nodeids of its parents, the position and offset of its data within
290 nodeids of its parents, the position and offset of its data within
291 the data file, and the revision it's based on. Finally, each entry
291 the data file, and the revision it's based on. Finally, each entry
292 contains a linkrev entry that can serve as a pointer to external
292 contains a linkrev entry that can serve as a pointer to external
293 data.
293 data.
294
294
295 The revision data itself is a linear collection of data chunks.
295 The revision data itself is a linear collection of data chunks.
296 Each chunk represents a revision and is usually represented as a
296 Each chunk represents a revision and is usually represented as a
297 delta against the previous chunk. To bound lookup time, runs of
297 delta against the previous chunk. To bound lookup time, runs of
298 deltas are limited to about 2 times the length of the original
298 deltas are limited to about 2 times the length of the original
299 version data. This makes retrieval of a version proportional to
299 version data. This makes retrieval of a version proportional to
300 its size, or O(1) relative to the number of revisions.
300 its size, or O(1) relative to the number of revisions.
301
301
302 Both pieces of the revlog are written to in an append-only
302 Both pieces of the revlog are written to in an append-only
303 fashion, which means we never need to rewrite a file to insert or
303 fashion, which means we never need to rewrite a file to insert or
304 remove data, and can use some simple techniques to avoid the need
304 remove data, and can use some simple techniques to avoid the need
305 for locking while reading.
305 for locking while reading.
306 """
306 """
307 def __init__(self, opener, indexfile, datafile,
307 def __init__(self, opener, indexfile, datafile,
308 defversion=REVLOG_DEFAULT_VERSION):
308 defversion=REVLOG_DEFAULT_VERSION):
309 """
309 """
310 create a revlog object
310 create a revlog object
311
311
312 opener is a function that abstracts the file opening operation
312 opener is a function that abstracts the file opening operation
313 and can be used to implement COW semantics or the like.
313 and can be used to implement COW semantics or the like.
314 """
314 """
315 self.indexfile = indexfile
315 self.indexfile = indexfile
316 self.datafile = datafile
316 self.datafile = datafile
317 self.opener = opener
317 self.opener = opener
318
318
319 self.indexstat = None
319 self.indexstat = None
320 self.cache = None
320 self.cache = None
321 self.chunkcache = None
321 self.chunkcache = None
322 self.defversion = defversion
322 self.defversion = defversion
323 self.load()
323 self.load()
324
324
325 def load(self):
325 def load(self):
326 v = self.defversion
326 v = self.defversion
327 try:
327 try:
328 f = self.opener(self.indexfile)
328 f = self.opener(self.indexfile)
329 i = f.read(4)
329 i = f.read(4)
330 f.seek(0)
330 f.seek(0)
331 except IOError, inst:
331 except IOError, inst:
332 if inst.errno != errno.ENOENT:
332 if inst.errno != errno.ENOENT:
333 raise
333 raise
334 i = ""
334 i = ""
335 else:
335 else:
336 try:
336 try:
337 st = util.fstat(f)
337 st = util.fstat(f)
338 except AttributeError, inst:
338 except AttributeError, inst:
339 st = None
339 st = None
340 else:
340 else:
341 oldst = self.indexstat
341 oldst = self.indexstat
342 if (oldst and st.st_dev == oldst.st_dev
342 if (oldst and st.st_dev == oldst.st_dev
343 and st.st_ino == oldst.st_ino
343 and st.st_ino == oldst.st_ino
344 and st.st_mtime == oldst.st_mtime
344 and st.st_mtime == oldst.st_mtime
345 and st.st_ctime == oldst.st_ctime):
345 and st.st_ctime == oldst.st_ctime):
346 return
346 return
347 self.indexstat = st
347 self.indexstat = st
348 if len(i) > 0:
348 if len(i) > 0:
349 v = struct.unpack(versionformat, i)[0]
349 v = struct.unpack(versionformat, i)[0]
350 flags = v & ~0xFFFF
350 flags = v & ~0xFFFF
351 fmt = v & 0xFFFF
351 fmt = v & 0xFFFF
352 if fmt == REVLOGV0:
352 if fmt == REVLOGV0:
353 if flags:
353 if flags:
354 raise RevlogError(_("index %s invalid flags %x for format v0" %
354 raise RevlogError(_("index %s invalid flags %x for format v0" %
355 (self.indexfile, flags)))
355 (self.indexfile, flags)))
356 elif fmt == REVLOGNG:
356 elif fmt == REVLOGNG:
357 if flags & ~REVLOGNGINLINEDATA:
357 if flags & ~REVLOGNGINLINEDATA:
358 raise RevlogError(_("index %s invalid flags %x for revlogng" %
358 raise RevlogError(_("index %s invalid flags %x for revlogng" %
359 (self.indexfile, flags)))
359 (self.indexfile, flags)))
360 else:
360 else:
361 raise RevlogError(_("index %s invalid format %d" %
361 raise RevlogError(_("index %s invalid format %d" %
362 (self.indexfile, fmt)))
362 (self.indexfile, fmt)))
363 self.version = v
363 self.version = v
364 if v == REVLOGV0:
364 if v == REVLOGV0:
365 self.indexformat = indexformatv0
365 self.indexformat = indexformatv0
366 shaoffset = v0shaoffset
366 shaoffset = v0shaoffset
367 else:
367 else:
368 self.indexformat = indexformatng
368 self.indexformat = indexformatng
369 shaoffset = ngshaoffset
369 shaoffset = ngshaoffset
370
370
371 if i:
371 if i:
372 if (lazyparser.safe_to_use and not self.inlinedata() and
372 if (lazyparser.safe_to_use and not self.inlinedata() and
373 st and st.st_size > 10000):
373 st and st.st_size > 10000):
374 # big index, let's parse it on demand
374 # big index, let's parse it on demand
375 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
375 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
376 self.index = lazyindex(parser)
376 self.index = lazyindex(parser)
377 self.nodemap = lazymap(parser)
377 self.nodemap = lazymap(parser)
378 else:
378 else:
379 self.parseindex(f, st)
379 self.parseindex(f, st)
380 if self.version != REVLOGV0:
380 if self.version != REVLOGV0:
381 e = list(self.index[0])
381 e = list(self.index[0])
382 type = self.ngtype(e[0])
382 type = self.ngtype(e[0])
383 e[0] = self.offset_type(0, type)
383 e[0] = self.offset_type(0, type)
384 self.index[0] = e
384 self.index[0] = e
385 else:
385 else:
386 self.nodemap = { nullid: -1}
386 self.nodemap = { nullid: -1}
387 self.index = []
387 self.index = []
388
388
389
389
390 def parseindex(self, fp, st):
390 def parseindex(self, fp, st):
391 s = struct.calcsize(self.indexformat)
391 s = struct.calcsize(self.indexformat)
392 self.index = []
392 self.index = []
393 self.nodemap = {nullid: -1}
393 self.nodemap = {nullid: -1}
394 inline = self.inlinedata()
394 inline = self.inlinedata()
395 n = 0
395 n = 0
396 leftover = None
396 leftover = None
397 while True:
397 while True:
398 if st:
398 if st:
399 data = fp.read(65536)
399 data = fp.read(65536)
400 else:
400 else:
401 # hack for httprangereader, it doesn't do partial reads well
401 # hack for httprangereader, it doesn't do partial reads well
402 data = fp.read()
402 data = fp.read()
403 if not data:
403 if not data:
404 break
404 break
405 if n == 0 and self.inlinedata():
405 if n == 0 and self.inlinedata():
406 # cache the first chunk
406 # cache the first chunk
407 self.chunkcache = (0, data)
407 self.chunkcache = (0, data)
408 if leftover:
408 if leftover:
409 data = leftover + data
409 data = leftover + data
410 leftover = None
410 leftover = None
411 off = 0
411 off = 0
412 l = len(data)
412 l = len(data)
413 while off < l:
413 while off < l:
414 if l - off < s:
414 if l - off < s:
415 leftover = data[off:]
415 leftover = data[off:]
416 break
416 break
417 cur = data[off:off + s]
417 cur = data[off:off + s]
418 off += s
418 off += s
419 e = struct.unpack(self.indexformat, cur)
419 e = struct.unpack(self.indexformat, cur)
420 self.index.append(e)
420 self.index.append(e)
421 self.nodemap[e[-1]] = n
421 self.nodemap[e[-1]] = n
422 n += 1
422 n += 1
423 if inline:
423 if inline:
424 off += e[1]
424 off += e[1]
425 if off > l:
425 if off > l:
426 # some things don't seek well, just read it
426 # some things don't seek well, just read it
427 fp.read(off - l)
427 fp.read(off - l)
428 if not st:
428 if not st:
429 break
429 break
430
430
431
431
432 def ngoffset(self, q):
432 def ngoffset(self, q):
433 if q & 0xFFFF:
433 if q & 0xFFFF:
434 raise RevlogError(_('%s: incompatible revision flag %x') %
434 raise RevlogError(_('%s: incompatible revision flag %x') %
435 (self.indexfile, q))
435 (self.indexfile, q))
436 return long(q >> 16)
436 return long(q >> 16)
437
437
438 def ngtype(self, q):
438 def ngtype(self, q):
439 return int(q & 0xFFFF)
439 return int(q & 0xFFFF)
440
440
441 def offset_type(self, offset, type):
441 def offset_type(self, offset, type):
442 return long(long(offset) << 16 | type)
442 return long(long(offset) << 16 | type)
443
443
444 def loadindex(self, start, end):
444 def loadindex(self, start, end):
445 """load a block of indexes all at once from the lazy parser"""
445 """load a block of indexes all at once from the lazy parser"""
446 if isinstance(self.index, lazyindex):
446 if isinstance(self.index, lazyindex):
447 self.index.p.loadindex(start, end)
447 self.index.p.loadindex(start, end)
448
448
449 def loadindexmap(self):
449 def loadindexmap(self):
450 """loads both the map and the index from the lazy parser"""
450 """loads both the map and the index from the lazy parser"""
451 if isinstance(self.index, lazyindex):
451 if isinstance(self.index, lazyindex):
452 p = self.index.p
452 p = self.index.p
453 p.loadindex()
453 p.loadindex()
454 self.nodemap = p.map
454 self.nodemap = p.map
455
455
456 def loadmap(self):
456 def loadmap(self):
457 """loads the map from the lazy parser"""
457 """loads the map from the lazy parser"""
458 if isinstance(self.nodemap, lazymap):
458 if isinstance(self.nodemap, lazymap):
459 self.nodemap.p.loadmap()
459 self.nodemap.p.loadmap()
460 self.nodemap = self.nodemap.p.map
460 self.nodemap = self.nodemap.p.map
461
461
462 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
462 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
463 def tip(self): return self.node(len(self.index) - 1)
463 def tip(self): return self.node(len(self.index) - 1)
464 def count(self): return len(self.index)
464 def count(self): return len(self.index)
465 def node(self, rev):
465 def node(self, rev):
466 return (rev < 0) and nullid or self.index[rev][-1]
466 return (rev < 0) and nullid or self.index[rev][-1]
467 def rev(self, node):
467 def rev(self, node):
468 try:
468 try:
469 return self.nodemap[node]
469 return self.nodemap[node]
470 except KeyError:
470 except KeyError:
471 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
471 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
472 def linkrev(self, node):
472 def linkrev(self, node):
473 return (node == nullid) and -1 or self.index[self.rev(node)][-4]
473 return (node == nullid) and -1 or self.index[self.rev(node)][-4]
474 def parents(self, node):
474 def parents(self, node):
475 if node == nullid: return (nullid, nullid)
475 if node == nullid: return (nullid, nullid)
476 r = self.rev(node)
476 r = self.rev(node)
477 d = self.index[r][-3:-1]
477 d = self.index[r][-3:-1]
478 if self.version == REVLOGV0:
478 if self.version == REVLOGV0:
479 return d
479 return d
480 return [ self.node(x) for x in d ]
480 return [ self.node(x) for x in d ]
481 def parentrevs(self, rev):
481 def parentrevs(self, rev):
482 if rev == -1:
482 if rev == -1:
483 return (-1, -1)
483 return (-1, -1)
484 d = self.index[rev][-3:-1]
484 d = self.index[rev][-3:-1]
485 if self.version == REVLOGV0:
485 if self.version == REVLOGV0:
486 return [ self.rev(x) for x in d ]
486 return [ self.rev(x) for x in d ]
487 return d
487 return d
488 def start(self, rev):
488 def start(self, rev):
489 if rev < 0:
489 if rev < 0:
490 return -1
490 return -1
491 if self.version != REVLOGV0:
491 if self.version != REVLOGV0:
492 return self.ngoffset(self.index[rev][0])
492 return self.ngoffset(self.index[rev][0])
493 return self.index[rev][0]
493 return self.index[rev][0]
494
494
495 def end(self, rev): return self.start(rev) + self.length(rev)
495 def end(self, rev): return self.start(rev) + self.length(rev)
496
496
497 def size(self, rev):
497 def size(self, rev):
498 """return the length of the uncompressed text for a given revision"""
498 """return the length of the uncompressed text for a given revision"""
499 l = -1
499 l = -1
500 if self.version != REVLOGV0:
500 if self.version != REVLOGV0:
501 l = self.index[rev][2]
501 l = self.index[rev][2]
502 if l >= 0:
502 if l >= 0:
503 return l
503 return l
504
504
505 t = self.revision(self.node(rev))
505 t = self.revision(self.node(rev))
506 return len(t)
506 return len(t)
507
507
508 # alternate implementation, The advantage to this code is it
508 # alternate implementation, The advantage to this code is it
509 # will be faster for a single revision. But, the results are not
509 # will be faster for a single revision. But, the results are not
510 # cached, so finding the size of every revision will be slower.
510 # cached, so finding the size of every revision will be slower.
511 """
511 """
512 if self.cache and self.cache[1] == rev:
512 if self.cache and self.cache[1] == rev:
513 return len(self.cache[2])
513 return len(self.cache[2])
514
514
515 base = self.base(rev)
515 base = self.base(rev)
516 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
516 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
517 base = self.cache[1]
517 base = self.cache[1]
518 text = self.cache[2]
518 text = self.cache[2]
519 else:
519 else:
520 text = self.revision(self.node(base))
520 text = self.revision(self.node(base))
521
521
522 l = len(text)
522 l = len(text)
523 for x in xrange(base + 1, rev + 1):
523 for x in xrange(base + 1, rev + 1):
524 l = mdiff.patchedsize(l, self.chunk(x))
524 l = mdiff.patchedsize(l, self.chunk(x))
525 return l
525 return l
526 """
526 """
527
527
528 def length(self, rev):
528 def length(self, rev):
529 if rev < 0:
529 if rev < 0:
530 return 0
530 return 0
531 else:
531 else:
532 return self.index[rev][1]
532 return self.index[rev][1]
533 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
533 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
534
534
535 def reachable(self, rev, stop=None):
535 def reachable(self, rev, stop=None):
536 reachable = {}
536 reachable = {}
537 visit = [rev]
537 visit = [rev]
538 reachable[rev] = 1
538 reachable[rev] = 1
539 if stop:
539 if stop:
540 stopn = self.rev(stop)
540 stopn = self.rev(stop)
541 else:
541 else:
542 stopn = 0
542 stopn = 0
543 while visit:
543 while visit:
544 n = visit.pop(0)
544 n = visit.pop(0)
545 if n == stop:
545 if n == stop:
546 continue
546 continue
547 if n == nullid:
547 if n == nullid:
548 continue
548 continue
549 for p in self.parents(n):
549 for p in self.parents(n):
550 if self.rev(p) < stopn:
550 if self.rev(p) < stopn:
551 continue
551 continue
552 if p not in reachable:
552 if p not in reachable:
553 reachable[p] = 1
553 reachable[p] = 1
554 visit.append(p)
554 visit.append(p)
555 return reachable
555 return reachable
556
556
    def nodesbetween(self, roots=None, heads=None):
        """Return a tuple containing three elements. Elements 1 and 2 contain
        a final list bases and heads after all the unreachable ones have been
        pruned.  Element 0 contains a topologically sorted list of all

        nodes that satisfy these constraints:
        1. All nodes must be descended from a node in roots (the nodes on
        roots are considered descended from themselves).
        2. All nodes must also be ancestors of a node in heads (the nodes in
        heads are considered to be their own ancestors).

        If roots is unspecified, nullid is assumed as the only root.
        If heads is unspecified, it is taken to be the output of the
        heads method (i.e. a list of all nodes in the repository that
        have no children)."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendent of nullid
            lowestrev = -1
        if (lowestrev == -1) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in xrange(0, self.count())],
                    [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = self.count() - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = {}
            # Start at the top and keep marking parents until we're done.
            nodestotag = heads[:]
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, 0)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendent of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors[n] = 1 # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.extend([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > -1:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = -1
                roots = [nullid]
        # Transform our roots list into a 'set' (i.e. a dictionary where the
        # values don't matter).
        descendents = dict.fromkeys(roots, 1)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == -1: # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked is descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.pop(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents.  (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents[n] = 1
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        # Keep only the heads that were actually reached from the roots.
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = roots.keys()
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
707
707
708 def heads(self, start=None):
708 def heads(self, start=None):
709 """return the list of all nodes that have no children
709 """return the list of all nodes that have no children
710
710
711 if start is specified, only heads that are descendants of
711 if start is specified, only heads that are descendants of
712 start will be returned
712 start will be returned
713
713
714 """
714 """
715 if start is None:
715 if start is None:
716 start = nullid
716 start = nullid
717 startrev = self.rev(start)
717 startrev = self.rev(start)
718 reachable = {startrev: 1}
718 reachable = {startrev: 1}
719 heads = {startrev: 1}
719 heads = {startrev: 1}
720
720
721 parentrevs = self.parentrevs
721 parentrevs = self.parentrevs
722 for r in xrange(startrev + 1, self.count()):
722 for r in xrange(startrev + 1, self.count()):
723 for p in parentrevs(r):
723 for p in parentrevs(r):
724 if p in reachable:
724 if p in reachable:
725 reachable[r] = 1
725 reachable[r] = 1
726 heads[r] = 1
726 heads[r] = 1
727 if p in heads:
727 if p in heads:
728 del heads[p]
728 del heads[p]
729 return [self.node(r) for r in heads]
729 return [self.node(r) for r in heads]
730
730
731 def children(self, node):
731 def children(self, node):
732 """find the children of a given node"""
732 """find the children of a given node"""
733 c = []
733 c = []
734 p = self.rev(node)
734 p = self.rev(node)
735 for r in range(p + 1, self.count()):
735 for r in range(p + 1, self.count()):
736 n = self.node(r)
736 n = self.node(r)
737 for pn in self.parents(n):
737 for pn in self.parents(n):
738 if pn == node:
738 if pn == node:
739 c.append(n)
739 c.append(n)
740 continue
740 continue
741 elif pn == nullid:
741 elif pn == nullid:
742 continue
742 continue
743 return c
743 return c
744
744
745 def lookup(self, id):
745 def lookup(self, id):
746 """locate a node based on revision number or subset of hex nodeid"""
746 """locate a node based on revision number or subset of hex nodeid"""
747 if type(id) == type(0):
747 if type(id) == type(0):
748 return self.node(id)
748 return self.node(id)
749 try:
749 try:
750 rev = int(id)
750 rev = int(id)
751 if str(rev) != id: raise ValueError
751 if str(rev) != id: raise ValueError
752 if rev < 0: rev = self.count() + rev
752 if rev < 0: rev = self.count() + rev
753 if rev < 0 or rev >= self.count(): raise ValueError
753 if rev < 0 or rev >= self.count(): raise ValueError
754 return self.node(rev)
754 return self.node(rev)
755 except (ValueError, OverflowError):
755 except (ValueError, OverflowError):
756 c = []
756 c = []
757 for n in self.nodemap:
757 for n in self.nodemap:
758 if hex(n).startswith(id):
758 if hex(n).startswith(id):
759 c.append(n)
759 c.append(n)
760 if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
760 if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
761 if len(c) == 1: return c[0]
761 if len(c) == 1: return c[0]
762
762
763 # might need fixing if we change hash lengths
763 # might need fixing if we change hash lengths
764 if len(id) == 20 and id in self.nodemap:
764 if len(id) == 20 and id in self.nodemap:
765 return id
765 return id
766
766
767 raise RevlogError(_("No match found"))
767 raise RevlogError(_("No match found"))
768
768
769 def cmp(self, node, text):
770 """compare text with a given file revision"""
771 p1, p2 = self.parents(node)
772 return hash(text, p1, p2) != node
773
774 def makenode(self, node, text):
775 """calculate a file nodeid for text, descended or possibly
776 unchanged from node"""
777
778 if self.cmp(node, text):
779 return hash(text, node, nullid)
780 return node
781
769 def diff(self, a, b):
782 def diff(self, a, b):
770 """return a delta between two revisions"""
783 """return a delta between two revisions"""
771 return mdiff.textdiff(a, b)
784 return mdiff.textdiff(a, b)
772
785
773 def patches(self, t, pl):
786 def patches(self, t, pl):
774 """apply a list of patches to a string"""
787 """apply a list of patches to a string"""
775 return mdiff.patches(t, pl)
788 return mdiff.patches(t, pl)
776
789
777 def chunk(self, rev, df=None, cachelen=4096):
790 def chunk(self, rev, df=None, cachelen=4096):
778 start, length = self.start(rev), self.length(rev)
791 start, length = self.start(rev), self.length(rev)
779 inline = self.inlinedata()
792 inline = self.inlinedata()
780 if inline:
793 if inline:
781 start += (rev + 1) * struct.calcsize(self.indexformat)
794 start += (rev + 1) * struct.calcsize(self.indexformat)
782 end = start + length
795 end = start + length
783 def loadcache(df):
796 def loadcache(df):
784 cache_length = max(cachelen, length) # 4k
797 cache_length = max(cachelen, length) # 4k
785 if not df:
798 if not df:
786 if inline:
799 if inline:
787 df = self.opener(self.indexfile)
800 df = self.opener(self.indexfile)
788 else:
801 else:
789 df = self.opener(self.datafile)
802 df = self.opener(self.datafile)
790 df.seek(start)
803 df.seek(start)
791 self.chunkcache = (start, df.read(cache_length))
804 self.chunkcache = (start, df.read(cache_length))
792
805
793 if not self.chunkcache:
806 if not self.chunkcache:
794 loadcache(df)
807 loadcache(df)
795
808
796 cache_start = self.chunkcache[0]
809 cache_start = self.chunkcache[0]
797 cache_end = cache_start + len(self.chunkcache[1])
810 cache_end = cache_start + len(self.chunkcache[1])
798 if start >= cache_start and end <= cache_end:
811 if start >= cache_start and end <= cache_end:
799 # it is cached
812 # it is cached
800 offset = start - cache_start
813 offset = start - cache_start
801 else:
814 else:
802 loadcache(df)
815 loadcache(df)
803 offset = 0
816 offset = 0
804
817
805 #def checkchunk():
818 #def checkchunk():
806 # df = self.opener(self.datafile)
819 # df = self.opener(self.datafile)
807 # df.seek(start)
820 # df.seek(start)
808 # return df.read(length)
821 # return df.read(length)
809 #assert s == checkchunk()
822 #assert s == checkchunk()
810 return decompress(self.chunkcache[1][offset:offset + length])
823 return decompress(self.chunkcache[1][offset:offset + length])
811
824
812 def delta(self, node):
825 def delta(self, node):
813 """return or calculate a delta between a node and its predecessor"""
826 """return or calculate a delta between a node and its predecessor"""
814 r = self.rev(node)
827 r = self.rev(node)
815 return self.revdiff(r - 1, r)
828 return self.revdiff(r - 1, r)
816
829
817 def revdiff(self, rev1, rev2):
830 def revdiff(self, rev1, rev2):
818 """return or calculate a delta between two revisions"""
831 """return or calculate a delta between two revisions"""
819 b1 = self.base(rev1)
832 b1 = self.base(rev1)
820 b2 = self.base(rev2)
833 b2 = self.base(rev2)
821 if b1 == b2 and rev1 + 1 == rev2:
834 if b1 == b2 and rev1 + 1 == rev2:
822 return self.chunk(rev2)
835 return self.chunk(rev2)
823 else:
836 else:
824 return self.diff(self.revision(self.node(rev1)),
837 return self.diff(self.revision(self.node(rev1)),
825 self.revision(self.node(rev2)))
838 self.revision(self.node(rev2)))
826
839
827 def revision(self, node):
840 def revision(self, node):
828 """return an uncompressed revision of a given"""
841 """return an uncompressed revision of a given"""
829 if node == nullid: return ""
842 if node == nullid: return ""
830 if self.cache and self.cache[0] == node: return self.cache[2]
843 if self.cache and self.cache[0] == node: return self.cache[2]
831
844
832 # look up what we need to read
845 # look up what we need to read
833 text = None
846 text = None
834 rev = self.rev(node)
847 rev = self.rev(node)
835 base = self.base(rev)
848 base = self.base(rev)
836
849
837 if self.inlinedata():
850 if self.inlinedata():
838 # we probably have the whole chunk cached
851 # we probably have the whole chunk cached
839 df = None
852 df = None
840 else:
853 else:
841 df = self.opener(self.datafile)
854 df = self.opener(self.datafile)
842
855
843 # do we have useful data cached?
856 # do we have useful data cached?
844 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
857 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
845 base = self.cache[1]
858 base = self.cache[1]
846 text = self.cache[2]
859 text = self.cache[2]
847 self.loadindex(base, rev + 1)
860 self.loadindex(base, rev + 1)
848 else:
861 else:
849 self.loadindex(base, rev + 1)
862 self.loadindex(base, rev + 1)
850 text = self.chunk(base, df=df)
863 text = self.chunk(base, df=df)
851
864
852 bins = []
865 bins = []
853 for r in xrange(base + 1, rev + 1):
866 for r in xrange(base + 1, rev + 1):
854 bins.append(self.chunk(r, df=df))
867 bins.append(self.chunk(r, df=df))
855
868
856 text = self.patches(text, bins)
869 text = self.patches(text, bins)
857
870
858 p1, p2 = self.parents(node)
871 p1, p2 = self.parents(node)
859 if node != hash(text, p1, p2):
872 if node != hash(text, p1, p2):
860 raise RevlogError(_("integrity check failed on %s:%d")
873 raise RevlogError(_("integrity check failed on %s:%d")
861 % (self.datafile, rev))
874 % (self.datafile, rev))
862
875
863 self.cache = (node, rev, text)
876 self.cache = (node, rev, text)
864 return text
877 return text
865
878
866 def checkinlinesize(self, tr, fp=None):
879 def checkinlinesize(self, tr, fp=None):
867 if not self.inlinedata():
880 if not self.inlinedata():
868 return
881 return
869 if not fp:
882 if not fp:
870 fp = self.opener(self.indexfile, 'r')
883 fp = self.opener(self.indexfile, 'r')
871 fp.seek(0, 2)
884 fp.seek(0, 2)
872 size = fp.tell()
885 size = fp.tell()
873 if size < 131072:
886 if size < 131072:
874 return
887 return
875 trinfo = tr.find(self.indexfile)
888 trinfo = tr.find(self.indexfile)
876 if trinfo == None:
889 if trinfo == None:
877 raise RevlogError(_("%s not found in the transaction" %
890 raise RevlogError(_("%s not found in the transaction" %
878 self.indexfile))
891 self.indexfile))
879
892
880 trindex = trinfo[2]
893 trindex = trinfo[2]
881 dataoff = self.start(trindex)
894 dataoff = self.start(trindex)
882
895
883 tr.add(self.datafile, dataoff)
896 tr.add(self.datafile, dataoff)
884 df = self.opener(self.datafile, 'w')
897 df = self.opener(self.datafile, 'w')
885 calc = struct.calcsize(self.indexformat)
898 calc = struct.calcsize(self.indexformat)
886 for r in xrange(self.count()):
899 for r in xrange(self.count()):
887 start = self.start(r) + (r + 1) * calc
900 start = self.start(r) + (r + 1) * calc
888 length = self.length(r)
901 length = self.length(r)
889 fp.seek(start)
902 fp.seek(start)
890 d = fp.read(length)
903 d = fp.read(length)
891 df.write(d)
904 df.write(d)
892 fp.close()
905 fp.close()
893 df.close()
906 df.close()
894 fp = self.opener(self.indexfile, 'w', atomictemp=True)
907 fp = self.opener(self.indexfile, 'w', atomictemp=True)
895 self.version &= ~(REVLOGNGINLINEDATA)
908 self.version &= ~(REVLOGNGINLINEDATA)
896 if self.count():
909 if self.count():
897 x = self.index[0]
910 x = self.index[0]
898 e = struct.pack(self.indexformat, *x)[4:]
911 e = struct.pack(self.indexformat, *x)[4:]
899 l = struct.pack(versionformat, self.version)
912 l = struct.pack(versionformat, self.version)
900 fp.write(l)
913 fp.write(l)
901 fp.write(e)
914 fp.write(e)
902
915
903 for i in xrange(1, self.count()):
916 for i in xrange(1, self.count()):
904 x = self.index[i]
917 x = self.index[i]
905 e = struct.pack(self.indexformat, *x)
918 e = struct.pack(self.indexformat, *x)
906 fp.write(e)
919 fp.write(e)
907
920
908 # if we don't call rename, the temp file will never replace the
921 # if we don't call rename, the temp file will never replace the
909 # real index
922 # real index
910 fp.rename()
923 fp.rename()
911
924
912 tr.replace(self.indexfile, trindex * calc)
925 tr.replace(self.indexfile, trindex * calc)
913 self.chunkcache = None
926 self.chunkcache = None
914
927
    def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        if text is None: text = ""
        if p1 is None: p1 = self.tip()
        if p2 is None: p2 = nullid

        node = hash(text, p1, p2)

        # identical revision already stored: nothing to do
        if node in self.nodemap:
            return node

        n = self.count()
        t = n - 1  # current tip revision

        if n:
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)
            if not d:
                # no precomputed delta: diff against the current tip
                prev = self.revision(self.tip())
                d = self.diff(prev, str(text))
            data = compress(d)
            l = len(data[1]) + len(data[0])
            # dist: total on-disk bytes read to rebuild text via the chain
            dist = end - start + l

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not n or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = n
        else:
            base = self.base(t)

        # new chunk is appended at the end of the data
        offset = 0
        if t >= 0:
            offset = self.end(t)

        if self.version == REVLOGV0:
            e = (offset, l, base, link, p1, p2, node)
        else:
            e = (self.offset_type(offset, 0), l, len(text),
                 base, link, self.rev(p1), self.rev(p2), node)

        self.index.append(e)
        self.nodemap[node] = n
        entry = struct.pack(self.indexformat, *e)

        if not self.inlinedata():
            # separate data file: write chunk there, entry to the index
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, n * len(entry))
            f = self.opener(self.datafile, "a")
            if data[0]:
                f.write(data[0])
            f.write(data[1])
            f.close()
            f = self.opener(self.indexfile, "a")
        else:
            # inline: entry and chunk both go at the end of the index
            f = self.opener(self.indexfile, "a+")
            f.seek(0, 2)
            transaction.add(self.indexfile, f.tell(), self.count() - 1)

        # first NG entry: the version header replaces the entry's first
        # 4 bytes
        if len(self.index) == 1 and self.version != REVLOGV0:
            l = struct.pack(versionformat, self.version)
            f.write(l)
            entry = entry[4:]

        f.write(entry)

        if self.inlinedata():
            f.write(data[0])
            f.write(data[1])
            # may migrate data out of the index if it grew too large
            self.checkinlinesize(transaction, f)

        self.cache = (node, n, text)
        return node
998
1011
999 def ancestor(self, a, b):
1012 def ancestor(self, a, b):
1000 """calculate the least common ancestor of nodes a and b"""
1013 """calculate the least common ancestor of nodes a and b"""
1001
1014
1002 # start with some short cuts for the linear cases
1015 # start with some short cuts for the linear cases
1003 if a == b:
1016 if a == b:
1004 return a
1017 return a
1005 ra = self.rev(a)
1018 ra = self.rev(a)
1006 rb = self.rev(b)
1019 rb = self.rev(b)
1007 if ra < rb:
1020 if ra < rb:
1008 last = b
1021 last = b
1009 first = a
1022 first = a
1010 else:
1023 else:
1011 last = a
1024 last = a
1012 first = b
1025 first = b
1013
1026
1014 # reachable won't include stop in the list, so we have to use a parent
1027 # reachable won't include stop in the list, so we have to use a parent
1015 reachable = self.reachable(last, stop=self.parents(first)[0])
1028 reachable = self.reachable(last, stop=self.parents(first)[0])
1016 if first in reachable:
1029 if first in reachable:
1017 return first
1030 return first
1018
1031
1019 # calculate the distance of every node from root
1032 # calculate the distance of every node from root
1020 dist = {nullid: 0}
1033 dist = {nullid: 0}
1021 for i in xrange(self.count()):
1034 for i in xrange(self.count()):
1022 n = self.node(i)
1035 n = self.node(i)
1023 p1, p2 = self.parents(n)
1036 p1, p2 = self.parents(n)
1024 dist[n] = max(dist[p1], dist[p2]) + 1
1037 dist[n] = max(dist[p1], dist[p2]) + 1
1025
1038
1026 # traverse ancestors in order of decreasing distance from root
1039 # traverse ancestors in order of decreasing distance from root
1027 def ancestors(node):
1040 def ancestors(node):
1028 # we store negative distances because heap returns smallest member
1041 # we store negative distances because heap returns smallest member
1029 h = [(-dist[node], node)]
1042 h = [(-dist[node], node)]
1030 seen = {}
1043 seen = {}
1031 while h:
1044 while h:
1032 d, n = heapq.heappop(h)
1045 d, n = heapq.heappop(h)
1033 if n not in seen:
1046 if n not in seen:
1034 seen[n] = 1
1047 seen[n] = 1
1035 yield (-d, n)
1048 yield (-d, n)
1036 for p in self.parents(n):
1049 for p in self.parents(n):
1037 heapq.heappush(h, (-dist[p], p))
1050 heapq.heappush(h, (-dist[p], p))
1038
1051
1039 def generations(node):
1052 def generations(node):
1040 sg, s = None, {}
1053 sg, s = None, {}
1041 for g,n in ancestors(node):
1054 for g,n in ancestors(node):
1042 if g != sg:
1055 if g != sg:
1043 if sg:
1056 if sg:
1044 yield sg, s
1057 yield sg, s
1045 sg, s = g, {n:1}
1058 sg, s = g, {n:1}
1046 else:
1059 else:
1047 s[n] = 1
1060 s[n] = 1
1048 yield sg, s
1061 yield sg, s
1049
1062
1050 x = generations(a)
1063 x = generations(a)
1051 y = generations(b)
1064 y = generations(b)
1052 gx = x.next()
1065 gx = x.next()
1053 gy = y.next()
1066 gy = y.next()
1054
1067
1055 # increment each ancestor list until it is closer to root than
1068 # increment each ancestor list until it is closer to root than
1056 # the other, or they match
1069 # the other, or they match
1057 while 1:
1070 while 1:
1058 #print "ancestor gen %s %s" % (gx[0], gy[0])
1071 #print "ancestor gen %s %s" % (gx[0], gy[0])
1059 if gx[0] == gy[0]:
1072 if gx[0] == gy[0]:
1060 # find the intersection
1073 # find the intersection
1061 i = [ n for n in gx[1] if n in gy[1] ]
1074 i = [ n for n in gx[1] if n in gy[1] ]
1062 if i:
1075 if i:
1063 return i[0]
1076 return i[0]
1064 else:
1077 else:
1065 #print "next"
1078 #print "next"
1066 gy = y.next()
1079 gy = y.next()
1067 gx = x.next()
1080 gx = x.next()
1068 elif gx[0] < gy[0]:
1081 elif gx[0] < gy[0]:
1069 #print "next y"
1082 #print "next y"
1070 gy = y.next()
1083 gy = y.next()
1071 else:
1084 else:
1072 #print "next x"
1085 #print "next x"
1073 gx = x.next()
1086 gx = x.next()
1074
1087
1075 def group(self, nodelist, lookup, infocollect=None):
1088 def group(self, nodelist, lookup, infocollect=None):
1076 """calculate a delta group
1089 """calculate a delta group
1077
1090
1078 Given a list of changeset revs, return a set of deltas and
1091 Given a list of changeset revs, return a set of deltas and
1079 metadata corresponding to nodes. the first delta is
1092 metadata corresponding to nodes. the first delta is
1080 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1093 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1081 have this parent as it has all history before these
1094 have this parent as it has all history before these
1082 changesets. parent is parent[0]
1095 changesets. parent is parent[0]
1083 """
1096 """
1084 revs = [self.rev(n) for n in nodelist]
1097 revs = [self.rev(n) for n in nodelist]
1085
1098
1086 # if we don't have any revisions touched by these changesets, bail
1099 # if we don't have any revisions touched by these changesets, bail
1087 if not revs:
1100 if not revs:
1088 yield changegroup.closechunk()
1101 yield changegroup.closechunk()
1089 return
1102 return
1090
1103
1091 # add the parent of the first rev
1104 # add the parent of the first rev
1092 p = self.parents(self.node(revs[0]))[0]
1105 p = self.parents(self.node(revs[0]))[0]
1093 revs.insert(0, self.rev(p))
1106 revs.insert(0, self.rev(p))
1094
1107
1095 # build deltas
1108 # build deltas
1096 for d in xrange(0, len(revs) - 1):
1109 for d in xrange(0, len(revs) - 1):
1097 a, b = revs[d], revs[d + 1]
1110 a, b = revs[d], revs[d + 1]
1098 nb = self.node(b)
1111 nb = self.node(b)
1099
1112
1100 if infocollect is not None:
1113 if infocollect is not None:
1101 infocollect(nb)
1114 infocollect(nb)
1102
1115
1103 d = self.revdiff(a, b)
1116 d = self.revdiff(a, b)
1104 p = self.parents(nb)
1117 p = self.parents(nb)
1105 meta = nb + p[0] + p[1] + lookup(nb)
1118 meta = nb + p[0] + p[1] + lookup(nb)
1106 yield changegroup.genchunk("%s%s" % (meta, d))
1119 yield changegroup.genchunk("%s%s" % (meta, d))
1107
1120
1108 yield changegroup.closechunk()
1121 yield changegroup.closechunk()
1109
1122
    def addgroup(self, revs, linkmapper, transaction, unique=0):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        revs is an iterable of chunks, each an 80-byte header (node,
        p1, p2, changeset node) followed by the delta bytes.
        linkmapper maps a changeset node to its link revision.
        unique is currently unused (the duplicate check below is
        commented out).  Returns the node of the last revision added.
        """

        #track the base of the current delta log
        r = self.count()
        t = r - 1  # rev number of the current tip (-1 when empty)
        node = None

        base = prev = -1
        start = end = textlen = 0
        if r:
            end = self.end(t)

        # register the append positions with the transaction so a
        # failure can roll the files back to their current size
        ifh = self.opener(self.indexfile, "a+")
        ifh.seek(0, 2)
        transaction.add(self.indexfile, ifh.tell(), self.count())
        if self.inlinedata():
            # inline revlogs keep data in the index file; no data file
            dfh = None
        else:
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        # loop through our set of deltas
        chain = None
        for chunk in revs:
            node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
            link = linkmapper(cs)
            if node in self.nodemap:
                # this can happen if two branches make the same change
                # if unique:
                #    raise RevlogError(_("already have %s") % hex(node[:4]))
                chain = node
                continue
            delta = chunk[80:]

            # both parents must already be known to this revlog
            for p in (p1, p2):
                if not p in self.nodemap:
                    raise RevlogError(_("unknown parent %s") % short(p))

            if not chain:
                # retrieve the parent revision of the delta chain
                chain = p1
                if not chain in self.nodemap:
                    raise RevlogError(_("unknown base %s") % short(chain[:4]))

            # full versions are inserted when the needed deltas become
            # comparable to the uncompressed text or when the previous
            # version is not the one we have a delta against. We use
            # the size of the previous full rev as a proxy for the
            # current size.

            if chain == prev:
                # delta applies against the previous rev: compress it
                # and track the resulting text length
                tempd = compress(delta)
                cdelta = tempd[0] + tempd[1]
                textlen = mdiff.patchedsize(textlen, delta)

            if chain != prev or (end - start + len(cdelta)) > textlen * 2:
                # flush our writes here so we can read it in revision
                if dfh:
                    dfh.flush()
                ifh.flush()
                # rebuild the full text and store it via addrevision,
                # which decides on its own whether to delta or not
                text = self.revision(chain)
                text = self.patches(text, [delta])
                chk = self.addrevision(text, transaction, link, p1, p2)
                if chk != node:
                    raise RevlogError(_("consistency error adding group"))
                textlen = len(text)
            else:
                # store the compressed delta directly, appending an
                # index entry in the format matching self.version
                if self.version == REVLOGV0:
                    e = (end, len(cdelta), base, link, p1, p2, node)
                else:
                    e = (self.offset_type(end, 0), len(cdelta), textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                self.index.append(e)
                self.nodemap[node] = r
                if self.inlinedata():
                    ifh.write(struct.pack(self.indexformat, *e))
                    ifh.write(cdelta)
                    self.checkinlinesize(transaction, ifh)
                    if not self.inlinedata():
                        # checkinlinesize migrated us out of inline
                        # mode; reopen both files in plain append mode
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                else:
                    if not dfh:
                        # addrevision switched from inline to conventional
                        # reopen the index
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                    dfh.write(cdelta)
                    ifh.write(struct.pack(self.indexformat, *e))

            # advance tip/rev counters and remember this node as the
            # base of the next delta in the stream
            t, r, chain, prev = r, r + 1, node, node
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)

        return node
    def strip(self, rev, minlink):
        """remove revision rev and all later revisions from this revlog

        minlink is the oldest link revision we are allowed to strip;
        rev is advanced past any entries linked to older changesets.
        Truncates the files on disk first, then forgets the stripped
        entries in memory.
        """
        # nothing to do for an empty revlog or an out-of-range rev
        if self.count() == 0 or rev >= self.count():
            return

        # mutation below requires the fully-parsed index
        if isinstance(self.index, lazyindex):
            self.loadindexmap()

        # When stripping away a revision, we need to make sure it
        # does not actually belong to an older changeset.
        # The minlink parameter defines the oldest revision
        # we're allowed to strip away.
        # (index entry position -4 is the link revision in both the
        # v0 and ng index formats)
        while minlink > self.index[rev][-4]:
            rev += 1
            if rev >= self.count():
                return

        # first truncate the files on disk
        end = self.start(rev)
        if not self.inlinedata():
            df = self.opener(self.datafile, "a")
            df.truncate(end)
            # index entries are fixed-size records
            end = rev * struct.calcsize(self.indexformat)
        else:
            # inline: data lives in the index file after each entry
            end += rev * struct.calcsize(self.indexformat)

        indexf = self.opener(self.indexfile, "a")
        indexf.truncate(end)

        # then reset internal state in memory to forget those revisions
        self.cache = None
        self.chunkcache = None
        for x in xrange(rev, self.count()):
            del self.nodemap[self.node(x)]

        del self.index[rev:]
1250 def checksize(self):
1263 def checksize(self):
1251 expected = 0
1264 expected = 0
1252 if self.count():
1265 if self.count():
1253 expected = self.end(self.count() - 1)
1266 expected = self.end(self.count() - 1)
1254
1267
1255 try:
1268 try:
1256 f = self.opener(self.datafile)
1269 f = self.opener(self.datafile)
1257 f.seek(0, 2)
1270 f.seek(0, 2)
1258 actual = f.tell()
1271 actual = f.tell()
1259 dd = actual - expected
1272 dd = actual - expected
1260 except IOError, inst:
1273 except IOError, inst:
1261 if inst.errno != errno.ENOENT:
1274 if inst.errno != errno.ENOENT:
1262 raise
1275 raise
1263 dd = 0
1276 dd = 0
1264
1277
1265 try:
1278 try:
1266 f = self.opener(self.indexfile)
1279 f = self.opener(self.indexfile)
1267 f.seek(0, 2)
1280 f.seek(0, 2)
1268 actual = f.tell()
1281 actual = f.tell()
1269 s = struct.calcsize(self.indexformat)
1282 s = struct.calcsize(self.indexformat)
1270 i = actual / s
1283 i = actual / s
1271 di = actual - (i * s)
1284 di = actual - (i * s)
1272 if self.inlinedata():
1285 if self.inlinedata():
1273 databytes = 0
1286 databytes = 0
1274 for r in xrange(self.count()):
1287 for r in xrange(self.count()):
1275 databytes += self.length(r)
1288 databytes += self.length(r)
1276 dd = 0
1289 dd = 0
1277 di = actual - self.count() * s - databytes
1290 di = actual - self.count() * s - databytes
1278 except IOError, inst:
1291 except IOError, inst:
1279 if inst.errno != errno.ENOENT:
1292 if inst.errno != errno.ENOENT:
1280 raise
1293 raise
1281 di = 0
1294 di = 0
1282
1295
1283 return (dd, di)
1296 return (dd, di)
1284
1297
1285
1298
General Comments 0
You need to be logged in to leave comments. Login now