changegroup: new bundler API
Benoit Boissinot
r14143:da635d3c default
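The new bundler API: revchunk() now takes (revlog, rev, prev) and computes the
delta itself, instead of the old signature where the caller passed in
precomputed node/p1/p2/prefix/data; the wire header moves into an overridable
builddeltaheader() hook. For orientation, a minimal sketch (modern Python
spelling, byte values invented) of the 80-byte HG10 delta header that
builddeltaheader() packs:

    import struct

    # Four 20-byte binary nodeids: node, p1, p2, linknode. The delta base
    # is not encoded; HG10 implies "previous chunk" (or p1 for the first
    # chunk), which is why builddeltaheader() ignores basenode.
    _BUNDLE10_DELTA_HEADER = "20s20s20s20s"

    node = b"\x11" * 20
    p1 = b"\x22" * 20
    p2 = b"\x00" * 20       # null parent
    linknode = b"\x33" * 20

    header = struct.pack(_BUNDLE10_DELTA_HEADER, node, p1, p2, linknode)
    assert len(header) == struct.calcsize(_BUNDLE10_DELTA_HEADER) == 80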
--- a/mercurial/changegroup.py
+++ b/mercurial/changegroup.py
@@ -1,227 +1,245 @@
 # changegroup.py - Mercurial changegroup manipulation functions
 #
 # Copyright 2006 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 from i18n import _
-import util
+from node import nullrev
+import mdiff, util
 import struct, os, bz2, zlib, tempfile

 _BUNDLE10_DELTA_HEADER = "20s20s20s20s"

 def readexactly(stream, n):
     '''read n bytes from stream.read and abort if less was available'''
     s = stream.read(n)
     if len(s) < n:
         raise util.Abort(_("stream ended unexpectedly"
                            " (got %d bytes, expected %d)")
                          % (len(s), n))
     return s

 def getchunk(stream):
     """return the next chunk from stream as a string"""
     d = readexactly(stream, 4)
     l = struct.unpack(">l", d)[0]
     if l <= 4:
         if l:
             raise util.Abort(_("invalid chunk length %d") % l)
         return ""
     return readexactly(stream, l - 4)

 def chunkheader(length):
     """return a changegroup chunk header (string)"""
     return struct.pack(">l", length + 4)

 def closechunk():
     """return a changegroup chunk header (string) for a zero-length chunk"""
     return struct.pack(">l", 0)

 class nocompress(object):
     def compress(self, x):
         return x
     def flush(self):
         return ""

 bundletypes = {
     "": ("", nocompress), # only when using unbundle on ssh and old http servers
                           # since the unification ssh accepts a header but there
                           # is no capability signaling it.
     "HG10UN": ("HG10UN", nocompress),
     "HG10BZ": ("HG10", lambda: bz2.BZ2Compressor()),
     "HG10GZ": ("HG10GZ", lambda: zlib.compressobj()),
 }

 # hgweb uses this list to communicate its preferred type
 bundlepriority = ['HG10GZ', 'HG10BZ', 'HG10UN']

 def writebundle(cg, filename, bundletype):
     """Write a bundle file and return its filename.

     Existing files will not be overwritten.
     If no filename is specified, a temporary file is created.
     bz2 compression can be turned off.
     The bundle file will be deleted in case of errors.
     """

     fh = None
     cleanup = None
     try:
         if filename:
             fh = open(filename, "wb")
         else:
             fd, filename = tempfile.mkstemp(prefix="hg-bundle-", suffix=".hg")
             fh = os.fdopen(fd, "wb")
         cleanup = filename

         header, compressor = bundletypes[bundletype]
         fh.write(header)
         z = compressor()

         # parse the changegroup data, otherwise we will block
         # in case of sshrepo because we don't know the end of the stream

         # an empty chunkgroup is the end of the changegroup
         # a changegroup has at least 2 chunkgroups (changelog and manifest).
         # after that, an empty chunkgroup is the end of the changegroup
         empty = False
         count = 0
         while not empty or count <= 2:
             empty = True
             count += 1
             while 1:
                 chunk = getchunk(cg)
                 if not chunk:
                     break
                 empty = False
                 fh.write(z.compress(chunkheader(len(chunk))))
                 pos = 0
                 while pos < len(chunk):
                     next = pos + 2**20
                     fh.write(z.compress(chunk[pos:next]))
                     pos = next
             fh.write(z.compress(closechunk()))
         fh.write(z.flush())
         cleanup = None
         return filename
     finally:
         if fh is not None:
             fh.close()
         if cleanup is not None:
             os.unlink(cleanup)

 def decompressor(fh, alg):
     if alg == 'UN':
         return fh
     elif alg == 'GZ':
         def generator(f):
             zd = zlib.decompressobj()
             for chunk in f:
                 yield zd.decompress(chunk)
     elif alg == 'BZ':
         def generator(f):
             zd = bz2.BZ2Decompressor()
             zd.decompress("BZ")
             for chunk in util.filechunkiter(f, 4096):
                 yield zd.decompress(chunk)
     else:
         raise util.Abort("unknown bundle compression '%s'" % alg)
     return util.chunkbuffer(generator(fh))

 class unbundle10(object):
     deltaheader = _BUNDLE10_DELTA_HEADER
     deltaheadersize = struct.calcsize(deltaheader)
     def __init__(self, fh, alg):
         self._stream = decompressor(fh, alg)
         self._type = alg
         self.callback = None
     def compressed(self):
         return self._type != 'UN'
     def read(self, l):
         return self._stream.read(l)
     def seek(self, pos):
         return self._stream.seek(pos)
     def tell(self):
         return self._stream.tell()
     def close(self):
         return self._stream.close()

     def chunklength(self):
         d = readexactly(self._stream, 4)
         l = struct.unpack(">l", d)[0]
         if l <= 4:
             if l:
                 raise util.Abort(_("invalid chunk length %d") % l)
             return 0
         if self.callback:
             self.callback()
         return l - 4

     def chunk(self):
         """return the next chunk from changegroup 'source' as a string"""
         l = self.chunklength()
         return readexactly(self._stream, l)

     def _deltaheader(self, headertuple, prevnode):
         node, p1, p2, cs = headertuple
         if prevnode is None:
             deltabase = p1
         else:
             deltabase = prevnode
         return node, p1, p2, deltabase, cs

     def parsechunk(self, prevnode):
         l = self.chunklength()
         if not l:
             return {}
         headerdata = readexactly(self._stream, self.deltaheadersize)
         header = struct.unpack(self.deltaheader, headerdata)
         delta = readexactly(self._stream, l - self.deltaheadersize)
         node, p1, p2, deltabase, cs = self._deltaheader(header, prevnode)
         return dict(node=node, p1=p1, p2=p2, cs=cs,
                     deltabase=deltabase, delta=delta)

 class headerlessfixup(object):
     def __init__(self, fh, h):
         self._h = h
         self._fh = fh
     def read(self, n):
         if self._h:
             d, self._h = self._h[:n], self._h[n:]
             if len(d) < n:
                 d += readexactly(self._fh, n - len(d))
             return d
         return readexactly(self._fh, n)

 def readbundle(fh, fname):
     header = readexactly(fh, 6)

     if not fname:
         fname = "stream"
         if not header.startswith('HG') and header.startswith('\0'):
             fh = headerlessfixup(fh, header)
             header = "HG10UN"

     magic, version, alg = header[0:2], header[2:4], header[4:6]

     if magic != 'HG':
         raise util.Abort(_('%s: not a Mercurial bundle') % fname)
     if version != '10':
         raise util.Abort(_('%s: unknown bundle version %s') % (fname, version))
     return unbundle10(fh, alg)

 class bundle10(object):
+    deltaheader = _BUNDLE10_DELTA_HEADER
     def __init__(self, lookup):
         self._lookup = lookup
     def close(self):
         return closechunk()
     def fileheader(self, fname):
         return chunkheader(len(fname)) + fname
-    def revchunk(self, revlog, node='', p1='', p2='', prefix='', data=''):
+    def revchunk(self, revlog, rev, prev):
+        node = revlog.node(rev)
+        p1, p2 = revlog.parentrevs(rev)
+        base = prev
+
+        prefix = ''
+        if base == nullrev:
+            delta = revlog.revision(node)
+            prefix = mdiff.trivialdiffheader(len(delta))
+        else:
+            delta = revlog.revdiff(base, rev)
         linknode = self._lookup(revlog, node)
-        meta = node + p1 + p2 + linknode + prefix
-        l = len(meta) + len(data)
+        p1n, p2n = revlog.parents(node)
+        basenode = revlog.node(base)
+        meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode)
+        meta += prefix
+        l = len(meta) + len(delta)
         yield chunkheader(l)
         yield meta
-        yield data
+        yield delta
+    def builddeltaheader(self, node, p1n, p2n, basenode, linknode):
+        # do nothing with basenode, it is implicitly the previous one in HG10
+        return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
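
To make the chunk framing above concrete: every chunk is a big-endian 4-byte
length that counts itself, followed by the payload, and a zero length closes a
chunk group. A self-contained round-trip of the same logic as getchunk(),
chunkheader() and closechunk() (restated for modern Python bytes; the
originals operate on Python 2 str and abort on short reads):

    import io
    import struct

    def chunkheader(length):
        # the length prefix counts itself, hence the + 4
        return struct.pack(">l", length + 4)

    def closechunk():
        # a zero length marks the end of a chunk group
        return struct.pack(">l", 0)

    def getchunk(stream):
        l = struct.unpack(">l", stream.read(4))[0]
        if l <= 4:
            return b""          # terminator (error handling elided)
        return stream.read(l - 4)

    payload = b"some delta bytes"
    stream = io.BytesIO(chunkheader(len(payload)) + payload + closechunk())
    assert getchunk(stream) == payload
    assert getchunk(stream) == b""  # end of the chunk group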
@@ -1,1274 +1,1265
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 from i18n import _
16 from i18n import _
17 import ancestor, mdiff, parsers, error, util
17 import ancestor, mdiff, parsers, error, util
18 import struct, zlib, errno
18 import struct, zlib, errno
19
19
20 _pack = struct.pack
20 _pack = struct.pack
21 _unpack = struct.unpack
21 _unpack = struct.unpack
22 _compress = zlib.compress
22 _compress = zlib.compress
23 _decompress = zlib.decompress
23 _decompress = zlib.decompress
24 _sha = util.sha1
24 _sha = util.sha1
25
25
26 # revlog header flags
26 # revlog header flags
27 REVLOGV0 = 0
27 REVLOGV0 = 0
28 REVLOGNG = 1
28 REVLOGNG = 1
29 REVLOGNGINLINEDATA = (1 << 16)
29 REVLOGNGINLINEDATA = (1 << 16)
30 REVLOGSHALLOW = (1 << 17)
30 REVLOGSHALLOW = (1 << 17)
31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 REVLOG_DEFAULT_FORMAT = REVLOGNG
32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGSHALLOW
34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGSHALLOW
35
35
36 # revlog index flags
36 # revlog index flags
37 REVIDX_PARENTDELTA = 1
37 REVIDX_PARENTDELTA = 1
38 REVIDX_PUNCHED_FLAG = 2
38 REVIDX_PUNCHED_FLAG = 2
39 REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA
39 REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA
40
40
41 # max size of revlog with inline data
41 # max size of revlog with inline data
42 _maxinline = 131072
42 _maxinline = 131072
43 _chunksize = 1048576
43 _chunksize = 1048576
44
44
45 RevlogError = error.RevlogError
45 RevlogError = error.RevlogError
46 LookupError = error.LookupError
46 LookupError = error.LookupError
47
47
48 def getoffset(q):
48 def getoffset(q):
49 return int(q >> 16)
49 return int(q >> 16)
50
50
51 def gettype(q):
51 def gettype(q):
52 return int(q & 0xFFFF)
52 return int(q & 0xFFFF)
53
53
54 def offset_type(offset, type):
54 def offset_type(offset, type):
55 return long(long(offset) << 16 | type)
55 return long(long(offset) << 16 | type)
56
56
57 nullhash = _sha(nullid)
57 nullhash = _sha(nullid)
58
58
59 def hash(text, p1, p2):
59 def hash(text, p1, p2):
60 """generate a hash from the given text and its parent hashes
60 """generate a hash from the given text and its parent hashes
61
61
62 This hash combines both the current file contents and its history
62 This hash combines both the current file contents and its history
63 in a manner that makes it easy to distinguish nodes with the same
63 in a manner that makes it easy to distinguish nodes with the same
64 content in the revision graph.
64 content in the revision graph.
65 """
65 """
66 # As of now, if one of the parent node is null, p2 is null
66 # As of now, if one of the parent node is null, p2 is null
67 if p2 == nullid:
67 if p2 == nullid:
68 # deep copy of a hash is faster than creating one
68 # deep copy of a hash is faster than creating one
69 s = nullhash.copy()
69 s = nullhash.copy()
70 s.update(p1)
70 s.update(p1)
71 else:
71 else:
72 # none of the parent nodes are nullid
72 # none of the parent nodes are nullid
73 l = [p1, p2]
73 l = [p1, p2]
74 l.sort()
74 l.sort()
75 s = _sha(l[0])
75 s = _sha(l[0])
76 s.update(l[1])
76 s.update(l[1])
77 s.update(text)
77 s.update(text)
78 return s.digest()
78 return s.digest()
79
79
80 def compress(text):
80 def compress(text):
81 """ generate a possibly-compressed representation of text """
81 """ generate a possibly-compressed representation of text """
82 if not text:
82 if not text:
83 return ("", text)
83 return ("", text)
84 l = len(text)
84 l = len(text)
85 bin = None
85 bin = None
86 if l < 44:
86 if l < 44:
87 pass
87 pass
88 elif l > 1000000:
88 elif l > 1000000:
89 # zlib makes an internal copy, thus doubling memory usage for
89 # zlib makes an internal copy, thus doubling memory usage for
90 # large files, so lets do this in pieces
90 # large files, so lets do this in pieces
91 z = zlib.compressobj()
91 z = zlib.compressobj()
92 p = []
92 p = []
93 pos = 0
93 pos = 0
94 while pos < l:
94 while pos < l:
95 pos2 = pos + 2**20
95 pos2 = pos + 2**20
96 p.append(z.compress(text[pos:pos2]))
96 p.append(z.compress(text[pos:pos2]))
97 pos = pos2
97 pos = pos2
98 p.append(z.flush())
98 p.append(z.flush())
99 if sum(map(len, p)) < l:
99 if sum(map(len, p)) < l:
100 bin = "".join(p)
100 bin = "".join(p)
101 else:
101 else:
102 bin = _compress(text)
102 bin = _compress(text)
103 if bin is None or len(bin) > l:
103 if bin is None or len(bin) > l:
104 if text[0] == '\0':
104 if text[0] == '\0':
105 return ("", text)
105 return ("", text)
106 return ('u', text)
106 return ('u', text)
107 return ("", bin)
107 return ("", bin)
108
108
109 def decompress(bin):
109 def decompress(bin):
110 """ decompress the given input """
110 """ decompress the given input """
111 if not bin:
111 if not bin:
112 return bin
112 return bin
113 t = bin[0]
113 t = bin[0]
114 if t == '\0':
114 if t == '\0':
115 return bin
115 return bin
116 if t == 'x':
116 if t == 'x':
117 return _decompress(bin)
117 return _decompress(bin)
118 if t == 'u':
118 if t == 'u':
119 return bin[1:]
119 return bin[1:]
120 raise RevlogError(_("unknown compression type %r") % t)
120 raise RevlogError(_("unknown compression type %r") % t)
121
121
122 indexformatv0 = ">4l20s20s20s"
122 indexformatv0 = ">4l20s20s20s"
123 v0shaoffset = 56
123 v0shaoffset = 56
124
124
125 class revlogoldio(object):
125 class revlogoldio(object):
126 def __init__(self):
126 def __init__(self):
127 self.size = struct.calcsize(indexformatv0)
127 self.size = struct.calcsize(indexformatv0)
128
128
129 def parseindex(self, data, inline):
129 def parseindex(self, data, inline):
130 s = self.size
130 s = self.size
131 index = []
131 index = []
132 nodemap = {nullid: nullrev}
132 nodemap = {nullid: nullrev}
133 n = off = 0
133 n = off = 0
134 l = len(data)
134 l = len(data)
135 while off + s <= l:
135 while off + s <= l:
136 cur = data[off:off + s]
136 cur = data[off:off + s]
137 off += s
137 off += s
138 e = _unpack(indexformatv0, cur)
138 e = _unpack(indexformatv0, cur)
139 # transform to revlogv1 format
139 # transform to revlogv1 format
140 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
140 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
141 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
141 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
142 index.append(e2)
142 index.append(e2)
143 nodemap[e[6]] = n
143 nodemap[e[6]] = n
144 n += 1
144 n += 1
145
145
146 # add the magic null revision at -1
146 # add the magic null revision at -1
147 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
147 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
148
148
149 return index, nodemap, None
149 return index, nodemap, None
150
150
151 def packentry(self, entry, node, version, rev):
151 def packentry(self, entry, node, version, rev):
152 if gettype(entry[0]):
152 if gettype(entry[0]):
153 raise RevlogError(_("index entry flags need RevlogNG"))
153 raise RevlogError(_("index entry flags need RevlogNG"))
154 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
154 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
155 node(entry[5]), node(entry[6]), entry[7])
155 node(entry[5]), node(entry[6]), entry[7])
156 return _pack(indexformatv0, *e2)
156 return _pack(indexformatv0, *e2)
157
157
158 # index ng:
158 # index ng:
159 # 6 bytes: offset
159 # 6 bytes: offset
160 # 2 bytes: flags
160 # 2 bytes: flags
161 # 4 bytes: compressed length
161 # 4 bytes: compressed length
162 # 4 bytes: uncompressed length
162 # 4 bytes: uncompressed length
163 # 4 bytes: base rev
163 # 4 bytes: base rev
164 # 4 bytes: link rev
164 # 4 bytes: link rev
165 # 4 bytes: parent 1 rev
165 # 4 bytes: parent 1 rev
166 # 4 bytes: parent 2 rev
166 # 4 bytes: parent 2 rev
167 # 32 bytes: nodeid
167 # 32 bytes: nodeid
168 indexformatng = ">Qiiiiii20s12x"
168 indexformatng = ">Qiiiiii20s12x"
169 ngshaoffset = 32
169 ngshaoffset = 32
170 versionformat = ">I"
170 versionformat = ">I"
171
171
172 class revlogio(object):
172 class revlogio(object):
173 def __init__(self):
173 def __init__(self):
174 self.size = struct.calcsize(indexformatng)
174 self.size = struct.calcsize(indexformatng)
175
175
176 def parseindex(self, data, inline):
176 def parseindex(self, data, inline):
177 # call the C implementation to parse the index data
177 # call the C implementation to parse the index data
178 index, cache = parsers.parse_index2(data, inline)
178 index, cache = parsers.parse_index2(data, inline)
179 return index, None, cache
179 return index, None, cache
180
180
181 def packentry(self, entry, node, version, rev):
181 def packentry(self, entry, node, version, rev):
182 p = _pack(indexformatng, *entry)
182 p = _pack(indexformatng, *entry)
183 if rev == 0:
183 if rev == 0:
184 p = _pack(versionformat, version) + p[4:]
184 p = _pack(versionformat, version) + p[4:]
185 return p
185 return p
186
186
187 class revlog(object):
187 class revlog(object):
188 """
188 """
189 the underlying revision storage object
189 the underlying revision storage object
190
190
191 A revlog consists of two parts, an index and the revision data.
191 A revlog consists of two parts, an index and the revision data.
192
192
193 The index is a file with a fixed record size containing
193 The index is a file with a fixed record size containing
194 information on each revision, including its nodeid (hash), the
194 information on each revision, including its nodeid (hash), the
195 nodeids of its parents, the position and offset of its data within
195 nodeids of its parents, the position and offset of its data within
196 the data file, and the revision it's based on. Finally, each entry
196 the data file, and the revision it's based on. Finally, each entry
197 contains a linkrev entry that can serve as a pointer to external
197 contains a linkrev entry that can serve as a pointer to external
198 data.
198 data.
199
199
200 The revision data itself is a linear collection of data chunks.
200 The revision data itself is a linear collection of data chunks.
201 Each chunk represents a revision and is usually represented as a
201 Each chunk represents a revision and is usually represented as a
202 delta against the previous chunk. To bound lookup time, runs of
202 delta against the previous chunk. To bound lookup time, runs of
203 deltas are limited to about 2 times the length of the original
203 deltas are limited to about 2 times the length of the original
204 version data. This makes retrieval of a version proportional to
204 version data. This makes retrieval of a version proportional to
205 its size, or O(1) relative to the number of revisions.
205 its size, or O(1) relative to the number of revisions.
206
206
207 Both pieces of the revlog are written to in an append-only
207 Both pieces of the revlog are written to in an append-only
208 fashion, which means we never need to rewrite a file to insert or
208 fashion, which means we never need to rewrite a file to insert or
209 remove data, and can use some simple techniques to avoid the need
209 remove data, and can use some simple techniques to avoid the need
210 for locking while reading.
210 for locking while reading.
211 """
211 """
212 def __init__(self, opener, indexfile, shallowroot=None):
212 def __init__(self, opener, indexfile, shallowroot=None):
213 """
213 """
214 create a revlog object
214 create a revlog object
215
215
216 opener is a function that abstracts the file opening operation
216 opener is a function that abstracts the file opening operation
217 and can be used to implement COW semantics or the like.
217 and can be used to implement COW semantics or the like.
218 """
218 """
219 self.indexfile = indexfile
219 self.indexfile = indexfile
220 self.datafile = indexfile[:-2] + ".d"
220 self.datafile = indexfile[:-2] + ".d"
221 self.opener = opener
221 self.opener = opener
222 self._cache = None
222 self._cache = None
223 self._chunkcache = (0, '')
223 self._chunkcache = (0, '')
224 self.index = []
224 self.index = []
225 self._shallowroot = shallowroot
225 self._shallowroot = shallowroot
226 self._parentdelta = 0
226 self._parentdelta = 0
227 self._pcache = {}
227 self._pcache = {}
228 self._nodecache = {nullid: nullrev}
228 self._nodecache = {nullid: nullrev}
229 self._nodepos = None
229 self._nodepos = None
230
230
231 v = REVLOG_DEFAULT_VERSION
231 v = REVLOG_DEFAULT_VERSION
232 if hasattr(opener, 'options') and 'defversion' in opener.options:
232 if hasattr(opener, 'options') and 'defversion' in opener.options:
233 v = opener.options['defversion']
233 v = opener.options['defversion']
234 if v & REVLOGNG:
234 if v & REVLOGNG:
235 v |= REVLOGNGINLINEDATA
235 v |= REVLOGNGINLINEDATA
236 if v & REVLOGNG and 'parentdelta' in opener.options:
236 if v & REVLOGNG and 'parentdelta' in opener.options:
237 self._parentdelta = 1
237 self._parentdelta = 1
238
238
239 if shallowroot:
239 if shallowroot:
240 v |= REVLOGSHALLOW
240 v |= REVLOGSHALLOW
241
241
242 i = ''
242 i = ''
243 try:
243 try:
244 f = self.opener(self.indexfile)
244 f = self.opener(self.indexfile)
245 i = f.read()
245 i = f.read()
246 f.close()
246 f.close()
247 if len(i) > 0:
247 if len(i) > 0:
248 v = struct.unpack(versionformat, i[:4])[0]
248 v = struct.unpack(versionformat, i[:4])[0]
249 except IOError, inst:
249 except IOError, inst:
250 if inst.errno != errno.ENOENT:
250 if inst.errno != errno.ENOENT:
251 raise
251 raise
252
252
253 self.version = v
253 self.version = v
254 self._inline = v & REVLOGNGINLINEDATA
254 self._inline = v & REVLOGNGINLINEDATA
255 self._shallow = v & REVLOGSHALLOW
255 self._shallow = v & REVLOGSHALLOW
256 flags = v & ~0xFFFF
256 flags = v & ~0xFFFF
257 fmt = v & 0xFFFF
257 fmt = v & 0xFFFF
258 if fmt == REVLOGV0 and flags:
258 if fmt == REVLOGV0 and flags:
259 raise RevlogError(_("index %s unknown flags %#04x for format v0")
259 raise RevlogError(_("index %s unknown flags %#04x for format v0")
260 % (self.indexfile, flags >> 16))
260 % (self.indexfile, flags >> 16))
261 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
261 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
262 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
262 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
263 % (self.indexfile, flags >> 16))
263 % (self.indexfile, flags >> 16))
264 elif fmt > REVLOGNG:
264 elif fmt > REVLOGNG:
265 raise RevlogError(_("index %s unknown format %d")
265 raise RevlogError(_("index %s unknown format %d")
266 % (self.indexfile, fmt))
266 % (self.indexfile, fmt))
267
267
268 self._io = revlogio()
268 self._io = revlogio()
269 if self.version == REVLOGV0:
269 if self.version == REVLOGV0:
270 self._io = revlogoldio()
270 self._io = revlogoldio()
271 try:
271 try:
272 d = self._io.parseindex(i, self._inline)
272 d = self._io.parseindex(i, self._inline)
273 except (ValueError, IndexError):
273 except (ValueError, IndexError):
274 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
274 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
275 self.index, nodemap, self._chunkcache = d
275 self.index, nodemap, self._chunkcache = d
276 if nodemap is not None:
276 if nodemap is not None:
277 self.nodemap = self._nodecache = nodemap
277 self.nodemap = self._nodecache = nodemap
278 if not self._chunkcache:
278 if not self._chunkcache:
279 self._chunkclear()
279 self._chunkclear()
280
280
281 def tip(self):
281 def tip(self):
282 return self.node(len(self.index) - 2)
282 return self.node(len(self.index) - 2)
283 def __len__(self):
283 def __len__(self):
284 return len(self.index) - 1
284 return len(self.index) - 1
285 def __iter__(self):
285 def __iter__(self):
286 for i in xrange(len(self)):
286 for i in xrange(len(self)):
287 yield i
287 yield i
288
288
289 @util.propertycache
289 @util.propertycache
290 def nodemap(self):
290 def nodemap(self):
291 self.rev(self.node(0))
291 self.rev(self.node(0))
292 return self._nodecache
292 return self._nodecache
293
293
294 def rev(self, node):
294 def rev(self, node):
295 try:
295 try:
296 return self._nodecache[node]
296 return self._nodecache[node]
297 except KeyError:
297 except KeyError:
298 n = self._nodecache
298 n = self._nodecache
299 i = self.index
299 i = self.index
300 p = self._nodepos
300 p = self._nodepos
301 if p is None:
301 if p is None:
302 p = len(i) - 2
302 p = len(i) - 2
303 for r in xrange(p, -1, -1):
303 for r in xrange(p, -1, -1):
304 v = i[r][7]
304 v = i[r][7]
305 n[v] = r
305 n[v] = r
306 if v == node:
306 if v == node:
307 self._nodepos = r - 1
307 self._nodepos = r - 1
308 return r
308 return r
309 raise LookupError(node, self.indexfile, _('no node'))
309 raise LookupError(node, self.indexfile, _('no node'))
310
310
311 def node(self, rev):
311 def node(self, rev):
312 return self.index[rev][7]
312 return self.index[rev][7]
313 def linkrev(self, rev):
313 def linkrev(self, rev):
314 return self.index[rev][4]
314 return self.index[rev][4]
315 def parents(self, node):
315 def parents(self, node):
316 i = self.index
316 i = self.index
317 d = i[self.rev(node)]
317 d = i[self.rev(node)]
318 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
318 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
319 def parentrevs(self, rev):
319 def parentrevs(self, rev):
320 return self.index[rev][5:7]
320 return self.index[rev][5:7]
321 def start(self, rev):
321 def start(self, rev):
322 return int(self.index[rev][0] >> 16)
322 return int(self.index[rev][0] >> 16)
323 def end(self, rev):
323 def end(self, rev):
324 return self.start(rev) + self.length(rev)
324 return self.start(rev) + self.length(rev)
325 def length(self, rev):
325 def length(self, rev):
326 return self.index[rev][1]
326 return self.index[rev][1]
327 def base(self, rev):
327 def base(self, rev):
328 return self.index[rev][3]
328 return self.index[rev][3]
329 def flags(self, rev):
329 def flags(self, rev):
330 return self.index[rev][0] & 0xFFFF
330 return self.index[rev][0] & 0xFFFF
331 def rawsize(self, rev):
331 def rawsize(self, rev):
332 """return the length of the uncompressed text for a given revision"""
332 """return the length of the uncompressed text for a given revision"""
333 l = self.index[rev][2]
333 l = self.index[rev][2]
334 if l >= 0:
334 if l >= 0:
335 return l
335 return l
336
336
337 t = self.revision(self.node(rev))
337 t = self.revision(self.node(rev))
338 return len(t)
338 return len(t)
339 size = rawsize
339 size = rawsize
340
340
341 def reachable(self, node, stop=None):
341 def reachable(self, node, stop=None):
342 """return the set of all nodes ancestral to a given node, including
342 """return the set of all nodes ancestral to a given node, including
343 the node itself, stopping when stop is matched"""
343 the node itself, stopping when stop is matched"""
344 reachable = set((node,))
344 reachable = set((node,))
345 visit = [node]
345 visit = [node]
346 if stop:
346 if stop:
347 stopn = self.rev(stop)
347 stopn = self.rev(stop)
348 else:
348 else:
349 stopn = 0
349 stopn = 0
350 while visit:
350 while visit:
351 n = visit.pop(0)
351 n = visit.pop(0)
352 if n == stop:
352 if n == stop:
353 continue
353 continue
354 if n == nullid:
354 if n == nullid:
355 continue
355 continue
356 for p in self.parents(n):
356 for p in self.parents(n):
357 if self.rev(p) < stopn:
357 if self.rev(p) < stopn:
358 continue
358 continue
359 if p not in reachable:
359 if p not in reachable:
360 reachable.add(p)
360 reachable.add(p)
361 visit.append(p)
361 visit.append(p)
362 return reachable
362 return reachable
363
363
364 def ancestors(self, *revs):
364 def ancestors(self, *revs):
365 """Generate the ancestors of 'revs' in reverse topological order.
365 """Generate the ancestors of 'revs' in reverse topological order.
366
366
367 Yield a sequence of revision numbers starting with the parents
367 Yield a sequence of revision numbers starting with the parents
368 of each revision in revs, i.e., each revision is *not* considered
368 of each revision in revs, i.e., each revision is *not* considered
369 an ancestor of itself. Results are in breadth-first order:
369 an ancestor of itself. Results are in breadth-first order:
370 parents of each rev in revs, then parents of those, etc. Result
370 parents of each rev in revs, then parents of those, etc. Result
371 does not include the null revision."""
371 does not include the null revision."""
372 visit = list(revs)
372 visit = list(revs)
373 seen = set([nullrev])
373 seen = set([nullrev])
374 while visit:
374 while visit:
375 for parent in self.parentrevs(visit.pop(0)):
375 for parent in self.parentrevs(visit.pop(0)):
376 if parent not in seen:
376 if parent not in seen:
377 visit.append(parent)
377 visit.append(parent)
378 seen.add(parent)
378 seen.add(parent)
379 yield parent
379 yield parent
380
380
381 def descendants(self, *revs):
381 def descendants(self, *revs):
382 """Generate the descendants of 'revs' in revision order.
382 """Generate the descendants of 'revs' in revision order.
383
383
384 Yield a sequence of revision numbers starting with a child of
384 Yield a sequence of revision numbers starting with a child of
385 some rev in revs, i.e., each revision is *not* considered a
385 some rev in revs, i.e., each revision is *not* considered a
386 descendant of itself. Results are ordered by revision number (a
386 descendant of itself. Results are ordered by revision number (a
387 topological sort)."""
387 topological sort)."""
388 first = min(revs)
388 first = min(revs)
389 if first == nullrev:
389 if first == nullrev:
390 for i in self:
390 for i in self:
391 yield i
391 yield i
392 return
392 return
393
393
394 seen = set(revs)
394 seen = set(revs)
395 for i in xrange(first + 1, len(self)):
395 for i in xrange(first + 1, len(self)):
396 for x in self.parentrevs(i):
396 for x in self.parentrevs(i):
397 if x != nullrev and x in seen:
397 if x != nullrev and x in seen:
398 seen.add(i)
398 seen.add(i)
399 yield i
399 yield i
400 break
400 break
401
401
402 def findcommonmissing(self, common=None, heads=None):
402 def findcommonmissing(self, common=None, heads=None):
403 """Return a tuple of the ancestors of common and the ancestors of heads
403 """Return a tuple of the ancestors of common and the ancestors of heads
404 that are not ancestors of common.
404 that are not ancestors of common.
405
405
406 More specifically, the second element is a list of nodes N such that
406 More specifically, the second element is a list of nodes N such that
407 every N satisfies the following constraints:
407 every N satisfies the following constraints:
408
408
409 1. N is an ancestor of some node in 'heads'
409 1. N is an ancestor of some node in 'heads'
410 2. N is not an ancestor of any node in 'common'
410 2. N is not an ancestor of any node in 'common'
411
411
412 The list is sorted by revision number, meaning it is
412 The list is sorted by revision number, meaning it is
413 topologically sorted.
413 topologically sorted.
414
414
415 'heads' and 'common' are both lists of node IDs. If heads is
415 'heads' and 'common' are both lists of node IDs. If heads is
416 not supplied, uses all of the revlog's heads. If common is not
416 not supplied, uses all of the revlog's heads. If common is not
417 supplied, uses nullid."""
417 supplied, uses nullid."""
418 if common is None:
418 if common is None:
419 common = [nullid]
419 common = [nullid]
420 if heads is None:
420 if heads is None:
421 heads = self.heads()
421 heads = self.heads()
422
422
423 common = [self.rev(n) for n in common]
423 common = [self.rev(n) for n in common]
424 heads = [self.rev(n) for n in heads]
424 heads = [self.rev(n) for n in heads]
425
425
426 # we want the ancestors, but inclusive
426 # we want the ancestors, but inclusive
427 has = set(self.ancestors(*common))
427 has = set(self.ancestors(*common))
428 has.add(nullrev)
428 has.add(nullrev)
429 has.update(common)
429 has.update(common)
430
430
431 # take all ancestors from heads that aren't in has
431 # take all ancestors from heads that aren't in has
432 missing = set()
432 missing = set()
433 visit = [r for r in heads if r not in has]
433 visit = [r for r in heads if r not in has]
434 while visit:
434 while visit:
435 r = visit.pop(0)
435 r = visit.pop(0)
436 if r in missing:
436 if r in missing:
437 continue
437 continue
438 else:
438 else:
439 missing.add(r)
439 missing.add(r)
440 for p in self.parentrevs(r):
440 for p in self.parentrevs(r):
441 if p not in has:
441 if p not in has:
442 visit.append(p)
442 visit.append(p)
443 missing = list(missing)
443 missing = list(missing)
444 missing.sort()
444 missing.sort()
445 return has, [self.node(r) for r in missing]
445 return has, [self.node(r) for r in missing]
446
446
447 def findmissing(self, common=None, heads=None):
447 def findmissing(self, common=None, heads=None):
448 """Return the ancestors of heads that are not ancestors of common.
448 """Return the ancestors of heads that are not ancestors of common.
449
449
450 More specifically, return a list of nodes N such that every N
450 More specifically, return a list of nodes N such that every N
451 satisfies the following constraints:
451 satisfies the following constraints:
452
452
453 1. N is an ancestor of some node in 'heads'
453 1. N is an ancestor of some node in 'heads'
454 2. N is not an ancestor of any node in 'common'
454 2. N is not an ancestor of any node in 'common'
455
455
456 The list is sorted by revision number, meaning it is
456 The list is sorted by revision number, meaning it is
457 topologically sorted.
457 topologically sorted.
458
458
459 'heads' and 'common' are both lists of node IDs. If heads is
459 'heads' and 'common' are both lists of node IDs. If heads is
460 not supplied, uses all of the revlog's heads. If common is not
460 not supplied, uses all of the revlog's heads. If common is not
461 supplied, uses nullid."""
461 supplied, uses nullid."""
462 _common, missing = self.findcommonmissing(common, heads)
462 _common, missing = self.findcommonmissing(common, heads)
463 return missing
463 return missing
464
464
465 def nodesbetween(self, roots=None, heads=None):
465 def nodesbetween(self, roots=None, heads=None):
466 """Return a topological path from 'roots' to 'heads'.
466 """Return a topological path from 'roots' to 'heads'.
467
467
468 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
468 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
469 topologically sorted list of all nodes N that satisfy both of
469 topologically sorted list of all nodes N that satisfy both of
470 these constraints:
470 these constraints:
471
471
472 1. N is a descendant of some node in 'roots'
472 1. N is a descendant of some node in 'roots'
473 2. N is an ancestor of some node in 'heads'
473 2. N is an ancestor of some node in 'heads'
474
474
475 Every node is considered to be both a descendant and an ancestor
475 Every node is considered to be both a descendant and an ancestor
476 of itself, so every reachable node in 'roots' and 'heads' will be
476 of itself, so every reachable node in 'roots' and 'heads' will be
477 included in 'nodes'.
477 included in 'nodes'.
478
478
479 'outroots' is the list of reachable nodes in 'roots', i.e., the
479 'outroots' is the list of reachable nodes in 'roots', i.e., the
480 subset of 'roots' that is returned in 'nodes'. Likewise,
480 subset of 'roots' that is returned in 'nodes'. Likewise,
481 'outheads' is the subset of 'heads' that is also in 'nodes'.
481 'outheads' is the subset of 'heads' that is also in 'nodes'.
482
482
483 'roots' and 'heads' are both lists of node IDs. If 'roots' is
483 'roots' and 'heads' are both lists of node IDs. If 'roots' is
484 unspecified, uses nullid as the only root. If 'heads' is
484 unspecified, uses nullid as the only root. If 'heads' is
485 unspecified, uses list of all of the revlog's heads."""
485 unspecified, uses list of all of the revlog's heads."""
486 nonodes = ([], [], [])
486 nonodes = ([], [], [])
487 if roots is not None:
487 if roots is not None:
488 roots = list(roots)
488 roots = list(roots)
489 if not roots:
489 if not roots:
490 return nonodes
490 return nonodes
491 lowestrev = min([self.rev(n) for n in roots])
491 lowestrev = min([self.rev(n) for n in roots])
492 else:
492 else:
493 roots = [nullid] # Everybody's a descendent of nullid
493 roots = [nullid] # Everybody's a descendent of nullid
494 lowestrev = nullrev
494 lowestrev = nullrev
495 if (lowestrev == nullrev) and (heads is None):
495 if (lowestrev == nullrev) and (heads is None):
496 # We want _all_ the nodes!
496 # We want _all_ the nodes!
497 return ([self.node(r) for r in self], [nullid], list(self.heads()))
497 return ([self.node(r) for r in self], [nullid], list(self.heads()))
498 if heads is None:
498 if heads is None:
499 # All nodes are ancestors, so the latest ancestor is the last
499 # All nodes are ancestors, so the latest ancestor is the last
500 # node.
500 # node.
501 highestrev = len(self) - 1
501 highestrev = len(self) - 1
502 # Set ancestors to None to signal that every node is an ancestor.
502 # Set ancestors to None to signal that every node is an ancestor.
503 ancestors = None
503 ancestors = None
504 # Set heads to an empty dictionary for later discovery of heads
504 # Set heads to an empty dictionary for later discovery of heads
505 heads = {}
505 heads = {}
506 else:
506 else:
507 heads = list(heads)
507 heads = list(heads)
508 if not heads:
508 if not heads:
509 return nonodes
509 return nonodes
510 ancestors = set()
510 ancestors = set()
511 # Turn heads into a dictionary so we can remove 'fake' heads.
511 # Turn heads into a dictionary so we can remove 'fake' heads.
512 # Also, later we will be using it to filter out the heads we can't
512 # Also, later we will be using it to filter out the heads we can't
513 # find from roots.
513 # find from roots.
514 heads = dict.fromkeys(heads, 0)
514 heads = dict.fromkeys(heads, 0)
515 # Start at the top and keep marking parents until we're done.
515 # Start at the top and keep marking parents until we're done.
516 nodestotag = set(heads)
516 nodestotag = set(heads)
517 # Remember where the top was so we can use it as a limit later.
517 # Remember where the top was so we can use it as a limit later.
518 highestrev = max([self.rev(n) for n in nodestotag])
518 highestrev = max([self.rev(n) for n in nodestotag])
519 while nodestotag:
519 while nodestotag:
520 # grab a node to tag
520 # grab a node to tag
521 n = nodestotag.pop()
521 n = nodestotag.pop()
522 # Never tag nullid
522 # Never tag nullid
523 if n == nullid:
523 if n == nullid:
524 continue
524 continue
525 # A node's revision number represents its place in a
525 # A node's revision number represents its place in a
526 # topologically sorted list of nodes.
526 # topologically sorted list of nodes.
527 r = self.rev(n)
527 r = self.rev(n)
528 if r >= lowestrev:
528 if r >= lowestrev:
529 if n not in ancestors:
529 if n not in ancestors:
530 # If we are possibly a descendent of one of the roots
530 # If we are possibly a descendent of one of the roots
531 # and we haven't already been marked as an ancestor
531 # and we haven't already been marked as an ancestor
532 ancestors.add(n) # Mark as ancestor
532 ancestors.add(n) # Mark as ancestor
533 # Add non-nullid parents to list of nodes to tag.
533 # Add non-nullid parents to list of nodes to tag.
534 nodestotag.update([p for p in self.parents(n) if
534 nodestotag.update([p for p in self.parents(n) if
535 p != nullid])
535 p != nullid])
536 elif n in heads: # We've seen it before, is it a fake head?
536 elif n in heads: # We've seen it before, is it a fake head?
537 # So it is, real heads should not be the ancestors of
537 # So it is, real heads should not be the ancestors of
538 # any other heads.
538 # any other heads.
539 heads.pop(n)
539 heads.pop(n)
540 if not ancestors:
540 if not ancestors:
541 return nonodes
541 return nonodes
542 # Now that we have our set of ancestors, we want to remove any
542 # Now that we have our set of ancestors, we want to remove any
543 # roots that are not ancestors.
543 # roots that are not ancestors.
544
544
545 # If one of the roots was nullid, everything is included anyway.
545 # If one of the roots was nullid, everything is included anyway.
546 if lowestrev > nullrev:
546 if lowestrev > nullrev:
547 # But, since we weren't, let's recompute the lowest rev to not
547 # But, since we weren't, let's recompute the lowest rev to not
548 # include roots that aren't ancestors.
548 # include roots that aren't ancestors.
549
549
550 # Filter out roots that aren't ancestors of heads
550 # Filter out roots that aren't ancestors of heads
551 roots = [n for n in roots if n in ancestors]
551 roots = [n for n in roots if n in ancestors]
552 # Recompute the lowest revision
552 # Recompute the lowest revision
553 if roots:
553 if roots:
554 lowestrev = min([self.rev(n) for n in roots])
554 lowestrev = min([self.rev(n) for n in roots])
555 else:
555 else:
556 # No more roots? Return empty list
556 # No more roots? Return empty list
557 return nonodes
557 return nonodes
558 else:
558 else:
559 # We are descending from nullid, and don't need to care about
559 # We are descending from nullid, and don't need to care about
560 # any other roots.
560 # any other roots.
561 lowestrev = nullrev
561 lowestrev = nullrev
562 roots = [nullid]
562 roots = [nullid]
563 # Transform our roots list into a set.
563 # Transform our roots list into a set.
564 descendents = set(roots)
564 descendents = set(roots)
565 # Also, keep the original roots so we can filter out roots that aren't
565 # Also, keep the original roots so we can filter out roots that aren't
566 # 'real' roots (i.e. are descended from other roots).
566 # 'real' roots (i.e. are descended from other roots).
567 roots = descendents.copy()
567 roots = descendents.copy()
568 # Our topologically sorted list of output nodes.
568 # Our topologically sorted list of output nodes.
569 orderedout = []
569 orderedout = []
570 # Don't start at nullid since we don't want nullid in our output list,
570 # Don't start at nullid since we don't want nullid in our output list,
571 # and if nullid shows up in descedents, empty parents will look like
571 # and if nullid shows up in descedents, empty parents will look like
572 # they're descendents.
572 # they're descendents.
573 for r in xrange(max(lowestrev, 0), highestrev + 1):
573 for r in xrange(max(lowestrev, 0), highestrev + 1):
574 n = self.node(r)
574 n = self.node(r)
575 isdescendent = False
575 isdescendent = False
576 if lowestrev == nullrev: # Everybody is a descendent of nullid
576 if lowestrev == nullrev: # Everybody is a descendent of nullid
577 isdescendent = True
577 isdescendent = True
578 elif n in descendents:
578 elif n in descendents:
579 # n is already a descendent
579 # n is already a descendent
580 isdescendent = True
580 isdescendent = True
581 # This check only needs to be done here because all the roots
581 # This check only needs to be done here because all the roots
582 # will start being marked is descendents before the loop.
582 # will start being marked is descendents before the loop.
583 if n in roots:
583 if n in roots:
584 # If n was a root, check if it's a 'real' root.
584 # If n was a root, check if it's a 'real' root.
585 p = tuple(self.parents(n))
585 p = tuple(self.parents(n))
586 # If any of its parents are descendents, it's not a root.
586 # If any of its parents are descendents, it's not a root.
587 if (p[0] in descendents) or (p[1] in descendents):
587 if (p[0] in descendents) or (p[1] in descendents):
588 roots.remove(n)
588 roots.remove(n)
589 else:
589 else:
590 p = tuple(self.parents(n))
590 p = tuple(self.parents(n))
591 # A node is a descendent if either of its parents are
591 # A node is a descendent if either of its parents are
592 # descendents. (We seeded the dependents list with the roots
592 # descendents. (We seeded the dependents list with the roots
593 # up there, remember?)
593 # up there, remember?)
594 if (p[0] in descendents) or (p[1] in descendents):
594 if (p[0] in descendents) or (p[1] in descendents):
595 descendents.add(n)
595 descendents.add(n)
596 isdescendent = True
596 isdescendent = True
597 if isdescendent and ((ancestors is None) or (n in ancestors)):
597 if isdescendent and ((ancestors is None) or (n in ancestors)):
598 # Only include nodes that are both descendents and ancestors.
598 # Only include nodes that are both descendents and ancestors.
599 orderedout.append(n)
599 orderedout.append(n)
600 if (ancestors is not None) and (n in heads):
600 if (ancestors is not None) and (n in heads):
601 # We're trying to figure out which heads are reachable
601 # We're trying to figure out which heads are reachable
602 # from roots.
602 # from roots.
603 # Mark this head as having been reached
603 # Mark this head as having been reached
604 heads[n] = 1
604 heads[n] = 1
605 elif ancestors is None:
605 elif ancestors is None:
606 # Otherwise, we're trying to discover the heads.
606 # Otherwise, we're trying to discover the heads.
607 # Assume this is a head because if it isn't, the next step
607 # Assume this is a head because if it isn't, the next step
608 # will eventually remove it.
608 # will eventually remove it.
609 heads[n] = 1
609 heads[n] = 1
610 # But, obviously its parents aren't.
610 # But, obviously its parents aren't.
611 for p in self.parents(n):
611 for p in self.parents(n):
612 heads.pop(p, None)
612 heads.pop(p, None)
613 heads = [n for n in heads.iterkeys() if heads[n] != 0]
613 heads = [n for n in heads.iterkeys() if heads[n] != 0]
614 roots = list(roots)
614 roots = list(roots)
615 assert orderedout
615 assert orderedout
616 assert roots
616 assert roots
617 assert heads
617 assert heads
618 return (orderedout, roots, heads)
618 return (orderedout, roots, heads)
619
619
620 def heads(self, start=None, stop=None):
620 def heads(self, start=None, stop=None):
621 """return the list of all nodes that have no children
621 """return the list of all nodes that have no children
622
622
623 if start is specified, only heads that are descendants of
623 if start is specified, only heads that are descendants of
624 start will be returned
624 start will be returned
625 if stop is specified, it will consider all the revs from stop
625 if stop is specified, it will consider all the revs from stop
626 as if they had no children
626 as if they had no children
627 """
627 """
628 if start is None and stop is None:
628 if start is None and stop is None:
629 count = len(self)
629 count = len(self)
630 if not count:
630 if not count:
631 return [nullid]
631 return [nullid]
632 ishead = [1] * (count + 1)
632 ishead = [1] * (count + 1)
633 index = self.index
633 index = self.index
634 for r in xrange(count):
634 for r in xrange(count):
635 e = index[r]
635 e = index[r]
636 ishead[e[5]] = ishead[e[6]] = 0
636 ishead[e[5]] = ishead[e[6]] = 0
637 return [self.node(r) for r in xrange(count) if ishead[r]]
637 return [self.node(r) for r in xrange(count) if ishead[r]]
638
638
639 if start is None:
639 if start is None:
640 start = nullid
640 start = nullid
641 if stop is None:
641 if stop is None:
642 stop = []
642 stop = []
643 stoprevs = set([self.rev(n) for n in stop])
643 stoprevs = set([self.rev(n) for n in stop])
644 startrev = self.rev(start)
644 startrev = self.rev(start)
645 reachable = set((startrev,))
645 reachable = set((startrev,))
646 heads = set((startrev,))
646 heads = set((startrev,))
647
647
648 parentrevs = self.parentrevs
648 parentrevs = self.parentrevs
649 for r in xrange(startrev + 1, len(self)):
649 for r in xrange(startrev + 1, len(self)):
650 for p in parentrevs(r):
650 for p in parentrevs(r):
651 if p in reachable:
651 if p in reachable:
652 if r not in stoprevs:
652 if r not in stoprevs:
653 reachable.add(r)
653 reachable.add(r)
654 heads.add(r)
654 heads.add(r)
655 if p in heads and p not in stoprevs:
655 if p in heads and p not in stoprevs:
656 heads.remove(p)
656 heads.remove(p)
657
657
658 return [self.node(r) for r in heads]
658 return [self.node(r) for r in heads]
659
659
660 def children(self, node):
660 def children(self, node):
661 """find the children of a given node"""
661 """find the children of a given node"""
662 c = []
662 c = []
663 p = self.rev(node)
663 p = self.rev(node)
664 for r in range(p + 1, len(self)):
664 for r in range(p + 1, len(self)):
665 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
665 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
666 if prevs:
666 if prevs:
667 for pr in prevs:
667 for pr in prevs:
668 if pr == p:
668 if pr == p:
669 c.append(self.node(r))
669 c.append(self.node(r))
670 elif p == nullrev:
670 elif p == nullrev:
671 c.append(self.node(r))
671 c.append(self.node(r))
672 return c
672 return c
673
673
674 def descendant(self, start, end):
674 def descendant(self, start, end):
675 if start == nullrev:
675 if start == nullrev:
676 return True
676 return True
677 for i in self.descendants(start):
677 for i in self.descendants(start):
678 if i == end:
678 if i == end:
679 return True
679 return True
680 elif i > end:
680 elif i > end:
681 break
681 break
682 return False
682 return False
683
683
684 def ancestor(self, a, b):
684 def ancestor(self, a, b):
685 """calculate the least common ancestor of nodes a and b"""
685 """calculate the least common ancestor of nodes a and b"""
686
686
687 # fast path, check if it is a descendant
687 # fast path, check if it is a descendant
688 a, b = self.rev(a), self.rev(b)
688 a, b = self.rev(a), self.rev(b)
689 start, end = sorted((a, b))
689 start, end = sorted((a, b))
690 if self.descendant(start, end):
690 if self.descendant(start, end):
691 return self.node(start)
691 return self.node(start)
692
692
693 def parents(rev):
693 def parents(rev):
694 return [p for p in self.parentrevs(rev) if p != nullrev]
694 return [p for p in self.parentrevs(rev) if p != nullrev]
695
695
696 c = ancestor.ancestor(a, b, parents)
696 c = ancestor.ancestor(a, b, parents)
697 if c is None:
697 if c is None:
698 return nullid
698 return nullid
699
699
700 return self.node(c)
700 return self.node(c)
701
701
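ancestor.ancestor(a, b, parents) is handed nothing but a parents callback, so any least-common-ancestor routine with that contract slots in. A rough stand-in, assuming revs are topologically ordered (parents numerically below children, -1 meaning no parent); it picks the highest-numbered common ancestor, one valid answer, whereas the real implementation breaks ties by generation depth:

    def simplelca(a, b, parents):
        def ancestors(rev):
            seen, stack = set(), [rev]
            while stack:
                r = stack.pop()
                if r not in seen:
                    seen.add(r)
                    stack.extend(parents(r))
            return seen

        common = ancestors(a) & ancestors(b)
        return max(common) if common else None

    parenttable = {0: [], 1: [0], 2: [0], 3: [1, 2]}
    assert simplelca(1, 2, lambda r: parenttable[r]) == 0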
702 def _match(self, id):
702 def _match(self, id):
703 if isinstance(id, (long, int)):
703 if isinstance(id, (long, int)):
704 # rev
704 # rev
705 return self.node(id)
705 return self.node(id)
706 if len(id) == 20:
706 if len(id) == 20:
707 # possibly a binary node
707 # possibly a binary node
708 # odds of a binary node being all hex in ASCII are 1 in 10**25
708 # odds of a binary node being all hex in ASCII are 1 in 10**25
709 try:
709 try:
710 node = id
710 node = id
711 self.rev(node) # quick search the index
711 self.rev(node) # quick search the index
712 return node
712 return node
713 except LookupError:
713 except LookupError:
714 pass # may be partial hex id
714 pass # may be partial hex id
715 try:
715 try:
716 # str(rev)
716 # str(rev)
717 rev = int(id)
717 rev = int(id)
718 if str(rev) != id:
718 if str(rev) != id:
719 raise ValueError
719 raise ValueError
720 if rev < 0:
720 if rev < 0:
721 rev = len(self) + rev
721 rev = len(self) + rev
722 if rev < 0 or rev >= len(self):
722 if rev < 0 or rev >= len(self):
723 raise ValueError
723 raise ValueError
724 return self.node(rev)
724 return self.node(rev)
725 except (ValueError, OverflowError):
725 except (ValueError, OverflowError):
726 pass
726 pass
727 if len(id) == 40:
727 if len(id) == 40:
728 try:
728 try:
729 # a full hex nodeid?
729 # a full hex nodeid?
730 node = bin(id)
730 node = bin(id)
731 self.rev(node)
731 self.rev(node)
732 return node
732 return node
733 except (TypeError, LookupError):
733 except (TypeError, LookupError):
734 pass
734 pass
735
735
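The ladder in _match() tries progressively weaker readings of id: an integer rev, a 20-byte binary node, a decimal string rev (negatives counting from the end), then a full 40-char hex node, returning None so lookup() can fall back to prefix matching. Restated over a toy node list; toymatch and nodes are illustrative stand-ins, not the revlog API:

    import binascii

    def toymatch(id, nodes):
        if isinstance(id, int):                # a rev number
            return nodes[id]
        if len(id) == 20 and id in nodes:      # a binary nodeid
            return id
        try:                                   # str(rev), sign included
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if -len(nodes) <= rev < len(nodes):
                return nodes[rev]
        except ValueError:
            pass
        if len(id) == 40:                      # a full hex nodeid
            try:
                node = binascii.unhexlify(id)
            except (TypeError, ValueError):    # not actually hex
                return None
            if node in nodes:
                return node
        return None                            # partial hex handled later

    nodes = [binascii.unhexlify('ab' * 20)]
    assert toymatch('ab' * 20, nodes) == nodes[0]
    assert toymatch(0, nodes) == nodes[0]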
736 def _partialmatch(self, id):
736 def _partialmatch(self, id):
737 if id in self._pcache:
737 if id in self._pcache:
738 return self._pcache[id]
738 return self._pcache[id]
739
739
740 if len(id) < 40:
740 if len(id) < 40:
741 try:
741 try:
742 # hex(node)[:...]
742 # hex(node)[:...]
743 l = len(id) // 2 # grab an even number of digits
743 l = len(id) // 2 # grab an even number of digits
744 prefix = bin(id[:l * 2])
744 prefix = bin(id[:l * 2])
745 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
745 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
746 nl = [n for n in nl if hex(n).startswith(id)]
746 nl = [n for n in nl if hex(n).startswith(id)]
747 if len(nl) > 0:
747 if len(nl) > 0:
748 if len(nl) == 1:
748 if len(nl) == 1:
749 self._pcache[id] = nl[0]
749 self._pcache[id] = nl[0]
750 return nl[0]
750 return nl[0]
751 raise LookupError(id, self.indexfile,
751 raise LookupError(id, self.indexfile,
752 _('ambiguous identifier'))
752 _('ambiguous identifier'))
753 return None
753 return None
754 except TypeError:
754 except TypeError:
755 pass
755 pass
756
756
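_partialmatch()'s rule is simply that a hex prefix must select exactly one node: zero matches fall through to the other lookup paths, two or more raise the 'ambiguous identifier' error. A minimal model of that rule, ignoring the _pcache layer and the even-digit binary prefilter:

    def partialmatch(hexprefix, hexnodes):
        matches = [n for n in hexnodes if n.startswith(hexprefix)]
        if not matches:
            return None                  # let other lookups try
        if len(matches) > 1:
            raise LookupError('ambiguous identifier: %s' % hexprefix)
        return matches[0]

    nodes = ['ab12' + '0' * 36, 'ab34' + '0' * 36]
    assert partialmatch('ab1', nodes) == nodes[0]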
757 def lookup(self, id):
757 def lookup(self, id):
758 """locate a node based on:
758 """locate a node based on:
759 - revision number or str(revision number)
759 - revision number or str(revision number)
760 - nodeid or subset of hex nodeid
760 - nodeid or subset of hex nodeid
761 """
761 """
762 n = self._match(id)
762 n = self._match(id)
763 if n is not None:
763 if n is not None:
764 return n
764 return n
765 n = self._partialmatch(id)
765 n = self._partialmatch(id)
766 if n:
766 if n:
767 return n
767 return n
768
768
769 raise LookupError(id, self.indexfile, _('no match found'))
769 raise LookupError(id, self.indexfile, _('no match found'))
770
770

771 def cmp(self, node, text):
771 def cmp(self, node, text):
772 """compare text with a given file revision
772 """compare text with a given file revision
773
773
774 returns True if text is different from what is stored.
774 returns True if text is different from what is stored.
775 """
775 """
776 p1, p2 = self.parents(node)
776 p1, p2 = self.parents(node)
777 return hash(text, p1, p2) != node
777 return hash(text, p1, p2) != node
778
778
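cmp() can compare by hashing because a revlog nodeid is a content hash. A sketch of the hash() it calls, following revlog's nodeid construction: SHA-1 over the two parent nodes in sorted order, then the text.

    import hashlib

    def nodehash(text, p1, p2):
        a, b = sorted([p1, p2])
        s = hashlib.sha1(a)
        s.update(b)
        s.update(text)
        return s.digest()

    def differs(node, text, p1, p2):
        # mirrors cmp(): True when text does not match the stored revision
        return nodehash(text, p1, p2) != node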
779 def _addchunk(self, offset, data):
779 def _addchunk(self, offset, data):
780 o, d = self._chunkcache
780 o, d = self._chunkcache
781 # try to add to existing cache
781 # try to add to existing cache
782 if o + len(d) == offset and len(d) + len(data) < _chunksize:
782 if o + len(d) == offset and len(d) + len(data) < _chunksize:
783 self._chunkcache = o, d + data
783 self._chunkcache = o, d + data
784 else:
784 else:
785 self._chunkcache = offset, data
785 self._chunkcache = offset, data
786
786
787 def _loadchunk(self, offset, length):
787 def _loadchunk(self, offset, length):
788 if self._inline:
788 if self._inline:
789 df = self.opener(self.indexfile)
789 df = self.opener(self.indexfile)
790 else:
790 else:
791 df = self.opener(self.datafile)
791 df = self.opener(self.datafile)
792
792
793 readahead = max(65536, length)
793 readahead = max(65536, length)
794 df.seek(offset)
794 df.seek(offset)
795 d = df.read(readahead)
795 d = df.read(readahead)
796 self._addchunk(offset, d)
796 self._addchunk(offset, d)
797 if readahead > length:
797 if readahead > length:
798 return d[:length]
798 return d[:length]
799 return d
799 return d
800
800
801 def _getchunk(self, offset, length):
801 def _getchunk(self, offset, length):
802 o, d = self._chunkcache
802 o, d = self._chunkcache
803 l = len(d)
803 l = len(d)
804
804
805 # is it in the cache?
805 # is it in the cache?
806 cachestart = offset - o
806 cachestart = offset - o
807 cacheend = cachestart + length
807 cacheend = cachestart + length
808 if cachestart >= 0 and cacheend <= l:
808 if cachestart >= 0 and cacheend <= l:
809 if cachestart == 0 and cacheend == l:
809 if cachestart == 0 and cacheend == l:
810 return d # avoid a copy
810 return d # avoid a copy
811 return d[cachestart:cacheend]
811 return d[cachestart:cacheend]
812
812
813 return self._loadchunk(offset, length)
813 return self._loadchunk(offset, length)
814
814
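_addchunk, _loadchunk and _getchunk together implement a single sliding window over the data file: one (offset, data) buffer, refilled with at least 64k of read-ahead, serving slices whenever a request falls entirely inside the window. A condensed sketch that omits _addchunk's append-merge path; chunkcache is a hypothetical name and read is a caller-supplied file accessor:

    class chunkcache(object):
        def __init__(self, read):
            self._read = read            # read(offset, length) hits the file
            self._cache = (0, '')

        def get(self, offset, length):
            o, d = self._cache
            start = offset - o
            if start >= 0 and start + length <= len(d):
                return d[start:start + length]             # served from cache
            data = self._read(offset, max(65536, length))  # read ahead
            self._cache = (offset, data)
            return data[:length]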
815 def _chunkraw(self, startrev, endrev):
815 def _chunkraw(self, startrev, endrev):
816 start = self.start(startrev)
816 start = self.start(startrev)
817 length = self.end(endrev) - start
817 length = self.end(endrev) - start
818 if self._inline:
818 if self._inline:
819 start += (startrev + 1) * self._io.size
819 start += (startrev + 1) * self._io.size
820 return self._getchunk(start, length)
820 return self._getchunk(start, length)
821
821
822 def _chunk(self, rev):
822 def _chunk(self, rev):
823 return decompress(self._chunkraw(rev, rev))
823 return decompress(self._chunkraw(rev, rev))
824
824
825 def _chunkbase(self, rev):
825 def _chunkbase(self, rev):
826 return self._chunk(rev)
826 return self._chunk(rev)
827
827
828 def _chunkclear(self):
828 def _chunkclear(self):
829 self._chunkcache = (0, '')
829 self._chunkcache = (0, '')
830
830
831 def deltaparent(self, rev):
831 def deltaparent(self, rev):
832 """return previous revision or parentrev according to flags"""
832 """return previous revision or parentrev according to flags"""
833 if self.flags(rev) & REVIDX_PARENTDELTA:
833 if self.flags(rev) & REVIDX_PARENTDELTA:
834 return self.parentrevs(rev)[0]
834 return self.parentrevs(rev)[0]
835 else:
835 else:
836 return rev - 1
836 return rev - 1
837
837
838 def revdiff(self, rev1, rev2):
838 def revdiff(self, rev1, rev2):
839 """return or calculate a delta between two revisions"""
839 """return or calculate a delta between two revisions"""
840 if self.base(rev2) != rev2 and self.deltaparent(rev2) == rev1:
840 if self.base(rev2) != rev2 and self.deltaparent(rev2) == rev1:
841 return self._chunk(rev2)
841 return self._chunk(rev2)
842
842
843 return mdiff.textdiff(self.revision(self.node(rev1)),
843 return mdiff.textdiff(self.revision(self.node(rev1)),
844 self.revision(self.node(rev2)))
844 self.revision(self.node(rev2)))
845
845
846 def revision(self, node):
846 def revision(self, node):
847 """return an uncompressed revision of a given node"""
847 """return an uncompressed revision of a given node"""
848 cachedrev = None
848 cachedrev = None
849 if node == nullid:
849 if node == nullid:
850 return ""
850 return ""
851 if self._cache:
851 if self._cache:
852 if self._cache[0] == node:
852 if self._cache[0] == node:
853 return self._cache[2]
853 return self._cache[2]
854 cachedrev = self._cache[1]
854 cachedrev = self._cache[1]
855
855
856 # look up what we need to read
856 # look up what we need to read
857 text = None
857 text = None
858 rev = self.rev(node)
858 rev = self.rev(node)
859 base = self.base(rev)
859 base = self.base(rev)
860
860
861 # check rev flags
861 # check rev flags
862 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
862 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
863 raise RevlogError(_('incompatible revision flag %x') %
863 raise RevlogError(_('incompatible revision flag %x') %
864 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
864 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
865
865
866 # build delta chain
866 # build delta chain
867 chain = []
867 chain = []
868 index = self.index # for performance
868 index = self.index # for performance
869 iterrev = rev
869 iterrev = rev
870 e = index[iterrev]
870 e = index[iterrev]
871 while iterrev != base and iterrev != cachedrev:
871 while iterrev != base and iterrev != cachedrev:
872 chain.append(iterrev)
872 chain.append(iterrev)
873 if e[0] & REVIDX_PARENTDELTA:
873 if e[0] & REVIDX_PARENTDELTA:
874 iterrev = e[5]
874 iterrev = e[5]
875 else:
875 else:
876 iterrev -= 1
876 iterrev -= 1
877 e = index[iterrev]
877 e = index[iterrev]
878 chain.reverse()
878 chain.reverse()
879 base = iterrev
879 base = iterrev
880
880
881 if iterrev == cachedrev:
881 if iterrev == cachedrev:
882 # cache hit
882 # cache hit
883 text = self._cache[2]
883 text = self._cache[2]
884
884
885 # drop cache to save memory
885 # drop cache to save memory
886 self._cache = None
886 self._cache = None
887
887
888 self._chunkraw(base, rev)
888 self._chunkraw(base, rev)
889 if text is None:
889 if text is None:
890 text = self._chunkbase(base)
890 text = self._chunkbase(base)
891
891
892 bins = [self._chunk(r) for r in chain]
892 bins = [self._chunk(r) for r in chain]
893 text = mdiff.patches(text, bins)
893 text = mdiff.patches(text, bins)
894
894
895 text = self._checkhash(text, node, rev)
895 text = self._checkhash(text, node, rev)
896
896
897 self._cache = (node, rev, text)
897 self._cache = (node, rev, text)
898 return text
898 return text
899
899
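Stripped of caching and flag checks, revision() is "walk back to the chain base, then patch forward". A sketch of that core with the parent-delta branch ignored: deltas[r] turns rev r-1's text into rev r's, and patch is a caller-supplied patcher standing in for mdiff.patches.

    def rebuild(rev, base, fulltexts, deltas, patch):
        chain = []
        r = rev
        while r != base:            # collect the delta chain, newest first
            chain.append(r)
            r -= 1
        chain.reverse()             # ...then apply it oldest first
        text = fulltexts[base]
        for r in chain:
            text = patch(text, deltas[r])
        return text

    # toy patcher: each "delta" is simply the replacement text
    assert rebuild(2, 0, {0: 'v0'}, {1: 'v1', 2: 'v2'},
                   lambda old, d: d) == 'v2'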
900 def _checkhash(self, text, node, rev):
900 def _checkhash(self, text, node, rev):
901 p1, p2 = self.parents(node)
901 p1, p2 = self.parents(node)
902 if (node != hash(text, p1, p2) and
902 if (node != hash(text, p1, p2) and
903 not (self.flags(rev) & REVIDX_PUNCHED_FLAG)):
903 not (self.flags(rev) & REVIDX_PUNCHED_FLAG)):
904 raise RevlogError(_("integrity check failed on %s:%d")
904 raise RevlogError(_("integrity check failed on %s:%d")
905 % (self.indexfile, rev))
905 % (self.indexfile, rev))
906 return text
906 return text
907
907
908 def checkinlinesize(self, tr, fp=None):
908 def checkinlinesize(self, tr, fp=None):
909 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
909 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
910 return
910 return
911
911
912 trinfo = tr.find(self.indexfile)
912 trinfo = tr.find(self.indexfile)
913 if trinfo is None:
913 if trinfo is None:
914 raise RevlogError(_("%s not found in the transaction")
914 raise RevlogError(_("%s not found in the transaction")
915 % self.indexfile)
915 % self.indexfile)
916
916
917 trindex = trinfo[2]
917 trindex = trinfo[2]
918 dataoff = self.start(trindex)
918 dataoff = self.start(trindex)
919
919
920 tr.add(self.datafile, dataoff)
920 tr.add(self.datafile, dataoff)
921
921
922 if fp:
922 if fp:
923 fp.flush()
923 fp.flush()
924 fp.close()
924 fp.close()
925
925
926 df = self.opener(self.datafile, 'w')
926 df = self.opener(self.datafile, 'w')
927 try:
927 try:
928 for r in self:
928 for r in self:
929 df.write(self._chunkraw(r, r))
929 df.write(self._chunkraw(r, r))
930 finally:
930 finally:
931 df.close()
931 df.close()
932
932
933 fp = self.opener(self.indexfile, 'w', atomictemp=True)
933 fp = self.opener(self.indexfile, 'w', atomictemp=True)
934 self.version &= ~(REVLOGNGINLINEDATA)
934 self.version &= ~(REVLOGNGINLINEDATA)
935 self._inline = False
935 self._inline = False
936 for i in self:
936 for i in self:
937 e = self._io.packentry(self.index[i], self.node, self.version, i)
937 e = self._io.packentry(self.index[i], self.node, self.version, i)
938 fp.write(e)
938 fp.write(e)
939
939
940 # if we don't call rename, the temp file will never replace the
940 # if we don't call rename, the temp file will never replace the
941 # real index
941 # real index
942 fp.rename()
942 fp.rename()
943
943
944 tr.replace(self.indexfile, trindex * self._io.size)
944 tr.replace(self.indexfile, trindex * self._io.size)
945 self._chunkclear()
945 self._chunkclear()
946
946
947 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
947 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
948 """add a revision to the log
948 """add a revision to the log
949
949
950 text - the revision data to add
950 text - the revision data to add
951 transaction - the transaction object used for rollback
951 transaction - the transaction object used for rollback
952 link - the linkrev data to add
952 link - the linkrev data to add
953 p1, p2 - the parent nodeids of the revision
953 p1, p2 - the parent nodeids of the revision
954 cachedelta - an optional precomputed delta
954 cachedelta - an optional precomputed delta
955 """
955 """
956 node = hash(text, p1, p2)
956 node = hash(text, p1, p2)
957 if (node in self.nodemap and
957 if (node in self.nodemap and
958 (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
958 (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
959 return node
959 return node
960
960
961 dfh = None
961 dfh = None
962 if not self._inline:
962 if not self._inline:
963 dfh = self.opener(self.datafile, "a")
963 dfh = self.opener(self.datafile, "a")
964 ifh = self.opener(self.indexfile, "a+")
964 ifh = self.opener(self.indexfile, "a+")
965 try:
965 try:
966 return self._addrevision(node, text, transaction, link, p1, p2,
966 return self._addrevision(node, text, transaction, link, p1, p2,
967 cachedelta, ifh, dfh)
967 cachedelta, ifh, dfh)
968 finally:
968 finally:
969 if dfh:
969 if dfh:
970 dfh.close()
970 dfh.close()
971 ifh.close()
971 ifh.close()
972
972
973 def _addrevision(self, node, text, transaction, link, p1, p2,
973 def _addrevision(self, node, text, transaction, link, p1, p2,
974 cachedelta, ifh, dfh):
974 cachedelta, ifh, dfh):
975
975
976 btext = [text]
976 btext = [text]
977 def buildtext():
977 def buildtext():
978 if btext[0] is not None:
978 if btext[0] is not None:
979 return btext[0]
979 return btext[0]
980 # flush any pending writes here so we can read it in revision
980 # flush any pending writes here so we can read it in revision
981 if dfh:
981 if dfh:
982 dfh.flush()
982 dfh.flush()
983 ifh.flush()
983 ifh.flush()
984 basetext = self.revision(self.node(cachedelta[0]))
984 basetext = self.revision(self.node(cachedelta[0]))
985 btext[0] = mdiff.patch(basetext, cachedelta[1])
985 btext[0] = mdiff.patch(basetext, cachedelta[1])
986 chk = hash(btext[0], p1, p2)
986 chk = hash(btext[0], p1, p2)
987 if chk != node:
987 if chk != node:
988 raise RevlogError(_("consistency error in delta"))
988 raise RevlogError(_("consistency error in delta"))
989 return btext[0]
989 return btext[0]
990
990
991 def builddelta(rev):
991 def builddelta(rev):
992 # can we use the cached delta?
992 # can we use the cached delta?
993 if cachedelta and cachedelta[0] == rev:
993 if cachedelta and cachedelta[0] == rev:
994 delta = cachedelta[1]
994 delta = cachedelta[1]
995 else:
995 else:
996 t = buildtext()
996 t = buildtext()
997 ptext = self.revision(self.node(rev))
997 ptext = self.revision(self.node(rev))
998 delta = mdiff.textdiff(ptext, t)
998 delta = mdiff.textdiff(ptext, t)
999 data = compress(delta)
999 data = compress(delta)
1000 l = len(data[1]) + len(data[0])
1000 l = len(data[1]) + len(data[0])
1001 base = self.base(rev)
1001 base = self.base(rev)
1002 dist = l + offset - self.start(base)
1002 dist = l + offset - self.start(base)
1003 return dist, l, data, base
1003 return dist, l, data, base
1004
1004
1005 curr = len(self)
1005 curr = len(self)
1006 prev = curr - 1
1006 prev = curr - 1
1007 base = curr
1007 base = curr
1008 offset = self.end(prev)
1008 offset = self.end(prev)
1009 flags = 0
1009 flags = 0
1010 d = None
1010 d = None
1011 p1r, p2r = self.rev(p1), self.rev(p2)
1011 p1r, p2r = self.rev(p1), self.rev(p2)
1012
1012
1013 # should we try to build a delta?
1013 # should we try to build a delta?
1014 if prev != nullrev:
1014 if prev != nullrev:
1015 d = builddelta(prev)
1015 d = builddelta(prev)
1016 if self._parentdelta and prev != p1r:
1016 if self._parentdelta and prev != p1r:
1017 d2 = builddelta(p1r)
1017 d2 = builddelta(p1r)
1018 if d2 < d:
1018 if d2 < d:
1019 d = d2
1019 d = d2
1020 flags = REVIDX_PARENTDELTA
1020 flags = REVIDX_PARENTDELTA
1021 dist, l, data, base = d
1021 dist, l, data, base = d
1022
1022
1023 # full versions are inserted when the needed deltas
1023 # full versions are inserted when the needed deltas
1024 # become comparable to the uncompressed text
1024 # become comparable to the uncompressed text
1025 # or the base revision is punched
1025 # or the base revision is punched
1026 if text is None:
1026 if text is None:
1027 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1027 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1028 cachedelta[1])
1028 cachedelta[1])
1029 else:
1029 else:
1030 textlen = len(text)
1030 textlen = len(text)
1031 if (d is None or dist > textlen * 2 or
1031 if (d is None or dist > textlen * 2 or
1032 (self.flags(base) & REVIDX_PUNCHED_FLAG)):
1032 (self.flags(base) & REVIDX_PUNCHED_FLAG)):
1033 text = buildtext()
1033 text = buildtext()
1034 data = compress(text)
1034 data = compress(text)
1035 l = len(data[1]) + len(data[0])
1035 l = len(data[1]) + len(data[0])
1036 base = curr
1036 base = curr
1037
1037
1038 e = (offset_type(offset, flags), l, textlen,
1038 e = (offset_type(offset, flags), l, textlen,
1039 base, link, p1r, p2r, node)
1039 base, link, p1r, p2r, node)
1040 self.index.insert(-1, e)
1040 self.index.insert(-1, e)
1041 self.nodemap[node] = curr
1041 self.nodemap[node] = curr
1042
1042
1043 entry = self._io.packentry(e, self.node, self.version, curr)
1043 entry = self._io.packentry(e, self.node, self.version, curr)
1044 if not self._inline:
1044 if not self._inline:
1045 transaction.add(self.datafile, offset)
1045 transaction.add(self.datafile, offset)
1046 transaction.add(self.indexfile, curr * len(entry))
1046 transaction.add(self.indexfile, curr * len(entry))
1047 if data[0]:
1047 if data[0]:
1048 dfh.write(data[0])
1048 dfh.write(data[0])
1049 dfh.write(data[1])
1049 dfh.write(data[1])
1050 dfh.flush()
1050 dfh.flush()
1051 ifh.write(entry)
1051 ifh.write(entry)
1052 else:
1052 else:
1053 offset += curr * self._io.size
1053 offset += curr * self._io.size
1054 transaction.add(self.indexfile, offset, curr)
1054 transaction.add(self.indexfile, offset, curr)
1055 ifh.write(entry)
1055 ifh.write(entry)
1056 ifh.write(data[0])
1056 ifh.write(data[0])
1057 ifh.write(data[1])
1057 ifh.write(data[1])
1058 self.checkinlinesize(transaction, ifh)
1058 self.checkinlinesize(transaction, ifh)
1059
1059
1060 if type(text) == str: # only accept immutable objects
1060 if type(text) == str: # only accept immutable objects
1061 self._cache = (node, curr, text)
1061 self._cache = (node, curr, text)
1062 return node
1062 return node
1063
1063
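The snapshot decision in _addrevision reduces to one comparison: store a full text whenever no delta could be built, or the chain since the last base would cost more than twice the uncompressed text (the real check also forces a full text when the base revision is punched). As a tiny predicate with illustrative names:

    def storeasfulltext(dist, textlen, havedelta):
        # dist: bytes from the chain base through this delta;
        # textlen: uncompressed size of the revision being added
        return not havedelta or dist > textlen * 2

    assert storeasfulltext(5000, 2000, True)      # chain too long: snapshot
    assert not storeasfulltext(3000, 2000, True)  # delta still pays off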
1064 def group(self, nodelist, bundler):
1064 def group(self, nodelist, bundler):
1065 """Calculate a delta group, yielding a sequence of changegroup chunks
1065 """Calculate a delta group, yielding a sequence of changegroup chunks
1066 (strings).
1066 (strings).
1067
1067
1068 Given a list of changeset revs, return a set of deltas and
1068 Given a list of changeset revs, return a set of deltas and
1069 metadata corresponding to nodes. The first delta is
1069 metadata corresponding to nodes. The first delta is
1070 first parent(nodelist[0]) -> nodelist[0]; the receiver is
1070 first parent(nodelist[0]) -> nodelist[0]; the receiver is
1071 guaranteed to have this parent as it has all history before
1071 guaranteed to have this parent as it has all history before
1072 these changesets. If the first parent is nullrev, the
1072 these changesets. If the first parent is nullrev, the
1073 changegroup starts with a full revision.
1073 changegroup starts with a full revision.
1074 """
1074 """
1075
1075
1076 revs = sorted([self.rev(n) for n in nodelist])
1076 revs = sorted([self.rev(n) for n in nodelist])
1077
1077
1078 # if we don't have any revisions touched by these changesets, bail
1078 # if we don't have any revisions touched by these changesets, bail
1079 if not revs:
1079 if not revs:
1080 yield bundler.close()
1080 yield bundler.close()
1081 return
1081 return
1082
1082
1083 # add the parent of the first rev
1083 # add the parent of the first rev
1084 p = self.parentrevs(revs[0])[0]
1084 p = self.parentrevs(revs[0])[0]
1085 revs.insert(0, p)
1085 revs.insert(0, p)
1086
1086
1087 # build deltas
1087 # build deltas
1088 for r in xrange(len(revs) - 1):
1088 for r in xrange(len(revs) - 1):
1089 a, b = revs[r], revs[r + 1]
1089 prev, curr = revs[r], revs[r + 1]
1090 nb = self.node(b)
1090 for c in bundler.revchunk(self, curr, prev):
1091 p1, p2 = self.parents(nb)
1092 prefix = ''
1093
1094 if a == nullrev:
1095 d = self.revision(nb)
1096 prefix = mdiff.trivialdiffheader(len(d))
1097 else:
1098 d = self.revdiff(a, b)
1099 for c in bundler.revchunk(self, nb, p1, p2, prefix, d):
1100 yield c
1091 yield c
1101
1092
1102 yield bundler.close()
1093 yield bundler.close()
1103
1094
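The diff above is the point of this changeset: delta construction moves behind a bundler object, so the loop now just asks bundler.revchunk(self, curr, prev) for wire-ready chunks and bundler.close() for the terminator. A minimal sketch of an object satisfying that interface, built on chunkheader() and closechunk() from mercurial.changegroup; the real bundle10 header carries node, p1, p2 and cs, so the bare node below is only a placeholder:

    from mercurial import changegroup

    class sketchbundler(object):
        def revchunk(self, rlog, rev, prev):
            delta = rlog.revdiff(prev, rev)   # delta against the prior rev
            header = rlog.node(rev)           # placeholder for the header
            yield changegroup.chunkheader(len(header) + len(delta))
            yield header
            yield delta

        def close(self):
            return changegroup.closechunk()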
1104 def addgroup(self, bundle, linkmapper, transaction):
1095 def addgroup(self, bundle, linkmapper, transaction):
1105 """
1096 """
1106 add a delta group
1097 add a delta group
1107
1098
1108 given a set of deltas, add them to the revision log. the
1099 given a set of deltas, add them to the revision log. the
1109 first delta is against its parent, which should be in our
1100 first delta is against its parent, which should be in our
1110 log; the rest are against the previous delta.
1101 log; the rest are against the previous delta.
1111 """
1102 """
1112
1103
1113 # track the base of the current delta log
1104 # track the base of the current delta log
1114 node = None
1105 node = None
1115
1106
1116 r = len(self)
1107 r = len(self)
1117 end = 0
1108 end = 0
1118 if r:
1109 if r:
1119 end = self.end(r - 1)
1110 end = self.end(r - 1)
1120 ifh = self.opener(self.indexfile, "a+")
1111 ifh = self.opener(self.indexfile, "a+")
1121 isize = r * self._io.size
1112 isize = r * self._io.size
1122 if self._inline:
1113 if self._inline:
1123 transaction.add(self.indexfile, end + isize, r)
1114 transaction.add(self.indexfile, end + isize, r)
1124 dfh = None
1115 dfh = None
1125 else:
1116 else:
1126 transaction.add(self.indexfile, isize, r)
1117 transaction.add(self.indexfile, isize, r)
1127 transaction.add(self.datafile, end)
1118 transaction.add(self.datafile, end)
1128 dfh = self.opener(self.datafile, "a")
1119 dfh = self.opener(self.datafile, "a")
1129
1120
1130 try:
1121 try:
1131 # loop through our set of deltas
1122 # loop through our set of deltas
1132 chain = None
1123 chain = None
1133 while True:
1124 while True:
1134 chunkdata = bundle.parsechunk(chain)
1125 chunkdata = bundle.parsechunk(chain)
1135 if not chunkdata:
1126 if not chunkdata:
1136 break
1127 break
1137 node = chunkdata['node']
1128 node = chunkdata['node']
1138 p1 = chunkdata['p1']
1129 p1 = chunkdata['p1']
1139 p2 = chunkdata['p2']
1130 p2 = chunkdata['p2']
1140 cs = chunkdata['cs']
1131 cs = chunkdata['cs']
1141 deltabase = chunkdata['deltabase']
1132 deltabase = chunkdata['deltabase']
1142 delta = chunkdata['delta']
1133 delta = chunkdata['delta']
1143
1134
1144 link = linkmapper(cs)
1135 link = linkmapper(cs)
1145 if (node in self.nodemap and
1136 if (node in self.nodemap and
1146 (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
1137 (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
1147 # this can happen if two branches make the same change
1138 # this can happen if two branches make the same change
1148 chain = node
1139 chain = node
1149 continue
1140 continue
1150
1141
1151 for p in (p1, p2):
1142 for p in (p1, p2):
1152 if p not in self.nodemap:
1143 if p not in self.nodemap:
1153 if self._shallow:
1144 if self._shallow:
1154 # add null entries for missing parents
1145 # add null entries for missing parents
1155 # XXX FIXME
1146 # XXX FIXME
1156 #if base == nullrev:
1147 #if base == nullrev:
1157 # base = len(self)
1148 # base = len(self)
1158 #e = (offset_type(end, REVIDX_PUNCHED_FLAG),
1149 #e = (offset_type(end, REVIDX_PUNCHED_FLAG),
1159 # 0, 0, base, nullrev, nullrev, nullrev, p)
1150 # 0, 0, base, nullrev, nullrev, nullrev, p)
1160 #self.index.insert(-1, e)
1151 #self.index.insert(-1, e)
1161 #self.nodemap[p] = r
1152 #self.nodemap[p] = r
1162 #entry = self._io.packentry(e, self.node,
1153 #entry = self._io.packentry(e, self.node,
1163 # self.version, r)
1154 # self.version, r)
1164 #ifh.write(entry)
1155 #ifh.write(entry)
1165 #t, r = r, r + 1
1156 #t, r = r, r + 1
1166 raise LookupError(p, self.indexfile,
1157 raise LookupError(p, self.indexfile,
1167 _('unknown parent'))
1158 _('unknown parent'))
1168 else:
1159 else:
1169 raise LookupError(p, self.indexfile,
1160 raise LookupError(p, self.indexfile,
1170 _('unknown parent'))
1161 _('unknown parent'))
1171
1162
1172 if deltabase not in self.nodemap:
1163 if deltabase not in self.nodemap:
1173 raise LookupError(deltabase, self.indexfile,
1164 raise LookupError(deltabase, self.indexfile,
1174 _('unknown delta base'))
1165 _('unknown delta base'))
1175
1166
1176 baserev = self.rev(deltabase)
1167 baserev = self.rev(deltabase)
1177 chain = self._addrevision(node, None, transaction, link,
1168 chain = self._addrevision(node, None, transaction, link,
1178 p1, p2, (baserev, delta), ifh, dfh)
1169 p1, p2, (baserev, delta), ifh, dfh)
1179 if not dfh and not self._inline:
1170 if not dfh and not self._inline:
1180 # addrevision switched from inline to conventional
1171 # addrevision switched from inline to conventional
1181 # reopen the index
1172 # reopen the index
1182 ifh.close()
1173 ifh.close()
1183 dfh = self.opener(self.datafile, "a")
1174 dfh = self.opener(self.datafile, "a")
1184 ifh = self.opener(self.indexfile, "a")
1175 ifh = self.opener(self.indexfile, "a")
1185 finally:
1176 finally:
1186 if dfh:
1177 if dfh:
1187 dfh.close()
1178 dfh.close()
1188 ifh.close()
1179 ifh.close()
1189
1180
1190 return node
1181 return node
1191
1182
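addgroup() drives the stream through bundle.parsechunk(chain): each call returns a dict carrying node, p1, p2, cs, deltabase and delta, or a falsy value once the group is exhausted, with `chain` (the node just added) being what a bundle10 parser defaults deltabase to. A stub bundle illustrating that contract (stubbundle is a hypothetical name):

    class stubbundle(object):
        def __init__(self, records):
            self._it = iter(records)

        def parsechunk(self, chain):
            rec = next(self._it, None)       # None once the group ends
            if rec is not None and rec.get('deltabase') is None:
                rec['deltabase'] = chain     # delta against previous rev
            return rec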
1192 def strip(self, minlink, transaction):
1183 def strip(self, minlink, transaction):
1193 """truncate the revlog on the first revision with a linkrev >= minlink
1184 """truncate the revlog on the first revision with a linkrev >= minlink
1194
1185
1195 This function is called when we're stripping revision minlink and
1186 This function is called when we're stripping revision minlink and
1196 its descendants from the repository.
1187 its descendants from the repository.
1197
1188
1198 We have to remove all revisions with linkrev >= minlink, because
1189 We have to remove all revisions with linkrev >= minlink, because
1199 the equivalent changelog revisions will be renumbered after the
1190 the equivalent changelog revisions will be renumbered after the
1200 strip.
1191 strip.
1201
1192
1202 So we truncate the revlog on the first of these revisions, and
1193 So we truncate the revlog on the first of these revisions, and
1203 trust that the caller has saved the revisions that shouldn't be
1194 trust that the caller has saved the revisions that shouldn't be
1204 removed and that it'll re-add them after this truncation.
1195 removed and that it'll re-add them after this truncation.
1205 """
1196 """
1206 if len(self) == 0:
1197 if len(self) == 0:
1207 return
1198 return
1208
1199
1209 for rev in self:
1200 for rev in self:
1210 if self.index[rev][4] >= minlink:
1201 if self.index[rev][4] >= minlink:
1211 break
1202 break
1212 else:
1203 else:
1213 return
1204 return
1214
1205
1215 # first truncate the files on disk
1206 # first truncate the files on disk
1216 end = self.start(rev)
1207 end = self.start(rev)
1217 if not self._inline:
1208 if not self._inline:
1218 transaction.add(self.datafile, end)
1209 transaction.add(self.datafile, end)
1219 end = rev * self._io.size
1210 end = rev * self._io.size
1220 else:
1211 else:
1221 end += rev * self._io.size
1212 end += rev * self._io.size
1222
1213
1223 transaction.add(self.indexfile, end)
1214 transaction.add(self.indexfile, end)
1224
1215
1225 # then reset internal state in memory to forget those revisions
1216 # then reset internal state in memory to forget those revisions
1226 self._cache = None
1217 self._cache = None
1227 self._chunkclear()
1218 self._chunkclear()
1228 for x in xrange(rev, len(self)):
1219 for x in xrange(rev, len(self)):
1229 del self.nodemap[self.node(x)]
1220 del self.nodemap[self.node(x)]
1230
1221
1231 del self.index[rev:-1]
1222 del self.index[rev:-1]
1232
1223
1233 def checksize(self):
1224 def checksize(self):
1234 expected = 0
1225 expected = 0
1235 if len(self):
1226 if len(self):
1236 expected = max(0, self.end(len(self) - 1))
1227 expected = max(0, self.end(len(self) - 1))
1237
1228
1238 try:
1229 try:
1239 f = self.opener(self.datafile)
1230 f = self.opener(self.datafile)
1240 f.seek(0, 2)
1231 f.seek(0, 2)
1241 actual = f.tell()
1232 actual = f.tell()
1242 f.close()
1233 f.close()
1243 dd = actual - expected
1234 dd = actual - expected
1244 except IOError, inst:
1235 except IOError, inst:
1245 if inst.errno != errno.ENOENT:
1236 if inst.errno != errno.ENOENT:
1246 raise
1237 raise
1247 dd = 0
1238 dd = 0
1248
1239
1249 try:
1240 try:
1250 f = self.opener(self.indexfile)
1241 f = self.opener(self.indexfile)
1251 f.seek(0, 2)
1242 f.seek(0, 2)
1252 actual = f.tell()
1243 actual = f.tell()
1253 f.close()
1244 f.close()
1254 s = self._io.size
1245 s = self._io.size
1255 i = max(0, actual // s)
1246 i = max(0, actual // s)
1256 di = actual - (i * s)
1247 di = actual - (i * s)
1257 if self._inline:
1248 if self._inline:
1258 databytes = 0
1249 databytes = 0
1259 for r in self:
1250 for r in self:
1260 databytes += max(0, self.length(r))
1251 databytes += max(0, self.length(r))
1261 dd = 0
1252 dd = 0
1262 di = actual - len(self) * s - databytes
1253 di = actual - len(self) * s - databytes
1263 except IOError, inst:
1254 except IOError, inst:
1264 if inst.errno != errno.ENOENT:
1255 if inst.errno != errno.ENOENT:
1265 raise
1256 raise
1266 di = 0
1257 di = 0
1267
1258
1268 return (dd, di)
1259 return (dd, di)
1269
1260
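checksize() reports drift as (dd, di): how far the data and index files are from the sizes the entries predict, with di also catching a torn trailing index entry. The non-inline arithmetic, recomputed with hypothetical sizes:

    def sizedrift(expecteddata, actualdata, actualindex, entrysize):
        dd = actualdata - expecteddata
        i = max(0, actualindex // entrysize)
        di = actualindex - i * entrysize   # bytes of a partial last entry
        return dd, di

    # 3 whole 64-byte entries and a data file of the expected size: no drift
    assert sizedrift(1024, 1024, 192, 64) == (0, 0)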
1270 def files(self):
1261 def files(self):
1271 res = [self.indexfile]
1262 res = [self.indexfile]
1272 if not self._inline:
1263 if not self._inline:
1273 res.append(self.datafile)
1264 res.append(self.datafile)
1274 return res
1265 return res