revlog: add clone method...
Gregory Szorc - r30778:1c7368d1 default
@@ -1,1950 +1,2064 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import errno
import hashlib
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
)
from .i18n import _
from . import (
    ancestor,
    error,
    mdiff,
    parsers,
    templatefilters,
    util,
)

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_EXTSTORED = (1 << 14) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, text) -> newtext, bool
          - (write) f(self, text) -> newtext, bool
          - (raw)   f(self, text) -> bool
    The boolean returned by these transforms is used to determine whether
    'newtext' can be used for hash integrity checking.

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor

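# Illustrative sketch (not part of the original file): registering a flag
# processor. The identity transforms below are hypothetical placeholders; a
# real processor (e.g. for REVIDX_EXTSTORED) would rewrite the text on
# read/write. Wrapped in a function so importing this module does not
# actually mutate _flagprocessors.
def _exampleregisterprocessor():
    def _read(self, text):
        return text, True   # unchanged text stays usable for hash checks
    def _write(self, text):
        return text, True
    def _raw(self, text):
        return True
    addflagprocessor(REVIDX_EXTSTORED, (_read, _write, _raw))
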
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return long(long(offset) << 16 | type)

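# Illustrative sketch (not part of the original file): the first field of an
# index entry packs a 48-bit data offset and 16 bits of flags into a single
# integer, so the three helpers above round-trip cleanly.
def _exampleoffsettype():
    packed = offset_type(1234, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1234
    assert gettype(packed) == REVIDX_ISCENSORED
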
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = hashlib.sha1(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()

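# Illustrative sketch (not part of the original file): the node hash is SHA-1
# over the two parent nodes in sorted order followed by the text, which is
# what makes identical contents with different history hash differently.
def _examplenodehash():
    text = 'file contents\n'
    expected = hashlib.sha1(nullid + nullid + text).digest()
    assert hash(text, nullid, nullid) == expected
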
def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        try:
            return _decompress(bin)
        except zlib.error as e:
            raise RevlogError(_("revlog decompress error: %s") % str(e))
    if t == 'u':
        return util.buffer(bin, 1)
    raise RevlogError(_("unknown compression type %r") % t)

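# Illustrative sketch (not part of the original file): stored chunks are
# self-describing via their first byte -- 'x' is the leading byte of a zlib
# stream, 'u' prefixes data kept uncompressed, and '\0'/empty pass through.
def _exampledecompress():
    assert decompress(_compress('some revision text')) == 'some revision text'
    assert str(decompress('u' + 'raw text')) == 'raw text'
    assert decompress('') == ''
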
# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"

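# Illustrative sketch (not part of the original file): the v0 format string
# above describes a fixed 76-byte record -- four big-endian longs plus three
# 20-byte SHA-1 nodeids -- which is the record size revlogoldio computes.
def _examplev0size():
    assert struct.calcsize(indexformatv0) == 4 * 4 + 3 * 20  # 76 bytes
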
class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
versionformat = ">I"

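# Illustrative sketch (not part of the original file): a RevlogNG record is
# 64 bytes -- one Q carrying the packed offset+flags, six 4-byte ints, and a
# 20-byte nodeid padded with 12 null bytes to the full 32 bytes noted above.
def _examplengsize():
    assert struct.calcsize(indexformatng) == 8 + 6 * 4 + 20 + 12  # 64 bytes
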
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

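# Illustrative sketch (not part of the original file): why packentry can
# splice the version header into entry 0. Revision 0 always starts at data
# offset 0 with no flags, so the leading 4 bytes of its packed record are
# zero and can safely be overwritten with the revlog version/flags word.
def _exampleversionsplice():
    entry = (offset_type(0, 0), 11, 11, 0, 0, nullrev, nullrev, nullid)
    p = _pack(indexformatng, *entry)
    assert p[:4] == '\0\0\0\0'
    p = _pack(versionformat, REVLOG_DEFAULT_VERSION) + p[4:]
    assert struct.unpack(versionformat, p[:4])[0] == REVLOG_DEFAULT_VERSION
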
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.
    """
    def __init__(self, opener, indexfile, checkambig=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            indexdata = f.read()
            f.close()
            if len(indexdata) > 0:
                v = struct.unpack(versionformat, indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

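    # Illustrative sketch (not part of the original class): 'revs' yields an
    # inclusive range and iterates backwards when start > stop, e.g. for a
    # revlog with at least three revisions:
    #
    #     list(rl.revs(0, 2))  ->  [0, 1, 2]
    #     list(rl.revs(2, 0))  ->  [2, 1, 0]
    def _examplerevsusage(self):
        return list(self.revs(0, 2)), list(self.revs(2, 0))
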
    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        return self.index[rev][5:7]

    def node(self, rev):
        return self.index[rev][7]

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

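    # Illustrative sketch (not part of the original class): reading a revision
    # replays its delta chain on top of the chain base. With generaldelta each
    # entry names its own delta parent (index[rev][3]); without it the parent
    # is implicitly rev - 1.
    def _exampledeltachain(self, rev):
        chain, stopped = self._deltachain(rev)
        assert chain[0] == self.chainbase(rev) and chain[-1] == rev
        assert not stopped  # only True when a passed 'stoprev' is hit
        return chain
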
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

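    # Illustrative sketch (not part of the original class): because revision
    # numbers are a topological order, a single forward sweep from min(revs)
    # suffices -- a rev is a descendant as soon as one of its parents is in
    # the growing 'seen' set. For example, on a purely linear history:
    #
    #     list(rl.descendants([0]))  ->  [1, 2, ..., len(rl) - 1]
    def _exampledescendants(self):
        return list(self.descendants([0]))
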
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

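    # Illustrative sketch (not part of the original class): the pure-Python
    # fallback above works because parents always have smaller revision
    # numbers -- mark every rev as a candidate head, then clear the mark of
    # any rev named as a parent. The same technique on a toy parent table:
    @staticmethod
    def _exampleheadrevstechnique():
        parentrevs = [(-1, -1), (0, -1), (0, -1)] # revs 1 and 2 fork off 0
        ishead = [1] * len(parentrevs)
        for p1, p2 in parentrevs:
            for p in (p1, p2):
                if p != -1:
                    ishead[p] = 0
        return [r for r, v in enumerate(ishead) if v] # -> [1, 2]
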
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

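    # Usage sketch (``rl`` here stands for any opened revlog instance; the
    # name is illustrative, not part of this module):
    #
    #   tips = rl.heads()                  # all nodes without children
    #   subtips = rl.heads(start=basenode) # heads descended from basenode
    #
    # ``start`` is a binary nodeid and ``stop`` an iterable of binary
    # nodeids that are treated as if they had no children.
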
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

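    # Sketch: walking one step down the DAG from a node ``n`` (``rl`` again
    # being an illustrative revlog instance):
    #
    #   for child in rl.children(n):
    #       pass # each child is a nodeid that lists n among its parents
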
    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return map(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

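    # Sketch of the relationship between the two ancestor APIs, assuming
    # ``rl`` is a revlog and ``a``/``b`` are binary nodeids:
    #
    #   gcas = rl.commonancestorsheads(a, b) # every tied "best" candidate
    #   base = rl.ancestor(a, b)             # one deterministic winner
    #
    # When several candidates tie, ancestor() returns the minimum node so
    # repeated merges pick the same base.
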
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                return partial
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

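    # The accepted identifier forms, as a sketch of equivalent calls
    # (assuming ``rl`` is a revlog with at least six revisions):
    #
    #   rl.lookup(5)              # revision number
    #   rl.lookup('5')            # str(revision number)
    #   rl.lookup(node)           # 20-byte binary nodeid
    #   rl.lookup(hex(node))      # 40-character hex nodeid
    #   rl.lookup(hex(node)[:8])  # unambiguous hex prefix
    #
    # Unknown or ambiguous identifiers raise LookupError.
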
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _addchunk(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be repositioned and
        its original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._addchunk(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

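    # The window arithmetic above rounds to ``cachesize`` boundaries, which
    # is why the chunk cache size must be a power of two. A worked example
    # with cachesize = 65536, offset = 70000 and length = 100:
    #
    #   realoffset = 70000 & ~65535                           -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536 -> 65536
    #
    # so one aligned 64KB window is read and cached, and a util.buffer view
    # of just the requested 100 bytes is returned.
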
    def _getchunk(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._loadchunk(offset, length, df=df)

    def _chunkraw(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getchunk(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return decompress(self._chunkraw(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        try:
            offset, data = self._chunkraw(revs[0], revs[-1], df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revs]

        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decompress(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return str(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1),
                              self.revision(rev2))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                return self._cache[2]
            cachedrev = self._cache[1]

        # look up what we need to read
        text = None
        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            text = self._cache[2]

        # drop cache to save memory
        self._cache = None

        bins = self._chunks(chain, df=_df)
        if text is None:
            text = str(bins[0])
            bins = bins[1:]

        text = mdiff.patches(text, bins)

        text, validatehash = self._processflags(text, self.flags(rev), 'read',
                                                raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        self._cache = (node, rev, text)
        return text

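    # Read sketch (``rl`` is an illustrative revlog instance):
    #
    #   text = rl.revision(rl.node(0)) # fulltext of the first revision
    #   same = rl.revision(0)          # revision numbers work as well
    #   raw = rl.revision(0, raw=True) # skip flag transforms, as done by
    #                                  # changegroup generation and debug
    #                                  # commands
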
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply
            # the related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

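    # A registered processor in the module-level ``_flagprocessors`` mapping
    # is a 3-tuple of callables. A minimal sketch of the expected shapes
    # (the names here are hypothetical):
    #
    #   def readtransform(rl, text):  # storage form -> logical form
    #       return decoded, True      # (text, validatehash)
    #
    #   def writetransform(rl, text): # logical form -> storage form
    #       return encoded, True
    #
    #   def rawtransform(rl, text):   # raw path only decides hash checking
    #       return False
    #
    #   _flagprocessors[REVIDX_EXTSTORED] = (readtransform, writetransform,
    #                                        rawtransform)
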
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, revornode))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover
            # data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r)[1])
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True,
                         checkambig=self._checkambig)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in that case)
        flags - the known flags to set on the revision
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        newtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if newtext != text:
            cachedelta = None
            text = newtext

        if len(text) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(text)))

        node = node or self.hash(text, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(text, node, p1=p1, p2=p2)

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

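    # Append sketch, assuming an open transaction ``tr`` and a ``linkrev``
    # pointing at the changelog revision this entry belongs to:
    #
    #   node = rl.addrevision(text, tr, linkrev, p1, p2)
    #
    # The returned node defaults to hash(text, p1, p2); adding a node that
    # is already present is a no-op that simply returns it.
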
    def compress(self, text):
        """ generate a possibly-compressed representation of text """
        if not text:
            return ("", text)
        l = len(text)
        bin = None
        if l < 44:
            pass
        elif l > 1000000:
            # zlib makes an internal copy, thus doubling memory usage for
            # large files, so let's do this in pieces
            z = zlib.compressobj()
            p = []
            pos = 0
            while pos < l:
                pos2 = pos + 2**20
                p.append(z.compress(text[pos:pos2]))
                pos = pos2
            p.append(z.flush())
            if sum(map(len, p)) < l:
                bin = "".join(p)
        else:
            bin = _compress(text)
        if bin is None or len(bin) > l:
            if text[0] == '\0':
                return ("", text)
            return ('u', text)
        return ("", bin)

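    # The (header, data) pairs returned above encode three cases:
    #
    #   ("", "")            empty input, stored as-is
    #   ("", <zlib bytes>)  compression helped (zlib output begins with 'x',
    #                       which distinguishes it on disk)
    #   ('u', text)         compression did not help; 'u' marks the chunk as
    #                       a literal
    #
    # Text whose first byte is '\0' is returned as ("", text): a NUL cannot
    # be confused with a compression header, so no 'u' marker is needed.
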
    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
        if (dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True

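    # Worked example: with textlen = 1000, a candidate delta is rejected
    # when any of the following holds:
    #
    #   dist               > 4000  (reading the chain spans too much of the
    #                               data file)
    #   l                  > 1000  (the delta is bigger than the fulltext)
    #   compresseddeltalen > 2000  (applying the chain costs too much CPU
    #                               relative to the fulltext size)
    #
    # or when the chain would exceed the configured _maxchainlen.
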
    def _addrevision(self, node, text, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False, raw=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.
        invariants:
        - text is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        - raw is optional; if set to True, it indicates the revision data is to
          be treated by _processflags() as raw. It is usually set by changegroup
          generation and debug commands.
        """
        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw
            # when they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                if self._inline:
                    fh = ifh
                else:
                    fh = dfh
                basetext = self.revision(self.node(baserev), _df=fh, raw=raw)
                btext[0] = mdiff.patch(basetext, delta)

            try:
                res = self._processflags(btext[0], flags, 'read', raw=raw)
                btext[0], validatehash = res
                if validatehash:
                    self.checkhash(btext[0], node, p1=p1, p2=p2)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    if self._inline:
                        fh = ifh
                    else:
                        fh = dfh
                    ptext = self.revision(self.node(rev), _df=fh)
                    delta = mdiff.textdiff(ptext, t)
            header, data = self.compress(delta)
            deltalen = len(header) + len(data)
            chainbase = self.chainbase(rev)
            dist = deltalen + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += deltalen
            return (dist, deltalen, (header, data), base,
                    chainbase, chainlen, compresseddeltalen)

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        delta = None
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)

        # should we try to build a delta?
        if prev != nullrev and self.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and self._generaldelta and self._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # builddelta will reuse the cache
                candidatedelta = builddelta(cachedelta[0])
                tested.add(cachedelta[0])
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
            if delta is None and self._generaldelta:
                # exclude any base already lazily tested above
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not self._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                pdeltas = []
                for p in parents:
                    pd = builddelta(p)
                    if self._isgooddelta(pd, textlen):
                        pdeltas.append(pd)
                if pdeltas:
                    delta = min(pdeltas, key=lambda x: x[1])
            if delta is None and prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                candidatedelta = builddelta(prev)
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
        if delta is not None:
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
        else:
            text = buildtext()
            data = self.compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and text is None:
            text = buildtext()

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._chainbasecache[curr] = chainbase
        return node

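    # To summarize the selection above: delta-base candidates are tried in a
    # fixed order -- the incoming cachedelta base (when lazydeltabase
    # applies), then the closer parent (or both parents with
    # aggressivemergedeltas), then ``prev`` -- and the first one accepted by
    # _isgooddelta() wins; if none is accepted, a full snapshot is stored.
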
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

1714 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1714 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1715 """
1715 """
1716 add a delta group
1716 add a delta group
1717
1717
1718 given a set of deltas, add them to the revision log. the
1718 given a set of deltas, add them to the revision log. the
1719 first delta is against its parent, which should be in our
1719 first delta is against its parent, which should be in our
1720 log, the rest are against the previous delta.
1720 log, the rest are against the previous delta.
1721
1721
1722 If ``addrevisioncb`` is defined, it will be called with arguments of
1722 If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            chain = None
            for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']
                flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, flags, (baserev, delta),
                                          ifh, dfh,
                                          alwayscache=bool(addrevisioncb),
                                          raw=True)

                if addrevisioncb:
                    addrevisioncb(self, chain)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+",
                                      checkambig=self._checkambig)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

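    # A minimal usage sketch for addgroup() (hedged: ``repo``, ``cg``, ``cl``
    # and the transaction handle are illustrative assumptions, not part of
    # this module). The linkmapper callable translates the changeset node of
    # each chunk into a local link revision:
    #
    #   tr = repo.transaction('addgroup-example')  # hypothetical caller
    #   added = flog.addgroup(cg, cl.rev, tr,
    #                         addrevisioncb=lambda rl, node: None)
    #   # ``added`` lists the nodes appended to ``flog``, in delta order
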
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

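    # A worked illustration (hypothetical numbers): in a linear revlog whose
    # revs 0..4 carry linkrevs [0, 1, 1, 2, 3], getstrippoint(2) walks back
    # from the head until every remaining linkrev is below 2 and returns
    # (3, set()): revs 3 and 4 must go. A rev sitting above the strip point
    # whose own linkrev is below minlink would instead be reported in
    # brokenrevs so the caller can save and re-add it.
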
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

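    # Truncation arithmetic above, illustrated (a sketch of existing
    # behavior, not new logic): an inline revlog interleaves index entries
    # and data in one file, so cutting at rev N leaves start(N) data bytes
    # plus N fixed-size index entries, i.e. start(N) + N * self._io.size;
    # a separate-data revlog cuts the data file at start(N) and the index
    # file at N * self._io.size independently.
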
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

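    # Interpreting checksize(), a hedged sketch (``rl`` and the message are
    # illustrative, not part of this module): both values count unexpected
    # trailing bytes, so (0, 0) means the data and index files match what
    # the index records.
    #
    #   dd, di = rl.checksize()
    #   if dd or di:
    #       print('revlog %s: %d/%d excess bytes' % (rl.indexfile, dd, di))
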
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEALL = set(['always', 'samerevs', 'never'])

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        the two extremes. Deltas are reused when they remain appropriate for
        the destination, but recomputed when a better base revision is
        available. This means that when converting a non-generaldelta revlog
        to a generaldelta revlog, deltas will be recomputed whenever a delta's
        base isn't a parent of its revision.

        In addition to the delta policy, the ``aggressivemergedeltas``
        argument controls whether to compute deltas against both parents for
        merges. If ``None``, the destination revlog's current setting is used.
        """
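        # A hypothetical usage sketch (the transaction handle and revlog
        # names are illustrative assumptions): recompute every delta while
        # copying, e.g. after the delta algorithm has changed.
        #
        #   tr = srcrepo.transaction('clone-example')
        #   srcrevlog.clone(tr, destrevlog,
        #                   deltareuse=srcrevlog.DELTAREUSENEVER)
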
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
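                # Layout note (added for clarity): an index entry is
                # (offset_and_flags, compressed_size, uncompressed_size,
                #  base_rev, link_rev, p1_rev, p2_rev, node), with the
                # per-revision flags held in the low 16 bits of the first
                # field.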
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                text = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, str(self._chunk(rev)))

                if not cachedelta:
                    text = self.revision(rev)

                ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                        checkambig=False)
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                try:
                    destrevlog._addrevision(node, text, tr, linkrev, p1, p2,
                                            flags, cachedelta, ifh, dfh)
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd