revlog: tweak wording and logic for flags validation...
Gregory Szorc
r32391:36d3559c default
@@ -1,2152 +1,2156 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import errno
import hashlib
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
)
from .i18n import _
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)

parsers = policy.importmod(r'parsers')

_pack = struct.pack
_unpack = struct.unpack
# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
    "text" is presented to the user. "rawtext" is stored in revlog data, not
    directly visible to the user.
    The boolean returned by these transforms is used to determine whether
    the returned text can be used for hash integrity checking. For example,
    if "write" returns False, then "text" is used to generate the hash. If
    "write" returns True, then the "rawtext" it returns should be used to
    generate the hash. Usually, "write" and "read" return different booleans,
    and "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor
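
# An illustrative sketch of registering a processor, assuming a hypothetical
# extension that stores large revisions externally under REVIDX_EXTSTORED.
# The helpers below (_readext, _writeext, _rawext, _fetchexternal,
# _storeexternal) are invented for illustration and are not Mercurial APIs;
# the booleans follow the hash-validity convention documented above.
#
# def _readext(self, rawtext):
#     # resolve the stored pointer to the real text; the result is what the
#     # revision hash was computed over, so it can verify the hash
#     return _fetchexternal(rawtext), True
#
# def _writeext(self, text):
#     # store the text externally and keep a pointer as rawtext; hashing
#     # should still use the user-visible text, hence False
#     return _storeexternal(text), False
#
# def _rawext(self, rawtext):
#     # the raw pointer alone cannot verify the revision hash
#     return False
#
# addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))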

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
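
# Illustrative round trip through the packed form (values chosen arbitrarily):
# >>> q = offset_type(4096, REVIDX_ISCENSORED)
# >>> getoffset(q), gettype(q) == REVIDX_ISCENSORED
# (4096, True)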

_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = hashlib.sha1(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()
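
# Illustrative: with p2 == nullid the fast path matches what sorting would
# produce, because nullid (twenty NUL bytes) sorts before any real node:
# >>> p1 = b'\x01' * 20
# >>> hash(b'data', p1, nullid) == hashlib.sha1(nullid + p1 + b'data').digest()
# True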

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"
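
# The v0 record size follows directly from the format string:
# >>> struct.calcsize(">4l20s20s20s")
# 76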

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
versionformat = ">I"

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff
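
# Each "ng" record is 64 bytes; the trailing "12x" pads the 20-byte nodeid
# field out to the 32 bytes reserved in the layout above:
# >>> struct.calcsize(">Qiiiiii20s12x")
# 64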

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p
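
# Overlaying the version header on revision 0 is safe because rev 0 always
# starts at offset 0, so only zero bytes are overwritten:
# >>> _pack(indexformatng, offset_type(0, 0), 0, 0, 0, 0, -1, -1, nullid)[:4]
# '\x00\x00\x00\x00'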

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)
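
        # n & (n - 1) clears the lowest set bit, so the check above passes
        # exactly for powers of two, e.g.:
        # >>> [n for n in (1, 2, 3, 4, 6, 8) if not (n & (n - 1))]
        # [1, 2, 4, 8]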

        indexdata = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            indexdata = f.read()
            f.close()
            if len(indexdata) > 0:
                v = struct.unpack(versionformat, indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
-        if fmt == REVLOGV0 and flags:
-            raise RevlogError(_("index %s unknown flags %#04x for format v0")
-                              % (self.indexfile, flags >> 16))
-        elif fmt == REVLOGV1 and flags & ~REVLOGV1_FLAGS:
-            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
-                              % (self.indexfile, flags >> 16))
-        elif fmt > REVLOGV1:
-            raise RevlogError(_("index %s unknown format %d")
-                              % (self.indexfile, fmt))
+        if fmt == REVLOGV0:
+            if flags:
+                raise RevlogError(_('unknown flags (%#04x) in version %d '
+                                    'revlog %s') %
+                                  (flags >> 16, fmt, self.indexfile))
+        elif fmt == REVLOGV1:
+            if flags & ~REVLOGV1_FLAGS:
+                raise RevlogError(_('unknown flags (%#04x) in version %d '
+                                    'revlog %s') %
+                                  (flags >> 16, fmt, self.indexfile))
+        else:
+            raise RevlogError(_('unknown version (%d) in revlog %s') %
+                              (fmt, self.indexfile))

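        # For illustration, the default header decomposes as:
        # >>> v = REVLOG_DEFAULT_VERSION
        # >>> (v & 0xFFFF, bool(v & FLAG_INLINE_DATA), bool(v & FLAG_GENERALDELTA))
        # (1, True, False)
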
        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def tip(self):
        return self.node(len(self.index) - 2)
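        # The index ends with a sentinel entry for the null revision (see
        # parseindex), hence the "- 2" here and the "- 1" in __len__ below.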
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)
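
        # Ranges include 'stop' and may run backwards; revs(start=5, stop=2)
        # iterates like:
        # >>> list(xrange(5, 2 - 1, -1))
        # [5, 4, 3, 2]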

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))
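
        # The mask above drops ELLIPSIS from the known flags, so a revision
        # whose only flag is ELLIPSIS still takes the rawsize fast path:
        # >>> bool(REVIDX_ELLIPSIS & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS))
        # False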

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        return self.index[rev][5:7]

    def node(self, rev):
        return self.index[rev][7]

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
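
        # Illustrative: without generaldelta the chain is a contiguous run
        # back to the last full snapshot, e.g. if rev 3 is a snapshot then
        # _deltachain(5) returns ([3, 4, 5], False).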

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]
898
902
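    # A toy rerun of the marking pass in _headrevs above (commented out; the
    # parent revs here are made up). Every rev starts as a head candidate and
    # is cleared as soon as some rev names it as a parent:
    #
    #   parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1), 3: (1, 2)}
    #   ishead = [0] * 5              # the extra slot absorbs nullrev (-1)
    #   for r in range(4):
    #       ishead[r] = 1
    #       p1, p2 = parents[r]
    #       ishead[p1] = ishead[p2] = 0
    #   # only the merge rev survives:
    #   # [r for r, v in enumerate(ishead) if v] == [3]
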
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

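    # Usage sketch for heads() (commented out; ``rl`` is a hypothetical
    # revlog, ``base`` and ``other`` nodes known to exist in it):
    #
    #   rl.heads()                          # every childless node
    #   rl.heads(start=base)                # only heads descended from base
    #   rl.heads(start=base, stop=[other])  # cut the walk off at ``other``
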
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

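    # The parent/child symmetry the loop above relies on (commented out;
    # ``rl`` and ``node`` are hypothetical):
    #
    #   for child in rl.children(node):
    #       assert node in rl.parents(child)
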
    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

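    # How the two ancestor queries above relate (commented out; ``rl``,
    # ``a`` and ``b`` are hypothetical):
    #
    #   heads = rl.commonancestorsheads(a, b)  # all heads of the common set
    #   best = rl.ancestor(a, b)               # a single winner, or nullid
    #   # ties among equally good candidates are broken by min(node), so
    #   # repeated calls always pick the same winner.
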
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                return partial
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

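    # The same revision can be looked up several ways (commented out;
    # ``rl`` is a hypothetical revlog with at least one revision, and the
    # hex prefix is assumed unambiguous):
    #
    #   n = rl.node(0)
    #   rl.lookup(0)               # revision number
    #   rl.lookup('0')             # str(revision number)
    #   rl.lookup(hex(n))          # full hex nodeid
    #   rl.lookup(hex(n)[:12])     # unambiguous hex prefix
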
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

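    # The window arithmetic above rounds every read out to cache-block
    # boundaries. A standalone sketch with a hypothetical 64-byte block
    # (commented out):
    #
    #   cachesize = 64                         # must be a power of two
    #   offset, length = 100, 20               # caller wants bytes [100, 120)
    #   realoffset = offset & ~(cachesize - 1)              # -> 64
    #   reallength = (((offset + length + cachesize)
    #                  & ~(cachesize - 1)) - realoffset)    # -> 64
    #   # bytes [64, 128) are read and cached; the caller gets a buffer view
    #   # starting at offset 100 - 64 = 36 inside that window.
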
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

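    # Why the inline adjustment above works (commented out sketch with
    # hypothetical numbers): in an inline revlog the data for rev r sits
    # after r + 1 index entries, so the data-only offset from the index must
    # be shifted by that many entry sizes:
    #
    #   iosize = 64                  # assumed v1 index entry size
    #   start = 500                  # data-only offset for rev 3
    #   physical = start + (3 + 1) * iosize   # -> 756 bytes into the .i file
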
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        try:
            offset, data = self._getsegmentforrevs(revs[0], revs[-1], df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revs]

        decomp = self.decompress
        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

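    # The fast path above is what makes bundle generation cheap (commented
    # out; ``rl``, ``rev1`` and ``rev2`` are hypothetical): when rev2 is
    # already stored as a delta against rev1, revdiff() is a single chunk
    # read instead of two fulltext reconstructions plus a diff:
    #
    #   if rl.deltaparent(rev2) == rev1:
    #       delta = rl.revdiff(rev1, rev2)   # reuses the on-disk chunk
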
    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

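    # The reconstruction above, in miniature (commented out; ``rl`` and
    # ``rev`` are hypothetical): a revision is its chain's base fulltext
    # patched with every delta along the chain:
    #
    #   chain, _stopped = rl._deltachain(rev)
    #   bins = rl._chunks(chain)            # one decompressed chunk per link
    #   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
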
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

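    # A minimal flag-processor sketch (commented out). The three transforms
    # and the flag choice are hypothetical; read/write transforms return
    # (newtext, validatehash), while the raw transform returns only the bool:
    #
    #   def _readtf(rl, text):
    #       return decode(text), True
    #   def _writetf(rl, text):
    #       return encode(text), False
    #   def _rawtf(rl, text):
    #       return False        # stored text does not hash to the node
    #
    #   addflagprocessor(REVIDX_EXTSTORED, (_readtf, _writetf, _rawtf))
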
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, revornode))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True,
                         checkambig=self._checkambig)
        self.version &= ~FLAG_INLINE_DATA
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such a case)
        flags - the known flags to set on the revision
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

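    # How the (header, data) pairs above pair up with decompress() below
    # (commented out; this just restates the conventions in the two methods):
    #
    #   ('', b'x...')       # zlib stream; its own first byte acts as header
    #   ('u', b'plain')     # literal data that needs the explicit 'u' marker
    #   ('', b'\0binary')   # literal data starting with NUL; self-marking
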
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') % str(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
        if (dist > textlen * 4 or l > textlen or
                compresseddeltalen > textlen * 2 or
                (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True

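    # Worked example of the bounds above (commented out; the numbers are
    # made up). For a 1000-byte fulltext, a candidate delta is kept only if:
    #
    #   dist <= 4000                    # read span: at most 4x the fulltext
    #   l <= 1000                       # the delta itself is no larger
    #   compresseddeltalen <= 2000      # whole chain costs at most 2x to apply
    #   chainlen <= self._maxchainlen   # when a chain-length cap is set
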
1620 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1624 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1621 cachedelta, ifh, dfh, alwayscache=False):
1625 cachedelta, ifh, dfh, alwayscache=False):
1622 """internal function to add revisions to the log
1626 """internal function to add revisions to the log
1623
1627
1624 see addrevision for argument descriptions.
1628 see addrevision for argument descriptions.
1625
1629
1626 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1630 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1627
1631
1628 invariants:
1632 invariants:
1629 - rawtext is optional (can be None); if not set, cachedelta must be set.
1633 - rawtext is optional (can be None); if not set, cachedelta must be set.
1630 if both are set, they must correspond to each other.
1634 if both are set, they must correspond to each other.
1631 """
1635 """
1632 btext = [rawtext]
1636 btext = [rawtext]
1633 def buildtext():
1637 def buildtext():
1634 if btext[0] is not None:
1638 if btext[0] is not None:
1635 return btext[0]
1639 return btext[0]
1636 baserev = cachedelta[0]
1640 baserev = cachedelta[0]
1637 delta = cachedelta[1]
1641 delta = cachedelta[1]
1638 # special case deltas which replace entire base; no need to decode
1642 # special case deltas which replace entire base; no need to decode
1639 # base revision. this neatly avoids censored bases, which throw when
1643 # base revision. this neatly avoids censored bases, which throw when
1640 # they're decoded.
1644 # they're decoded.
1641 hlen = struct.calcsize(">lll")
1645 hlen = struct.calcsize(">lll")
1642 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1646 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1643 len(delta) - hlen):
1647 len(delta) - hlen):
1644 btext[0] = delta[hlen:]
1648 btext[0] = delta[hlen:]
1645 else:
1649 else:
1646 if self._inline:
1650 if self._inline:
1647 fh = ifh
1651 fh = ifh
1648 else:
1652 else:
1649 fh = dfh
1653 fh = dfh
1650 basetext = self.revision(baserev, _df=fh, raw=True)
1654 basetext = self.revision(baserev, _df=fh, raw=True)
1651 btext[0] = mdiff.patch(basetext, delta)
1655 btext[0] = mdiff.patch(basetext, delta)
1652
1656
1653 try:
1657 try:
1654 res = self._processflags(btext[0], flags, 'read', raw=True)
1658 res = self._processflags(btext[0], flags, 'read', raw=True)
1655 btext[0], validatehash = res
1659 btext[0], validatehash = res
1656 if validatehash:
1660 if validatehash:
1657 self.checkhash(btext[0], node, p1=p1, p2=p2)
1661 self.checkhash(btext[0], node, p1=p1, p2=p2)
1658 if flags & REVIDX_ISCENSORED:
1662 if flags & REVIDX_ISCENSORED:
1659 raise RevlogError(_('node %s is not censored') % node)
1663 raise RevlogError(_('node %s is not censored') % node)
1660 except CensoredNodeError:
1664 except CensoredNodeError:
1661 # must pass the censored index flag to add censored revisions
1665 # must pass the censored index flag to add censored revisions
1662 if not flags & REVIDX_ISCENSORED:
1666 if not flags & REVIDX_ISCENSORED:
1663 raise
1667 raise
1664 return btext[0]
1668 return btext[0]
1665
1669
1666 def builddelta(rev):
1670 def builddelta(rev):
1667 # can we use the cached delta?
1671 # can we use the cached delta?
1668 if cachedelta and cachedelta[0] == rev:
1672 if cachedelta and cachedelta[0] == rev:
1669 delta = cachedelta[1]
1673 delta = cachedelta[1]
1670 else:
1674 else:
1671 t = buildtext()
1675 t = buildtext()
1672 if self.iscensored(rev):
1676 if self.iscensored(rev):
1673 # deltas based on a censored revision must replace the
1677 # deltas based on a censored revision must replace the
1674 # full content in one patch, so delta works everywhere
1678 # full content in one patch, so delta works everywhere
1675 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1679 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1676 delta = header + t
1680 delta = header + t
1677 else:
1681 else:
1678 if self._inline:
1682 if self._inline:
1679 fh = ifh
1683 fh = ifh
1680 else:
1684 else:
1681 fh = dfh
1685 fh = dfh
1682 ptext = self.revision(rev, _df=fh, raw=True)
1686 ptext = self.revision(rev, _df=fh, raw=True)
1683 delta = mdiff.textdiff(ptext, t)
1687 delta = mdiff.textdiff(ptext, t)
1684 header, data = self.compress(delta)
1688 header, data = self.compress(delta)
1685 deltalen = len(header) + len(data)
1689 deltalen = len(header) + len(data)
1686 chainbase = self.chainbase(rev)
1690 chainbase = self.chainbase(rev)
1687 dist = deltalen + offset - self.start(chainbase)
1691 dist = deltalen + offset - self.start(chainbase)
1688 if self._generaldelta:
1692 if self._generaldelta:
1689 base = rev
1693 base = rev
1690 else:
1694 else:
1691 base = chainbase
1695 base = chainbase
1692 chainlen, compresseddeltalen = self._chaininfo(rev)
1696 chainlen, compresseddeltalen = self._chaininfo(rev)
1693 chainlen += 1
1697 chainlen += 1
1694 compresseddeltalen += deltalen
1698 compresseddeltalen += deltalen
1695 return (dist, deltalen, (header, data), base,
1699 return (dist, deltalen, (header, data), base,
1696 chainbase, chainlen, compresseddeltalen)
1700 chainbase, chainlen, compresseddeltalen)
1697
1701
1698 curr = len(self)
1702 curr = len(self)
1699 prev = curr - 1
1703 prev = curr - 1
1700 offset = self.end(prev)
1704 offset = self.end(prev)
1701 delta = None
1705 delta = None
1702 p1r, p2r = self.rev(p1), self.rev(p2)
1706 p1r, p2r = self.rev(p1), self.rev(p2)
1703
1707
1704 # full versions are inserted when the needed deltas
1708 # full versions are inserted when the needed deltas
1705 # become comparable to the uncompressed text
1709 # become comparable to the uncompressed text
1706 if rawtext is None:
1710 if rawtext is None:
1707 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1711 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1708 cachedelta[1])
1712 cachedelta[1])
1709 else:
1713 else:
1710 textlen = len(rawtext)
1714 textlen = len(rawtext)
1711
1715
1712 # should we try to build a delta?
1716 # should we try to build a delta?
1713 if prev != nullrev and self.storedeltachains:
1717 if prev != nullrev and self.storedeltachains:
1714 tested = set()
1718 tested = set()
1715 # This condition is true most of the time when processing
1719 # This condition is true most of the time when processing
1716 # changegroup data into a generaldelta repo. The only time it
1720 # changegroup data into a generaldelta repo. The only time it
1717 # isn't true is if this is the first revision in a delta chain
1721 # isn't true is if this is the first revision in a delta chain
1718 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1722 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1719 if cachedelta and self._generaldelta and self._lazydeltabase:
1723 if cachedelta and self._generaldelta and self._lazydeltabase:
1720 # Assume what we received from the server is a good choice
1724 # Assume what we received from the server is a good choice
1721 # build delta will reuse the cache
1725 # build delta will reuse the cache
1722 candidatedelta = builddelta(cachedelta[0])
1726 candidatedelta = builddelta(cachedelta[0])
1723 tested.add(cachedelta[0])
1727 tested.add(cachedelta[0])
1724 if self._isgooddelta(candidatedelta, textlen):
1728 if self._isgooddelta(candidatedelta, textlen):
1725 delta = candidatedelta
1729 delta = candidatedelta
1726 if delta is None and self._generaldelta:
1730 if delta is None and self._generaldelta:
1727 # exclude already lazy tested base if any
1731 # exclude already lazy tested base if any
1728 parents = [p for p in (p1r, p2r)
1732 parents = [p for p in (p1r, p2r)
1729 if p != nullrev and p not in tested]
1733 if p != nullrev and p not in tested]
1730 if parents and not self._aggressivemergedeltas:
1734 if parents and not self._aggressivemergedeltas:
1731 # Pick whichever parent is closer to us (to minimize the
1735 # Pick whichever parent is closer to us (to minimize the
1732 # chance of having to build a fulltext).
1736 # chance of having to build a fulltext).
1733 parents = [max(parents)]
1737 parents = [max(parents)]
1734 tested.update(parents)
1738 tested.update(parents)
1735 pdeltas = []
1739 pdeltas = []
1736 for p in parents:
1740 for p in parents:
1737 pd = builddelta(p)
1741 pd = builddelta(p)
1738 if self._isgooddelta(pd, textlen):
1742 if self._isgooddelta(pd, textlen):
1739 pdeltas.append(pd)
1743 pdeltas.append(pd)
1740 if pdeltas:
1744 if pdeltas:
1741 delta = min(pdeltas, key=lambda x: x[1])
1745 delta = min(pdeltas, key=lambda x: x[1])
1742 if delta is None and prev not in tested:
1746 if delta is None and prev not in tested:
1743 # other approach failed try against prev to hopefully save us a
1747 # other approach failed try against prev to hopefully save us a
1744 # fulltext.
1748 # fulltext.
1745 candidatedelta = builddelta(prev)
1749 candidatedelta = builddelta(prev)
1746 if self._isgooddelta(candidatedelta, textlen):
1750 if self._isgooddelta(candidatedelta, textlen):
1747 delta = candidatedelta
1751 delta = candidatedelta
1748 if delta is not None:
1752 if delta is not None:
1749 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1753 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1750 else:
1754 else:
1751 rawtext = buildtext()
1755 rawtext = buildtext()
1752 data = self.compress(rawtext)
1756 data = self.compress(rawtext)
1753 l = len(data[1]) + len(data[0])
1757 l = len(data[1]) + len(data[0])
1754 base = chainbase = curr
1758 base = chainbase = curr
1755
1759
1756 e = (offset_type(offset, flags), l, textlen,
1760 e = (offset_type(offset, flags), l, textlen,
1757 base, link, p1r, p2r, node)
1761 base, link, p1r, p2r, node)
1758 self.index.insert(-1, e)
1762 self.index.insert(-1, e)
1759 self.nodemap[node] = curr
1763 self.nodemap[node] = curr
1760
1764
1761 entry = self._io.packentry(e, self.node, self.version, curr)
1765 entry = self._io.packentry(e, self.node, self.version, curr)
1762 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1766 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1763
1767
1764 if alwayscache and rawtext is None:
1768 if alwayscache and rawtext is None:
1765 rawtext = buildtext()
1769 rawtext = buildtext()
1766
1770
1767 if type(rawtext) == str: # only accept immutable objects
1771 if type(rawtext) == str: # only accept immutable objects
1768 self._cache = (node, curr, rawtext)
1772 self._cache = (node, curr, rawtext)
1769 self._chainbasecache[curr] = chainbase
1773 self._chainbasecache[curr] = chainbase
1770 return node
1774 return node
1771
1775
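# A condensed, illustrative sketch of the delta base search order used by
# _addrevision() above; builddelta/isgooddelta and the tuple layout are
# simplified stand-ins, not mercurial's actual API. Order tried: the cached
# delta from the peer, then the closest parent(s), then prev, and finally a
# full text.
def choosedeltabase(builddelta, isgooddelta, cachedbase, parents, prev):
    tested = set()
    # 1. Reuse the base the server sent, if one was cached.
    if cachedbase is not None:
        delta = builddelta(cachedbase)
        tested.add(cachedbase)
        if isgooddelta(delta):
            return delta
    # 2. Try the parent(s); with aggressivemergedeltas off, only the
    # closest (highest-numbered) parent is tried.
    candidates = [p for p in parents if p not in tested]
    tested.update(candidates)
    good = [d for d in map(builddelta, candidates) if isgooddelta(d)]
    if good:
        return min(good, key=lambda d: d[1])  # smallest compressed delta
    # 3. Last resort before a full text: delta against the previous rev.
    if prev not in tested:
        delta = builddelta(prev)
        if isgooddelta(delta):
            return delta
    return None  # caller falls back to storing a full text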
1772 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1776 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1773 # Files opened in a+ mode have inconsistent behavior on various
1777 # Files opened in a+ mode have inconsistent behavior on various
1774 # platforms. Windows requires that a file positioning call be made
1778 # platforms. Windows requires that a file positioning call be made
1775 # when the file handle transitions between reads and writes. See
1779 # when the file handle transitions between reads and writes. See
1776 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1780 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1777 # platforms, Python or the platform itself can be buggy. Some versions
1781 # platforms, Python or the platform itself can be buggy. Some versions
1778 # of Solaris have been observed to not append at the end of the file
1782 # of Solaris have been observed to not append at the end of the file
1779 # if the file was seeked to before the end. See issue4943 for more.
1783 # if the file was seeked to before the end. See issue4943 for more.
1780 #
1784 #
1781 # We work around this issue by inserting a seek() before writing.
1785 # We work around this issue by inserting a seek() before writing.
1782 # Note: This is likely not necessary on Python 3.
1786 # Note: This is likely not necessary on Python 3.
1783 ifh.seek(0, os.SEEK_END)
1787 ifh.seek(0, os.SEEK_END)
1784 if dfh:
1788 if dfh:
1785 dfh.seek(0, os.SEEK_END)
1789 dfh.seek(0, os.SEEK_END)
1786
1790
1787 curr = len(self) - 1
1791 curr = len(self) - 1
1788 if not self._inline:
1792 if not self._inline:
1789 transaction.add(self.datafile, offset)
1793 transaction.add(self.datafile, offset)
1790 transaction.add(self.indexfile, curr * len(entry))
1794 transaction.add(self.indexfile, curr * len(entry))
1791 if data[0]:
1795 if data[0]:
1792 dfh.write(data[0])
1796 dfh.write(data[0])
1793 dfh.write(data[1])
1797 dfh.write(data[1])
1794 ifh.write(entry)
1798 ifh.write(entry)
1795 else:
1799 else:
1796 offset += curr * self._io.size
1800 offset += curr * self._io.size
1797 transaction.add(self.indexfile, offset, curr)
1801 transaction.add(self.indexfile, offset, curr)
1798 ifh.write(entry)
1802 ifh.write(entry)
1799 ifh.write(data[0])
1803 ifh.write(data[0])
1800 ifh.write(data[1])
1804 ifh.write(data[1])
1801 self.checkinlinesize(transaction, ifh)
1805 self.checkinlinesize(transaction, ifh)
1802
1806
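# Minimal illustration of the workaround described in _writeentry() above:
# on an append-mode handle, explicitly seek to EOF before writing so that
# platforms with buggy a+ semantics still append at the true end of file.
# The file name is hypothetical.
import os

with open('example.dat', 'ab+') as fh:
    fh.read()                    # a read moves the file position...
    fh.seek(0, os.SEEK_END)      # ...so re-seek to EOF before writing
    fh.write(b'appended entry')  # now appends where expected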
1803 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1807 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1804 """
1808 """
1805 Add a delta group.
1809 Add a delta group.
1806
1810
1807 Given a set of deltas, add them to the revision log. The
1811 Given a set of deltas, add them to the revision log. The
1808 first delta is against its parent, which should be in our
1812 first delta is against its parent, which should be in our
1809 log; the rest are against the previous delta.
1813 log; the rest are against the previous delta.
1810
1814
1811 If ``addrevisioncb`` is defined, it will be called with arguments of
1815 If ``addrevisioncb`` is defined, it will be called with arguments of
1812 this revlog and the node that was added.
1816 this revlog and the node that was added.
1813 """
1817 """
1814
1818
1815 # nodes added by this delta group, returned to the caller
1819 # nodes added by this delta group, returned to the caller
1816 content = []
1820 content = []
1817 node = None
1821 node = None
1818
1822
1819 r = len(self)
1823 r = len(self)
1820 end = 0
1824 end = 0
1821 if r:
1825 if r:
1822 end = self.end(r - 1)
1826 end = self.end(r - 1)
1823 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1827 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1824 isize = r * self._io.size
1828 isize = r * self._io.size
1825 if self._inline:
1829 if self._inline:
1826 transaction.add(self.indexfile, end + isize, r)
1830 transaction.add(self.indexfile, end + isize, r)
1827 dfh = None
1831 dfh = None
1828 else:
1832 else:
1829 transaction.add(self.indexfile, isize, r)
1833 transaction.add(self.indexfile, isize, r)
1830 transaction.add(self.datafile, end)
1834 transaction.add(self.datafile, end)
1831 dfh = self.opener(self.datafile, "a+")
1835 dfh = self.opener(self.datafile, "a+")
1832 def flush():
1836 def flush():
1833 if dfh:
1837 if dfh:
1834 dfh.flush()
1838 dfh.flush()
1835 ifh.flush()
1839 ifh.flush()
1836 try:
1840 try:
1837 # loop through our set of deltas
1841 # loop through our set of deltas
1838 chain = None
1842 chain = None
1839 for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
1843 for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
1840 node = chunkdata['node']
1844 node = chunkdata['node']
1841 p1 = chunkdata['p1']
1845 p1 = chunkdata['p1']
1842 p2 = chunkdata['p2']
1846 p2 = chunkdata['p2']
1843 cs = chunkdata['cs']
1847 cs = chunkdata['cs']
1844 deltabase = chunkdata['deltabase']
1848 deltabase = chunkdata['deltabase']
1845 delta = chunkdata['delta']
1849 delta = chunkdata['delta']
1846 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1850 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1847
1851
1848 content.append(node)
1852 content.append(node)
1849
1853
1850 link = linkmapper(cs)
1854 link = linkmapper(cs)
1851 if node in self.nodemap:
1855 if node in self.nodemap:
1852 # this can happen if two branches make the same change
1856 # this can happen if two branches make the same change
1853 chain = node
1857 chain = node
1854 continue
1858 continue
1855
1859
1856 for p in (p1, p2):
1860 for p in (p1, p2):
1857 if p not in self.nodemap:
1861 if p not in self.nodemap:
1858 raise LookupError(p, self.indexfile,
1862 raise LookupError(p, self.indexfile,
1859 _('unknown parent'))
1863 _('unknown parent'))
1860
1864
1861 if deltabase not in self.nodemap:
1865 if deltabase not in self.nodemap:
1862 raise LookupError(deltabase, self.indexfile,
1866 raise LookupError(deltabase, self.indexfile,
1863 _('unknown delta base'))
1867 _('unknown delta base'))
1864
1868
1865 baserev = self.rev(deltabase)
1869 baserev = self.rev(deltabase)
1866
1870
1867 if baserev != nullrev and self.iscensored(baserev):
1871 if baserev != nullrev and self.iscensored(baserev):
1868 # if base is censored, delta must be full replacement in a
1872 # if base is censored, delta must be full replacement in a
1869 # single patch operation
1873 # single patch operation
1870 hlen = struct.calcsize(">lll")
1874 hlen = struct.calcsize(">lll")
1871 oldlen = self.rawsize(baserev)
1875 oldlen = self.rawsize(baserev)
1872 newlen = len(delta) - hlen
1876 newlen = len(delta) - hlen
1873 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1877 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1874 raise error.CensoredBaseError(self.indexfile,
1878 raise error.CensoredBaseError(self.indexfile,
1875 self.node(baserev))
1879 self.node(baserev))
1876
1880
1877 if not flags and self._peek_iscensored(baserev, delta, flush):
1881 if not flags and self._peek_iscensored(baserev, delta, flush):
1878 flags |= REVIDX_ISCENSORED
1882 flags |= REVIDX_ISCENSORED
1879
1883
1880 # We assume consumers of addrevisioncb will want to retrieve
1884 # We assume consumers of addrevisioncb will want to retrieve
1881 # the added revision, which will require a call to
1885 # the added revision, which will require a call to
1882 # revision(). revision() will fast path if there is a cache
1886 # revision(). revision() will fast path if there is a cache
1883 # hit. So, we tell _addrevision() to always cache in this case.
1887 # hit. So, we tell _addrevision() to always cache in this case.
1884 # We're only using addgroup() in the context of changegroup
1888 # We're only using addgroup() in the context of changegroup
1885 # generation so the revision data can always be handled as raw
1889 # generation so the revision data can always be handled as raw
1886 # by the flagprocessor.
1890 # by the flagprocessor.
1887 chain = self._addrevision(node, None, transaction, link,
1891 chain = self._addrevision(node, None, transaction, link,
1888 p1, p2, flags, (baserev, delta),
1892 p1, p2, flags, (baserev, delta),
1889 ifh, dfh,
1893 ifh, dfh,
1890 alwayscache=bool(addrevisioncb))
1894 alwayscache=bool(addrevisioncb))
1891
1895
1892 if addrevisioncb:
1896 if addrevisioncb:
1893 addrevisioncb(self, chain)
1897 addrevisioncb(self, chain)
1894
1898
1895 if not dfh and not self._inline:
1899 if not dfh and not self._inline:
1896 # addrevision switched from inline to conventional
1900 # addrevision switched from inline to conventional
1897 # reopen the index
1901 # reopen the index
1898 ifh.close()
1902 ifh.close()
1899 dfh = self.opener(self.datafile, "a+")
1903 dfh = self.opener(self.datafile, "a+")
1900 ifh = self.opener(self.indexfile, "a+",
1904 ifh = self.opener(self.indexfile, "a+",
1901 checkambig=self._checkambig)
1905 checkambig=self._checkambig)
1902 finally:
1906 finally:
1903 if dfh:
1907 if dfh:
1904 dfh.close()
1908 dfh.close()
1905 ifh.close()
1909 ifh.close()
1906
1910
1907 return content
1911 return content
1908
1912
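# The chunk loop in addgroup() above uses the two-argument form of iter():
# iter(callable, sentinel) calls the callable repeatedly and stops once it
# returns a value equal to the sentinel -- an empty dict here. A tiny
# self-contained illustration with made-up chunk data:
queue = [{'node': 'n1'}, {'node': 'n2'}]

def nextchunk():
    return queue.pop(0) if queue else {}

for chunk in iter(nextchunk, {}):
    print(chunk['node'])   # prints n1, then n2, then the loop ends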
1909 def iscensored(self, rev):
1913 def iscensored(self, rev):
1910 """Check if a file revision is censored."""
1914 """Check if a file revision is censored."""
1911 return False
1915 return False
1912
1916
1913 def _peek_iscensored(self, baserev, delta, flush):
1917 def _peek_iscensored(self, baserev, delta, flush):
1914 """Quickly check if a delta produces a censored revision."""
1918 """Quickly check if a delta produces a censored revision."""
1915 return False
1919 return False
1916
1920
1917 def getstrippoint(self, minlink):
1921 def getstrippoint(self, minlink):
1918 """find the minimum rev that must be stripped to strip the linkrev
1922 """find the minimum rev that must be stripped to strip the linkrev
1919
1923
1920 Returns a tuple containing the minimum rev and a set of all revs that
1924 Returns a tuple containing the minimum rev and a set of all revs that
1921 have linkrevs that will be broken by this strip.
1925 have linkrevs that will be broken by this strip.
1922 """
1926 """
1923 brokenrevs = set()
1927 brokenrevs = set()
1924 strippoint = len(self)
1928 strippoint = len(self)
1925
1929
1926 heads = {}
1930 heads = {}
1927 futurelargelinkrevs = set()
1931 futurelargelinkrevs = set()
1928 for head in self.headrevs():
1932 for head in self.headrevs():
1929 headlinkrev = self.linkrev(head)
1933 headlinkrev = self.linkrev(head)
1930 heads[head] = headlinkrev
1934 heads[head] = headlinkrev
1931 if headlinkrev >= minlink:
1935 if headlinkrev >= minlink:
1932 futurelargelinkrevs.add(headlinkrev)
1936 futurelargelinkrevs.add(headlinkrev)
1933
1937
1934 # This algorithm involves walking down the rev graph, starting at the
1938 # This algorithm involves walking down the rev graph, starting at the
1935 # heads. Since the revs are topologically sorted according to linkrev,
1939 # heads. Since the revs are topologically sorted according to linkrev,
1936 # once all head linkrevs are below the minlink, we know there are
1940 # once all head linkrevs are below the minlink, we know there are
1937 # no more revs that could have a linkrev greater than minlink.
1941 # no more revs that could have a linkrev greater than minlink.
1938 # So we can stop walking.
1942 # So we can stop walking.
1939 while futurelargelinkrevs:
1943 while futurelargelinkrevs:
1940 strippoint -= 1
1944 strippoint -= 1
1941 linkrev = heads.pop(strippoint)
1945 linkrev = heads.pop(strippoint)
1942
1946
1943 if linkrev < minlink:
1947 if linkrev < minlink:
1944 brokenrevs.add(strippoint)
1948 brokenrevs.add(strippoint)
1945 else:
1949 else:
1946 futurelargelinkrevs.remove(linkrev)
1950 futurelargelinkrevs.remove(linkrev)
1947
1951
1948 for p in self.parentrevs(strippoint):
1952 for p in self.parentrevs(strippoint):
1949 if p != nullrev:
1953 if p != nullrev:
1950 plinkrev = self.linkrev(p)
1954 plinkrev = self.linkrev(p)
1951 heads[p] = plinkrev
1955 heads[p] = plinkrev
1952 if plinkrev >= minlink:
1956 if plinkrev >= minlink:
1953 futurelargelinkrevs.add(plinkrev)
1957 futurelargelinkrevs.add(plinkrev)
1954
1958
1955 return strippoint, brokenrevs
1959 return strippoint, brokenrevs
1956
1960
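# A runnable, self-contained sketch of the walk in getstrippoint() above,
# on a toy linear history (rev i has parent i-1 and linkrev i). The data
# structures are illustrative; stripping linkrev >= 2 should truncate at
# rev 2 with no broken linkrevs.
nullrev = -1
linkrevs = [0, 1, 2, 3]
parentrevs = {0: (nullrev, nullrev), 1: (0, nullrev),
              2: (1, nullrev), 3: (2, nullrev)}
minlink = 2

heads = {3: linkrevs[3]}   # rev 3 is the only head
futurelarge = {lr for lr in heads.values() if lr >= minlink}
brokenrevs, strippoint = set(), len(linkrevs)
while futurelarge:
    strippoint -= 1
    linkrev = heads.pop(strippoint)
    if linkrev < minlink:
        brokenrevs.add(strippoint)
    else:
        futurelarge.discard(linkrev)
    for p in parentrevs[strippoint]:
        if p != nullrev:
            heads[p] = linkrevs[p]
            if linkrevs[p] >= minlink:
                futurelarge.add(linkrevs[p])

print(strippoint, brokenrevs)   # -> 2 set()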
1957 def strip(self, minlink, transaction):
1961 def strip(self, minlink, transaction):
1958 """truncate the revlog on the first revision with a linkrev >= minlink
1962 """truncate the revlog on the first revision with a linkrev >= minlink
1959
1963
1960 This function is called when we're stripping revision minlink and
1964 This function is called when we're stripping revision minlink and
1961 its descendants from the repository.
1965 its descendants from the repository.
1962
1966
1963 We have to remove all revisions with linkrev >= minlink, because
1967 We have to remove all revisions with linkrev >= minlink, because
1964 the equivalent changelog revisions will be renumbered after the
1968 the equivalent changelog revisions will be renumbered after the
1965 strip.
1969 strip.
1966
1970
1967 So we truncate the revlog on the first of these revisions, and
1971 So we truncate the revlog on the first of these revisions, and
1968 trust that the caller has saved the revisions that shouldn't be
1972 trust that the caller has saved the revisions that shouldn't be
1969 removed and that it'll re-add them after this truncation.
1973 removed and that it'll re-add them after this truncation.
1970 """
1974 """
1971 if len(self) == 0:
1975 if len(self) == 0:
1972 return
1976 return
1973
1977
1974 rev, _ = self.getstrippoint(minlink)
1978 rev, _ = self.getstrippoint(minlink)
1975 if rev == len(self):
1979 if rev == len(self):
1976 return
1980 return
1977
1981
1978 # first truncate the files on disk
1982 # first truncate the files on disk
1979 end = self.start(rev)
1983 end = self.start(rev)
1980 if not self._inline:
1984 if not self._inline:
1981 transaction.add(self.datafile, end)
1985 transaction.add(self.datafile, end)
1982 end = rev * self._io.size
1986 end = rev * self._io.size
1983 else:
1987 else:
1984 end += rev * self._io.size
1988 end += rev * self._io.size
1985
1989
1986 transaction.add(self.indexfile, end)
1990 transaction.add(self.indexfile, end)
1987
1991
1988 # then reset internal state in memory to forget those revisions
1992 # then reset internal state in memory to forget those revisions
1989 self._cache = None
1993 self._cache = None
1990 self._chaininfocache = {}
1994 self._chaininfocache = {}
1991 self._chunkclear()
1995 self._chunkclear()
1992 for x in xrange(rev, len(self)):
1996 for x in xrange(rev, len(self)):
1993 del self.nodemap[self.node(x)]
1997 del self.nodemap[self.node(x)]
1994
1998
1995 del self.index[rev:-1]
1999 del self.index[rev:-1]
1996
2000
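# Worked example of the truncation offsets computed in strip() above,
# assuming a hypothetical 64-byte index entry size and a strip point of
# rev 5 whose data starts at byte 12345:
iosize, rev, datastart = 64, 5, 12345   # datastart = self.start(rev)

# Separate .i/.d files: truncate the data file at the revision's data
# offset and the index file after rev whole entries.
datafile_end = datastart            # 12345
indexfile_end = rev * iosize        # 320

# Inline revlog (index entries and data interleaved in one file): the
# two sizes add up.
inline_end = datastart + rev * iosize   # 12665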
1997 def checksize(self):
2001 def checksize(self):
1998 expected = 0
2002 expected = 0
1999 if len(self):
2003 if len(self):
2000 expected = max(0, self.end(len(self) - 1))
2004 expected = max(0, self.end(len(self) - 1))
2001
2005
2002 try:
2006 try:
2003 f = self.opener(self.datafile)
2007 f = self.opener(self.datafile)
2004 f.seek(0, 2)
2008 f.seek(0, 2)
2005 actual = f.tell()
2009 actual = f.tell()
2006 f.close()
2010 f.close()
2007 dd = actual - expected
2011 dd = actual - expected
2008 except IOError as inst:
2012 except IOError as inst:
2009 if inst.errno != errno.ENOENT:
2013 if inst.errno != errno.ENOENT:
2010 raise
2014 raise
2011 dd = 0
2015 dd = 0
2012
2016
2013 try:
2017 try:
2014 f = self.opener(self.indexfile)
2018 f = self.opener(self.indexfile)
2015 f.seek(0, 2)
2019 f.seek(0, 2)
2016 actual = f.tell()
2020 actual = f.tell()
2017 f.close()
2021 f.close()
2018 s = self._io.size
2022 s = self._io.size
2019 i = max(0, actual // s)
2023 i = max(0, actual // s)
2020 di = actual - (i * s)
2024 di = actual - (i * s)
2021 if self._inline:
2025 if self._inline:
2022 databytes = 0
2026 databytes = 0
2023 for r in self:
2027 for r in self:
2024 databytes += max(0, self.length(r))
2028 databytes += max(0, self.length(r))
2025 dd = 0
2029 dd = 0
2026 di = actual - len(self) * s - databytes
2030 di = actual - len(self) * s - databytes
2027 except IOError as inst:
2031 except IOError as inst:
2028 if inst.errno != errno.ENOENT:
2032 if inst.errno != errno.ENOENT:
2029 raise
2033 raise
2030 di = 0
2034 di = 0
2031
2035
2032 return (dd, di)
2036 return (dd, di)
2033
2037
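# The (dd, di) pair returned by checksize() measures stray bytes. Worked
# example with a hypothetical 64-byte entry size: a 650-byte index file
# holds 10 complete entries plus 10 bytes of trailing garbage.
s, actual = 64, 650
i = max(0, actual // s)    # 10 complete entries
di = actual - i * s        # 10 bytes of index damage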
2034 def files(self):
2038 def files(self):
2035 res = [self.indexfile]
2039 res = [self.indexfile]
2036 if not self._inline:
2040 if not self._inline:
2037 res.append(self.datafile)
2041 res.append(self.datafile)
2038 return res
2042 return res
2039
2043
2040 DELTAREUSEALWAYS = 'always'
2044 DELTAREUSEALWAYS = 'always'
2041 DELTAREUSESAMEREVS = 'samerevs'
2045 DELTAREUSESAMEREVS = 'samerevs'
2042 DELTAREUSENEVER = 'never'
2046 DELTAREUSENEVER = 'never'
2043
2047
2044 DELTAREUSEALL = {'always', 'samerevs', 'never'}
2048 DELTAREUSEALL = {'always', 'samerevs', 'never'}
2045
2049
2046 def clone(self, tr, destrevlog, addrevisioncb=None,
2050 def clone(self, tr, destrevlog, addrevisioncb=None,
2047 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2051 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2048 """Copy this revlog to another, possibly with format changes.
2052 """Copy this revlog to another, possibly with format changes.
2049
2053
2050 The destination revlog will contain the same revisions and nodes.
2054 The destination revlog will contain the same revisions and nodes.
2051 However, it may not be bit-for-bit identical due to e.g. delta encoding
2055 However, it may not be bit-for-bit identical due to e.g. delta encoding
2052 differences.
2056 differences.
2053
2057
2054 The ``deltareuse`` argument controls how deltas from the existing revlog
2058 The ``deltareuse`` argument controls how deltas from the existing revlog
2055 are preserved in the destination revlog. The argument can have the
2059 are preserved in the destination revlog. The argument can have the
2056 following values:
2060 following values:
2057
2061
2058 DELTAREUSEALWAYS
2062 DELTAREUSEALWAYS
2059 Deltas will always be reused (if possible), even if the destination
2063 Deltas will always be reused (if possible), even if the destination
2060 revlog would not select the same revisions for the delta. This is the
2064 revlog would not select the same revisions for the delta. This is the
2061 fastest mode of operation.
2065 fastest mode of operation.
2062 DELTAREUSESAMEREVS
2066 DELTAREUSESAMEREVS
2063 Deltas will be reused if the destination revlog would pick the same
2067 Deltas will be reused if the destination revlog would pick the same
2064 revisions for the delta. This mode strikes a balance between speed
2068 revisions for the delta. This mode strikes a balance between speed
2065 and optimization.
2069 and optimization.
2066 DELTAREUSENEVER
2070 DELTAREUSENEVER
2067 Deltas will never be reused. This is the slowest mode of execution.
2071 Deltas will never be reused. This is the slowest mode of execution.
2068 This mode can be used to recompute deltas (e.g. if the diff/delta
2072 This mode can be used to recompute deltas (e.g. if the diff/delta
2069 algorithm changes).
2073 algorithm changes).
2070
2074
2071 Delta computation can be slow, so the choice of delta reuse policy can
2075 Delta computation can be slow, so the choice of delta reuse policy can
2072 significantly affect run time.
2076 significantly affect run time.
2073
2077
2074 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2078 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2075 two extremes. Deltas will be reused when appropriate. But if the
2079 two extremes. Deltas will be reused when appropriate. But if the
2076 destination could pick a better delta base, it will do so. This means if you
2080 destination could pick a better delta base, it will do so. This means if you
2077 are converting a non-generaldelta revlog to a generaldelta revlog,
2081 are converting a non-generaldelta revlog to a generaldelta revlog,
2078 deltas will be recomputed if the delta's parent isn't a parent of the
2082 deltas will be recomputed if the delta's parent isn't a parent of the
2079 revision.
2083 revision.
2080
2084
2081 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2085 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2082 controls whether to compute deltas against both parents for merges.
2086 controls whether to compute deltas against both parents for merges.
2083 When it is ``None``, the destination revlog's existing setting is used.
2087 When it is ``None``, the destination revlog's existing setting is used.
2084 """
2088 """
2085 if deltareuse not in self.DELTAREUSEALL:
2089 if deltareuse not in self.DELTAREUSEALL:
2086 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2090 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2087
2091
2088 if len(destrevlog):
2092 if len(destrevlog):
2089 raise ValueError(_('destination revlog is not empty'))
2093 raise ValueError(_('destination revlog is not empty'))
2090
2094
2091 if getattr(self, 'filteredrevs', None):
2095 if getattr(self, 'filteredrevs', None):
2092 raise ValueError(_('source revlog has filtered revisions'))
2096 raise ValueError(_('source revlog has filtered revisions'))
2093 if getattr(destrevlog, 'filteredrevs', None):
2097 if getattr(destrevlog, 'filteredrevs', None):
2094 raise ValueError(_('destination revlog has filtered revisions'))
2098 raise ValueError(_('destination revlog has filtered revisions'))
2095
2099
2096 # lazydeltabase controls whether to reuse a cached delta, if possible.
2100 # lazydeltabase controls whether to reuse a cached delta, if possible.
2097 oldlazydeltabase = destrevlog._lazydeltabase
2101 oldlazydeltabase = destrevlog._lazydeltabase
2098 oldamd = destrevlog._aggressivemergedeltas
2102 oldamd = destrevlog._aggressivemergedeltas
2099
2103
2100 try:
2104 try:
2101 if deltareuse == self.DELTAREUSEALWAYS:
2105 if deltareuse == self.DELTAREUSEALWAYS:
2102 destrevlog._lazydeltabase = True
2106 destrevlog._lazydeltabase = True
2103 elif deltareuse == self.DELTAREUSESAMEREVS:
2107 elif deltareuse == self.DELTAREUSESAMEREVS:
2104 destrevlog._lazydeltabase = False
2108 destrevlog._lazydeltabase = False
2105
2109
2106 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2110 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2107
2111
2108 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2112 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2109 self.DELTAREUSESAMEREVS)
2113 self.DELTAREUSESAMEREVS)
2110
2114
2111 index = self.index
2115 index = self.index
2112 for rev in self:
2116 for rev in self:
2113 entry = index[rev]
2117 entry = index[rev]
2114
2118
2115 # Some classes override linkrev to take filtered revs into
2119 # Some classes override linkrev to take filtered revs into
2116 # account. Use raw entry from index.
2120 # account. Use raw entry from index.
2117 flags = entry[0] & 0xffff
2121 flags = entry[0] & 0xffff
2118 linkrev = entry[4]
2122 linkrev = entry[4]
2119 p1 = index[entry[5]][7]
2123 p1 = index[entry[5]][7]
2120 p2 = index[entry[6]][7]
2124 p2 = index[entry[6]][7]
2121 node = entry[7]
2125 node = entry[7]
2122
2126
2123 # (Possibly) reuse the delta from the revlog if allowed and
2127 # (Possibly) reuse the delta from the revlog if allowed and
2124 # the revlog chunk is a delta.
2128 # the revlog chunk is a delta.
2125 cachedelta = None
2129 cachedelta = None
2126 rawtext = None
2130 rawtext = None
2127 if populatecachedelta:
2131 if populatecachedelta:
2128 dp = self.deltaparent(rev)
2132 dp = self.deltaparent(rev)
2129 if dp != nullrev:
2133 if dp != nullrev:
2130 cachedelta = (dp, str(self._chunk(rev)))
2134 cachedelta = (dp, str(self._chunk(rev)))
2131
2135
2132 if not cachedelta:
2136 if not cachedelta:
2133 rawtext = self.revision(rev, raw=True)
2137 rawtext = self.revision(rev, raw=True)
2134
2138
2135 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2139 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2136 checkambig=False)
2140 checkambig=False)
2137 dfh = None
2141 dfh = None
2138 if not destrevlog._inline:
2142 if not destrevlog._inline:
2139 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2143 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2140 try:
2144 try:
2141 destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
2145 destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
2142 flags, cachedelta, ifh, dfh)
2146 flags, cachedelta, ifh, dfh)
2143 finally:
2147 finally:
2144 if dfh:
2148 if dfh:
2145 dfh.close()
2149 dfh.close()
2146 ifh.close()
2150 ifh.close()
2147
2151
2148 if addrevisioncb:
2152 if addrevisioncb:
2149 addrevisioncb(self, rev, node)
2153 addrevisioncb(self, rev, node)
2150 finally:
2154 finally:
2151 destrevlog._lazydeltabase = oldlazydeltabase
2155 destrevlog._lazydeltabase = oldlazydeltabase
2152 destrevlog._aggressivemergedeltas = oldamd
2156 destrevlog._aggressivemergedeltas = oldamd
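# Hypothetical usage sketch of clone() above -- the transaction/revlog
# plumbing names are illustrative, not mercurial API calls verified here:
# copy a revlog while recomputing every delta, e.g. after a delta
# algorithm change.
def recompute(tr, oldlog, newlog):
    oldlog.clone(tr, newlog,
                 deltareuse=oldlog.DELTAREUSENEVER,
                 aggressivemergedeltas=True,
                 addrevisioncb=lambda rl, rev, node: None)  # or report progress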
@@ -1,72 +1,72 b''
1 $ hg init t
1 $ hg init t
2 $ cd t
2 $ cd t
3 $ echo a > a
3 $ echo a > a
4 $ hg add a
4 $ hg add a
5 $ hg commit -m test
5 $ hg commit -m test
6 $ rm .hg/requires
6 $ rm .hg/requires
7 $ hg tip
7 $ hg tip
8 abort: index 00changelog.i unknown format 2!
8 abort: unknown version (2) in revlog 00changelog.i!
9 [255]
9 [255]
10 $ echo indoor-pool > .hg/requires
10 $ echo indoor-pool > .hg/requires
11 $ hg tip
11 $ hg tip
12 abort: repository requires features unknown to this Mercurial: indoor-pool!
12 abort: repository requires features unknown to this Mercurial: indoor-pool!
13 (see https://mercurial-scm.org/wiki/MissingRequirement for more information)
13 (see https://mercurial-scm.org/wiki/MissingRequirement for more information)
14 [255]
14 [255]
15 $ echo outdoor-pool >> .hg/requires
15 $ echo outdoor-pool >> .hg/requires
16 $ hg tip
16 $ hg tip
17 abort: repository requires features unknown to this Mercurial: indoor-pool outdoor-pool!
17 abort: repository requires features unknown to this Mercurial: indoor-pool outdoor-pool!
18 (see https://mercurial-scm.org/wiki/MissingRequirement for more information)
18 (see https://mercurial-scm.org/wiki/MissingRequirement for more information)
19 [255]
19 [255]
20 $ cd ..
20 $ cd ..
21
21
22 Test checking between features supported locally and ones required in
22 Test checking between features supported locally and ones required in
23 another repository of push/pull/clone on localhost:
23 another repository of push/pull/clone on localhost:
24
24
25 $ mkdir supported-locally
25 $ mkdir supported-locally
26 $ cd supported-locally
26 $ cd supported-locally
27
27
28 $ hg init supported
28 $ hg init supported
29 $ echo a > supported/a
29 $ echo a > supported/a
30 $ hg -R supported commit -Am '#0 at supported'
30 $ hg -R supported commit -Am '#0 at supported'
31 adding a
31 adding a
32
32
33 $ echo 'featuresetup-test' >> supported/.hg/requires
33 $ echo 'featuresetup-test' >> supported/.hg/requires
34 $ cat > $TESTTMP/supported-locally/supportlocally.py <<EOF
34 $ cat > $TESTTMP/supported-locally/supportlocally.py <<EOF
35 > from mercurial import localrepo, extensions
35 > from mercurial import localrepo, extensions
36 > def featuresetup(ui, supported):
36 > def featuresetup(ui, supported):
37 > for name, module in extensions.extensions(ui):
37 > for name, module in extensions.extensions(ui):
38 > if __name__ == module.__name__:
38 > if __name__ == module.__name__:
39 > # support specific feature locally
39 > # support specific feature locally
40 > supported |= {'featuresetup-test'}
40 > supported |= {'featuresetup-test'}
41 > return
41 > return
42 > def uisetup(ui):
42 > def uisetup(ui):
43 > localrepo.localrepository.featuresetupfuncs.add(featuresetup)
43 > localrepo.localrepository.featuresetupfuncs.add(featuresetup)
44 > EOF
44 > EOF
45 $ cat > supported/.hg/hgrc <<EOF
45 $ cat > supported/.hg/hgrc <<EOF
46 > [extensions]
46 > [extensions]
47 > # enable extension locally
47 > # enable extension locally
48 > supportlocally = $TESTTMP/supported-locally/supportlocally.py
48 > supportlocally = $TESTTMP/supported-locally/supportlocally.py
49 > EOF
49 > EOF
50 $ hg -R supported status
50 $ hg -R supported status
51
51
52 $ hg init push-dst
52 $ hg init push-dst
53 $ hg -R supported push push-dst
53 $ hg -R supported push push-dst
54 pushing to push-dst
54 pushing to push-dst
55 abort: required features are not supported in the destination: featuresetup-test
55 abort: required features are not supported in the destination: featuresetup-test
56 [255]
56 [255]
57
57
58 $ hg init pull-src
58 $ hg init pull-src
59 $ hg -R pull-src pull supported
59 $ hg -R pull-src pull supported
60 pulling from supported
60 pulling from supported
61 abort: required features are not supported in the destination: featuresetup-test
61 abort: required features are not supported in the destination: featuresetup-test
62 [255]
62 [255]
63
63
64 $ hg clone supported clone-dst
64 $ hg clone supported clone-dst
65 abort: repository requires features unknown to this Mercurial: featuresetup-test!
65 abort: repository requires features unknown to this Mercurial: featuresetup-test!
66 (see https://mercurial-scm.org/wiki/MissingRequirement for more information)
66 (see https://mercurial-scm.org/wiki/MissingRequirement for more information)
67 [255]
67 [255]
68 $ hg clone --pull supported clone-dst
68 $ hg clone --pull supported clone-dst
69 abort: required features are not supported in the destination: featuresetup-test
69 abort: required features are not supported in the destination: featuresetup-test
70 [255]
70 [255]
71
71
72 $ cd ..
72 $ cd ..
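# A simplified sketch (not mercurial's actual code) of the requirements
# gate the tests above exercise: every line of .hg/requires must be in the
# set of features this client supports, otherwise the repository is
# refused before anything else is read.
def checkrequirements(requirespath, supported):
    with open(requirespath) as fh:
        requirements = {line.strip() for line in fh if line.strip()}
    missing = requirements - supported
    if missing:
        raise RuntimeError(
            'repository requires features unknown to this Mercurial: %s'
            % ' '.join(sorted(missing)))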
@@ -1,47 +1,47 b''
1 $ hg init empty-repo
1 $ hg init empty-repo
2 $ cd empty-repo
2 $ cd empty-repo
3
3
4 Flags on revlog version 0 are rejected
4 Flags on revlog version 0 are rejected
5
5
6 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
6 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
7 ... fh.write('\x00\x01\x00\x00')
7 ... fh.write('\x00\x01\x00\x00')
8
8
9 $ hg log
9 $ hg log
10 abort: index 00changelog.i unknown flags 0x01 for format v0!
10 abort: unknown flags (0x01) in version 0 revlog 00changelog.i!
11 [255]
11 [255]
12
12
13 Unknown flags on revlog version 1 are rejected
13 Unknown flags on revlog version 1 are rejected
14
14
15 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
15 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
16 ... fh.write('\x00\x04\x00\x01')
16 ... fh.write('\x00\x04\x00\x01')
17
17
18 $ hg log
18 $ hg log
19 abort: index 00changelog.i unknown flags 0x04 for revlogng!
19 abort: unknown flags (0x04) in version 1 revlog 00changelog.i!
20 [255]
20 [255]
21
21
22 Unknown version is rejected
22 Unknown version is rejected
23
23
24 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
24 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
25 ... fh.write('\x00\x00\x00\x02')
25 ... fh.write('\x00\x00\x00\x02')
26
26
27 $ hg log
27 $ hg log
28 abort: index 00changelog.i unknown format 2!
28 abort: unknown version (2) in revlog 00changelog.i!
29 [255]
29 [255]
30
30
31 $ cd ..
31 $ cd ..
32
32
33 Test for CVE-2016-3630
33 Test for CVE-2016-3630
34
34
35 $ hg init
35 $ hg init
36
36
37 >>> open("a.i", "w").write(
37 >>> open("a.i", "w").write(
38 ... """eJxjYGZgZIAAYQYGxhgom+k/FMx8YKx9ZUaKSOyqo4cnuKb8mbqHV5cBCVTMWb1Cwqkhe4Gsg9AD
38 ... """eJxjYGZgZIAAYQYGxhgom+k/FMx8YKx9ZUaKSOyqo4cnuKb8mbqHV5cBCVTMWb1Cwqkhe4Gsg9AD
39 ... Joa3dYtcYYYBAQ8Qr4OqZAYRICPTSr5WKd/42rV36d+8/VmrNpv7NP1jQAXrQE4BqQUARngwVA=="""
39 ... Joa3dYtcYYYBAQ8Qr4OqZAYRICPTSr5WKd/42rV36d+8/VmrNpv7NP1jQAXrQE4BqQUARngwVA=="""
40 ... .decode("base64").decode("zlib"))
40 ... .decode("base64").decode("zlib"))
41
41
42 $ hg debugindex a.i
42 $ hg debugindex a.i
43 rev offset length delta linkrev nodeid p1 p2
43 rev offset length delta linkrev nodeid p1 p2
44 0 0 19 -1 2 99e0332bd498 000000000000 000000000000
44 0 0 19 -1 2 99e0332bd498 000000000000 000000000000
45 1 19 12 0 3 6674f57a23d8 99e0332bd498 000000000000
45 1 19 12 0 3 6674f57a23d8 99e0332bd498 000000000000
46 $ hg debugdata a.i 1 2>&1 | egrep 'Error:.*decoded'
46 $ hg debugdata a.i 1 2>&1 | egrep 'Error:.*decoded'
47 (mercurial\.\w+\.mpatch\.)?mpatchError: patch cannot be decoded (re)
47 (mercurial\.\w+\.mpatch\.)?mpatchError: patch cannot be decoded (re)
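# A self-contained sketch of the header check these tests exercise (the
# real code lives in revlog.py and uses mercurial's error classes; this
# stand-alone version only mirrors the logic). The first 4 bytes of an
# index hold 2 bytes of feature flags and 2 bytes of version number.
import struct

REVLOGV0 = 0
REVLOGV1 = 1
FLAG_INLINE_DATA = 1 << 16
FLAG_GENERALDELTA = 1 << 17
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA

def checkheader(indexdata, indexfile):
    versionflags = struct.unpack(">I", indexdata[:4])[0]
    flags = versionflags & ~0xFFFF
    version = versionflags & 0xFFFF
    if version == REVLOGV0:
        if flags:
            raise RuntimeError('unknown flags (0x%02x) in version 0 '
                               'revlog %s' % (flags >> 16, indexfile))
    elif version == REVLOGV1:
        if flags & ~REVLOGV1_FLAGS:
            raise RuntimeError('unknown flags (0x%02x) in version 1 '
                               'revlog %s' % (flags >> 16, indexfile))
    else:
        raise RuntimeError('unknown version (%d) in revlog %s'
                           % (version, indexfile))

# checkheader(b'\x00\x01\x00\x00', '00changelog.i')  # flags 0x01 on v0 -> error
# checkheader(b'\x00\x00\x00\x02', '00changelog.i')  # version 2       -> error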