revlog: define ellipsis flag processors in core...
Gregory Szorc
r39803:db088e13 default
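
This changeset moves the narrow extension's no-op flag processors for REVIDX_ELLIPSIS into mercurial/revlog.py, where they are exposed as ellipsisreadprocessor, ellipsiswriteprocessor and ellipsisrawprocessor, bundled into the ellipsisprocessor 3-tuple. Registration in the extension collapses to a single call; a minimal sketch of the resulting usage, mirroring the new narrowrevlog.py body below:

    from mercurial import revlog

    # The (read, write, raw) processor tuple for ellipsis revisions now
    # ships in core, so registering the flag is a one-liner.
    revlog.addflagprocessor(revlog.REVIDX_ELLIPSIS, revlog.ellipsisprocessor)
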
diff --git a/hgext/narrow/narrowrevlog.py b/hgext/narrow/narrowrevlog.py
--- a/hgext/narrow/narrowrevlog.py
+++ b/hgext/narrow/narrowrevlog.py
@@ -1,29 +1,19 @@
 # narrowrevlog.py - revlog storing irrelevant nodes as "ellipsis" nodes
 #
 # Copyright 2017 Google, Inc.
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 from __future__ import absolute_import
 
 from mercurial import (
     revlog,
 )
 
-def readtransform(self, text):
-    return text, False
-
-def writetransform(self, text):
-    return text, False
-
-def rawtransform(self, text):
-    return False
-
-revlog.addflagprocessor(revlog.REVIDX_ELLIPSIS,
-                        (readtransform, writetransform, rawtransform))
+revlog.addflagprocessor(revlog.REVIDX_ELLIPSIS, revlog.ellipsisprocessor)
 
 def setup():
     # We just wanted to add the flag processor, which is done at module
     # load time.
     pass
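
Flag processors obey the (read, write, raw) contract documented in revlog.addflagprocessor() below: "read" maps stored rawtext to the user-visible text, "write" maps text back to rawtext, and the boolean each returns says whether its result may be used for hash integrity checking; "raw" returns only that boolean. A sketch of a content-changing processor for a hypothetical extension-defined flag (the reverse* names are illustrative, not part of Mercurial):

    # Hypothetical processor that stores text reversed. Following the
    # docstring below: "read" returns True (its output is what the node
    # hash covers), while "write" and "raw" return False (the stored
    # rawtext cannot be hash-checked directly).
    def reversereadprocessor(rl, rawtext):
        return rawtext[::-1], True

    def reversewriteprocessor(rl, text):
        return text[::-1], False

    def reverserawprocessor(rl, rawtext):
        return False

    # The flag being registered would have to appear in REVIDX_KNOWN_FLAGS
    # and REVIDX_FLAGS_ORDER, per the invariants in addflagprocessor().
    reverseprocessor = (reversereadprocessor,
                        reversewriteprocessor,
                        reverserawprocessor)

The ellipsis processors added below are the degenerate case: they pass text through unchanged and return False from all three callables, so hash validation is skipped for revisions carrying the ellipsis flag.
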
@@ -1,2475 +1,2491 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import hashlib
19 import hashlib
20 import os
20 import os
21 import re
21 import re
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullhex,
29 nullhex,
30 nullid,
30 nullid,
31 nullrev,
31 nullrev,
32 wdirfilenodeids,
32 wdirfilenodeids,
33 wdirhex,
33 wdirhex,
34 wdirid,
34 wdirid,
35 wdirrev,
35 wdirrev,
36 )
36 )
37 from .i18n import _
37 from .i18n import _
38 from .revlogutils.constants import (
38 from .revlogutils.constants import (
39 FLAG_GENERALDELTA,
39 FLAG_GENERALDELTA,
40 FLAG_INLINE_DATA,
40 FLAG_INLINE_DATA,
41 REVIDX_DEFAULT_FLAGS,
41 REVIDX_DEFAULT_FLAGS,
42 REVIDX_ELLIPSIS,
42 REVIDX_ELLIPSIS,
43 REVIDX_EXTSTORED,
43 REVIDX_EXTSTORED,
44 REVIDX_FLAGS_ORDER,
44 REVIDX_FLAGS_ORDER,
45 REVIDX_ISCENSORED,
45 REVIDX_ISCENSORED,
46 REVIDX_KNOWN_FLAGS,
46 REVIDX_KNOWN_FLAGS,
47 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 REVIDX_RAWTEXT_CHANGING_FLAGS,
48 REVLOGV0,
48 REVLOGV0,
49 REVLOGV1,
49 REVLOGV1,
50 REVLOGV1_FLAGS,
50 REVLOGV1_FLAGS,
51 REVLOGV2,
51 REVLOGV2,
52 REVLOGV2_FLAGS,
52 REVLOGV2_FLAGS,
53 REVLOG_DEFAULT_FLAGS,
53 REVLOG_DEFAULT_FLAGS,
54 REVLOG_DEFAULT_FORMAT,
54 REVLOG_DEFAULT_FORMAT,
55 REVLOG_DEFAULT_VERSION,
55 REVLOG_DEFAULT_VERSION,
56 )
56 )
57 from .thirdparty import (
57 from .thirdparty import (
58 attr,
58 attr,
59 )
59 )
60 from . import (
60 from . import (
61 ancestor,
61 ancestor,
62 error,
62 error,
63 mdiff,
63 mdiff,
64 policy,
64 policy,
65 pycompat,
65 pycompat,
66 repository,
66 repository,
67 templatefilters,
67 templatefilters,
68 util,
68 util,
69 )
69 )
70 from .revlogutils import (
70 from .revlogutils import (
71 deltas as deltautil,
71 deltas as deltautil,
72 )
72 )
73 from .utils import (
73 from .utils import (
74 interfaceutil,
74 interfaceutil,
75 stringutil,
75 stringutil,
76 )
76 )
77
77
78 # blanked usage of all the name to prevent pyflakes constraints
78 # blanked usage of all the name to prevent pyflakes constraints
79 # We need these name available in the module for extensions.
79 # We need these name available in the module for extensions.
80 REVLOGV0
80 REVLOGV0
81 REVLOGV1
81 REVLOGV1
82 REVLOGV2
82 REVLOGV2
83 FLAG_INLINE_DATA
83 FLAG_INLINE_DATA
84 FLAG_GENERALDELTA
84 FLAG_GENERALDELTA
85 REVLOG_DEFAULT_FLAGS
85 REVLOG_DEFAULT_FLAGS
86 REVLOG_DEFAULT_FORMAT
86 REVLOG_DEFAULT_FORMAT
87 REVLOG_DEFAULT_VERSION
87 REVLOG_DEFAULT_VERSION
88 REVLOGV1_FLAGS
88 REVLOGV1_FLAGS
89 REVLOGV2_FLAGS
89 REVLOGV2_FLAGS
90 REVIDX_ISCENSORED
90 REVIDX_ISCENSORED
91 REVIDX_ELLIPSIS
91 REVIDX_ELLIPSIS
92 REVIDX_EXTSTORED
92 REVIDX_EXTSTORED
93 REVIDX_DEFAULT_FLAGS
93 REVIDX_DEFAULT_FLAGS
94 REVIDX_FLAGS_ORDER
94 REVIDX_FLAGS_ORDER
95 REVIDX_KNOWN_FLAGS
95 REVIDX_KNOWN_FLAGS
96 REVIDX_RAWTEXT_CHANGING_FLAGS
96 REVIDX_RAWTEXT_CHANGING_FLAGS
97
97
98 parsers = policy.importmod(r'parsers')
98 parsers = policy.importmod(r'parsers')
99
99
100 # Aliased for performance.
100 # Aliased for performance.
101 _zlibdecompress = zlib.decompress
101 _zlibdecompress = zlib.decompress
102
102
103 # max size of revlog with inline data
103 # max size of revlog with inline data
104 _maxinline = 131072
104 _maxinline = 131072
105 _chunksize = 1048576
105 _chunksize = 1048576
106
106
107 RevlogError = error.RevlogError
107 RevlogError = error.RevlogError
108 LookupError = error.LookupError
108 LookupError = error.LookupError
109 AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
109 AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
110 CensoredNodeError = error.CensoredNodeError
110 CensoredNodeError = error.CensoredNodeError
111 ProgrammingError = error.ProgrammingError
111 ProgrammingError = error.ProgrammingError
112
112
113 # Store flag processors (cf. 'addflagprocessor()' to register)
113 # Store flag processors (cf. 'addflagprocessor()' to register)
114 _flagprocessors = {
114 _flagprocessors = {
115 REVIDX_ISCENSORED: None,
115 REVIDX_ISCENSORED: None,
116 }
116 }
117
117
118 # Flag processors for REVIDX_ELLIPSIS.
119 def ellipsisreadprocessor(rl, text):
120 return text, False
121
122 def ellipsiswriteprocessor(rl, text):
123 return text, False
124
125 def ellipsisrawprocessor(rl, text):
126 return False
127
128 ellipsisprocessor = (
129 ellipsisreadprocessor,
130 ellipsiswriteprocessor,
131 ellipsisrawprocessor,
132 )
133
118 _mdre = re.compile('\1\n')
134 _mdre = re.compile('\1\n')
119 def parsemeta(text):
135 def parsemeta(text):
120 """return (metadatadict, metadatasize)"""
136 """return (metadatadict, metadatasize)"""
121 # text can be buffer, so we can't use .startswith or .index
137 # text can be buffer, so we can't use .startswith or .index
122 if text[:2] != '\1\n':
138 if text[:2] != '\1\n':
123 return None, None
139 return None, None
124 s = _mdre.search(text, 2).start()
140 s = _mdre.search(text, 2).start()
125 mtext = text[2:s]
141 mtext = text[2:s]
126 meta = {}
142 meta = {}
127 for l in mtext.splitlines():
143 for l in mtext.splitlines():
128 k, v = l.split(": ", 1)
144 k, v = l.split(": ", 1)
129 meta[k] = v
145 meta[k] = v
130 return meta, (s + 2)
146 return meta, (s + 2)
131
147
132 def packmeta(meta, text):
148 def packmeta(meta, text):
133 keys = sorted(meta)
149 keys = sorted(meta)
134 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
150 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
135 return "\1\n%s\1\n%s" % (metatext, text)
151 return "\1\n%s\1\n%s" % (metatext, text)
136
152
137 def _censoredtext(text):
153 def _censoredtext(text):
138 m, offs = parsemeta(text)
154 m, offs = parsemeta(text)
139 return m and "censored" in m
155 return m and "censored" in m
140
156
141 def addflagprocessor(flag, processor):
157 def addflagprocessor(flag, processor):
142 """Register a flag processor on a revision data flag.
158 """Register a flag processor on a revision data flag.
143
159
144 Invariant:
160 Invariant:
145 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
161 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
146 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
162 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
147 - Only one flag processor can be registered on a specific flag.
163 - Only one flag processor can be registered on a specific flag.
148 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
164 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
149 following signatures:
165 following signatures:
150 - (read) f(self, rawtext) -> text, bool
166 - (read) f(self, rawtext) -> text, bool
151 - (write) f(self, text) -> rawtext, bool
167 - (write) f(self, text) -> rawtext, bool
152 - (raw) f(self, rawtext) -> bool
168 - (raw) f(self, rawtext) -> bool
153 "text" is presented to the user. "rawtext" is stored in revlog data, not
169 "text" is presented to the user. "rawtext" is stored in revlog data, not
154 directly visible to the user.
170 directly visible to the user.
155 The boolean returned by these transforms is used to determine whether
171 The boolean returned by these transforms is used to determine whether
156 the returned text can be used for hash integrity checking. For example,
172 the returned text can be used for hash integrity checking. For example,
157 if "write" returns False, then "text" is used to generate hash. If
173 if "write" returns False, then "text" is used to generate hash. If
158 "write" returns True, that basically means "rawtext" returned by "write"
174 "write" returns True, that basically means "rawtext" returned by "write"
159 should be used to generate hash. Usually, "write" and "read" return
175 should be used to generate hash. Usually, "write" and "read" return
160 different booleans. And "raw" returns a same boolean as "write".
176 different booleans. And "raw" returns a same boolean as "write".
161
177
162 Note: The 'raw' transform is used for changegroup generation and in some
178 Note: The 'raw' transform is used for changegroup generation and in some
163 debug commands. In this case the transform only indicates whether the
179 debug commands. In this case the transform only indicates whether the
164 contents can be used for hash integrity checks.
180 contents can be used for hash integrity checks.
165 """
181 """
166 if not flag & REVIDX_KNOWN_FLAGS:
182 if not flag & REVIDX_KNOWN_FLAGS:
167 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
183 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
168 raise ProgrammingError(msg)
184 raise ProgrammingError(msg)
169 if flag not in REVIDX_FLAGS_ORDER:
185 if flag not in REVIDX_FLAGS_ORDER:
170 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
186 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
171 raise ProgrammingError(msg)
187 raise ProgrammingError(msg)
172 if flag in _flagprocessors:
188 if flag in _flagprocessors:
173 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
189 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
174 raise error.Abort(msg)
190 raise error.Abort(msg)
175 _flagprocessors[flag] = processor
191 _flagprocessors[flag] = processor
176
192
177 def getoffset(q):
193 def getoffset(q):
178 return int(q >> 16)
194 return int(q >> 16)
179
195
180 def gettype(q):
196 def gettype(q):
181 return int(q & 0xFFFF)
197 return int(q & 0xFFFF)
182
198
183 def offset_type(offset, type):
199 def offset_type(offset, type):
184 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
200 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
185 raise ValueError('unknown revlog index flags')
201 raise ValueError('unknown revlog index flags')
186 return int(int(offset) << 16 | type)
202 return int(int(offset) << 16 | type)
187
203
188 _nullhash = hashlib.sha1(nullid)
204 _nullhash = hashlib.sha1(nullid)
189
205
190 def hash(text, p1, p2):
206 def hash(text, p1, p2):
191 """generate a hash from the given text and its parent hashes
207 """generate a hash from the given text and its parent hashes
192
208
193 This hash combines both the current file contents and its history
209 This hash combines both the current file contents and its history
194 in a manner that makes it easy to distinguish nodes with the same
210 in a manner that makes it easy to distinguish nodes with the same
195 content in the revision graph.
211 content in the revision graph.
196 """
212 """
197 # As of now, if one of the parent node is null, p2 is null
213 # As of now, if one of the parent node is null, p2 is null
198 if p2 == nullid:
214 if p2 == nullid:
199 # deep copy of a hash is faster than creating one
215 # deep copy of a hash is faster than creating one
200 s = _nullhash.copy()
216 s = _nullhash.copy()
201 s.update(p1)
217 s.update(p1)
202 else:
218 else:
203 # none of the parent nodes are nullid
219 # none of the parent nodes are nullid
204 if p1 < p2:
220 if p1 < p2:
205 a = p1
221 a = p1
206 b = p2
222 b = p2
207 else:
223 else:
208 a = p2
224 a = p2
209 b = p1
225 b = p1
210 s = hashlib.sha1(a)
226 s = hashlib.sha1(a)
211 s.update(b)
227 s.update(b)
212 s.update(text)
228 s.update(text)
213 return s.digest()
229 return s.digest()
214
230
215 @attr.s(slots=True, frozen=True)
231 @attr.s(slots=True, frozen=True)
216 class _revisioninfo(object):
232 class _revisioninfo(object):
217 """Information about a revision that allows building its fulltext
233 """Information about a revision that allows building its fulltext
218 node: expected hash of the revision
234 node: expected hash of the revision
219 p1, p2: parent revs of the revision
235 p1, p2: parent revs of the revision
220 btext: built text cache consisting of a one-element list
236 btext: built text cache consisting of a one-element list
221 cachedelta: (baserev, uncompressed_delta) or None
237 cachedelta: (baserev, uncompressed_delta) or None
222 flags: flags associated to the revision storage
238 flags: flags associated to the revision storage
223
239
224 One of btext[0] or cachedelta must be set.
240 One of btext[0] or cachedelta must be set.
225 """
241 """
226 node = attr.ib()
242 node = attr.ib()
227 p1 = attr.ib()
243 p1 = attr.ib()
228 p2 = attr.ib()
244 p2 = attr.ib()
229 btext = attr.ib()
245 btext = attr.ib()
230 textlen = attr.ib()
246 textlen = attr.ib()
231 cachedelta = attr.ib()
247 cachedelta = attr.ib()
232 flags = attr.ib()
248 flags = attr.ib()
233
249
234 @interfaceutil.implementer(repository.irevisiondelta)
250 @interfaceutil.implementer(repository.irevisiondelta)
235 @attr.s(slots=True, frozen=True)
251 @attr.s(slots=True, frozen=True)
236 class revlogrevisiondelta(object):
252 class revlogrevisiondelta(object):
237 node = attr.ib()
253 node = attr.ib()
238 p1node = attr.ib()
254 p1node = attr.ib()
239 p2node = attr.ib()
255 p2node = attr.ib()
240 basenode = attr.ib()
256 basenode = attr.ib()
241 linknode = attr.ib()
257 linknode = attr.ib()
242 flags = attr.ib()
258 flags = attr.ib()
243 baserevisionsize = attr.ib()
259 baserevisionsize = attr.ib()
244 revision = attr.ib()
260 revision = attr.ib()
245 delta = attr.ib()
261 delta = attr.ib()
246
262
247 # index v0:
263 # index v0:
248 # 4 bytes: offset
264 # 4 bytes: offset
249 # 4 bytes: compressed length
265 # 4 bytes: compressed length
250 # 4 bytes: base rev
266 # 4 bytes: base rev
251 # 4 bytes: link rev
267 # 4 bytes: link rev
252 # 20 bytes: parent 1 nodeid
268 # 20 bytes: parent 1 nodeid
253 # 20 bytes: parent 2 nodeid
269 # 20 bytes: parent 2 nodeid
254 # 20 bytes: nodeid
270 # 20 bytes: nodeid
255 indexformatv0 = struct.Struct(">4l20s20s20s")
271 indexformatv0 = struct.Struct(">4l20s20s20s")
256 indexformatv0_pack = indexformatv0.pack
272 indexformatv0_pack = indexformatv0.pack
257 indexformatv0_unpack = indexformatv0.unpack
273 indexformatv0_unpack = indexformatv0.unpack
258
274
259 class revlogoldindex(list):
275 class revlogoldindex(list):
260 def __getitem__(self, i):
276 def __getitem__(self, i):
261 if i == -1:
277 if i == -1:
262 return (0, 0, 0, -1, -1, -1, -1, nullid)
278 return (0, 0, 0, -1, -1, -1, -1, nullid)
263 return list.__getitem__(self, i)
279 return list.__getitem__(self, i)
264
280
265 class revlogoldio(object):
281 class revlogoldio(object):
266 def __init__(self):
282 def __init__(self):
267 self.size = indexformatv0.size
283 self.size = indexformatv0.size
268
284
269 def parseindex(self, data, inline):
285 def parseindex(self, data, inline):
270 s = self.size
286 s = self.size
271 index = []
287 index = []
272 nodemap = {nullid: nullrev}
288 nodemap = {nullid: nullrev}
273 n = off = 0
289 n = off = 0
274 l = len(data)
290 l = len(data)
275 while off + s <= l:
291 while off + s <= l:
276 cur = data[off:off + s]
292 cur = data[off:off + s]
277 off += s
293 off += s
278 e = indexformatv0_unpack(cur)
294 e = indexformatv0_unpack(cur)
279 # transform to revlogv1 format
295 # transform to revlogv1 format
280 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
296 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
281 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
297 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
282 index.append(e2)
298 index.append(e2)
283 nodemap[e[6]] = n
299 nodemap[e[6]] = n
284 n += 1
300 n += 1
285
301
286 return revlogoldindex(index), nodemap, None
302 return revlogoldindex(index), nodemap, None
287
303
288 def packentry(self, entry, node, version, rev):
304 def packentry(self, entry, node, version, rev):
289 if gettype(entry[0]):
305 if gettype(entry[0]):
290 raise RevlogError(_('index entry flags need revlog version 1'))
306 raise RevlogError(_('index entry flags need revlog version 1'))
291 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
307 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
292 node(entry[5]), node(entry[6]), entry[7])
308 node(entry[5]), node(entry[6]), entry[7])
293 return indexformatv0_pack(*e2)
309 return indexformatv0_pack(*e2)
294
310
295 # index ng:
311 # index ng:
296 # 6 bytes: offset
312 # 6 bytes: offset
297 # 2 bytes: flags
313 # 2 bytes: flags
298 # 4 bytes: compressed length
314 # 4 bytes: compressed length
299 # 4 bytes: uncompressed length
315 # 4 bytes: uncompressed length
300 # 4 bytes: base rev
316 # 4 bytes: base rev
301 # 4 bytes: link rev
317 # 4 bytes: link rev
302 # 4 bytes: parent 1 rev
318 # 4 bytes: parent 1 rev
303 # 4 bytes: parent 2 rev
319 # 4 bytes: parent 2 rev
304 # 32 bytes: nodeid
320 # 32 bytes: nodeid
305 indexformatng = struct.Struct(">Qiiiiii20s12x")
321 indexformatng = struct.Struct(">Qiiiiii20s12x")
306 indexformatng_pack = indexformatng.pack
322 indexformatng_pack = indexformatng.pack
307 versionformat = struct.Struct(">I")
323 versionformat = struct.Struct(">I")
308 versionformat_pack = versionformat.pack
324 versionformat_pack = versionformat.pack
309 versionformat_unpack = versionformat.unpack
325 versionformat_unpack = versionformat.unpack
310
326
311 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
327 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
312 # signed integer)
328 # signed integer)
313 _maxentrysize = 0x7fffffff
329 _maxentrysize = 0x7fffffff
314
330
315 class revlogio(object):
331 class revlogio(object):
316 def __init__(self):
332 def __init__(self):
317 self.size = indexformatng.size
333 self.size = indexformatng.size
318
334
319 def parseindex(self, data, inline):
335 def parseindex(self, data, inline):
320 # call the C implementation to parse the index data
336 # call the C implementation to parse the index data
321 index, cache = parsers.parse_index2(data, inline)
337 index, cache = parsers.parse_index2(data, inline)
322 return index, getattr(index, 'nodemap', None), cache
338 return index, getattr(index, 'nodemap', None), cache
323
339
324 def packentry(self, entry, node, version, rev):
340 def packentry(self, entry, node, version, rev):
325 p = indexformatng_pack(*entry)
341 p = indexformatng_pack(*entry)
326 if rev == 0:
342 if rev == 0:
327 p = versionformat_pack(version) + p[4:]
343 p = versionformat_pack(version) + p[4:]
328 return p
344 return p
329
345
330 class revlog(object):
346 class revlog(object):
331 """
347 """
332 the underlying revision storage object
348 the underlying revision storage object
333
349
334 A revlog consists of two parts, an index and the revision data.
350 A revlog consists of two parts, an index and the revision data.
335
351
336 The index is a file with a fixed record size containing
352 The index is a file with a fixed record size containing
337 information on each revision, including its nodeid (hash), the
353 information on each revision, including its nodeid (hash), the
338 nodeids of its parents, the position and offset of its data within
354 nodeids of its parents, the position and offset of its data within
339 the data file, and the revision it's based on. Finally, each entry
355 the data file, and the revision it's based on. Finally, each entry
340 contains a linkrev entry that can serve as a pointer to external
356 contains a linkrev entry that can serve as a pointer to external
341 data.
357 data.
342
358
343 The revision data itself is a linear collection of data chunks.
359 The revision data itself is a linear collection of data chunks.
344 Each chunk represents a revision and is usually represented as a
360 Each chunk represents a revision and is usually represented as a
345 delta against the previous chunk. To bound lookup time, runs of
361 delta against the previous chunk. To bound lookup time, runs of
346 deltas are limited to about 2 times the length of the original
362 deltas are limited to about 2 times the length of the original
347 version data. This makes retrieval of a version proportional to
363 version data. This makes retrieval of a version proportional to
348 its size, or O(1) relative to the number of revisions.
364 its size, or O(1) relative to the number of revisions.
349
365
350 Both pieces of the revlog are written to in an append-only
366 Both pieces of the revlog are written to in an append-only
351 fashion, which means we never need to rewrite a file to insert or
367 fashion, which means we never need to rewrite a file to insert or
352 remove data, and can use some simple techniques to avoid the need
368 remove data, and can use some simple techniques to avoid the need
353 for locking while reading.
369 for locking while reading.
354
370
355 If checkambig, indexfile is opened with checkambig=True at
371 If checkambig, indexfile is opened with checkambig=True at
356 writing, to avoid file stat ambiguity.
372 writing, to avoid file stat ambiguity.
357
373
358 If mmaplargeindex is True, and an mmapindexthreshold is set, the
374 If mmaplargeindex is True, and an mmapindexthreshold is set, the
359 index will be mmapped rather than read if it is larger than the
375 index will be mmapped rather than read if it is larger than the
360 configured threshold.
376 configured threshold.
361
377
362 If censorable is True, the revlog can have censored revisions.
378 If censorable is True, the revlog can have censored revisions.
363 """
379 """
364 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
380 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
365 mmaplargeindex=False, censorable=False):
381 mmaplargeindex=False, censorable=False):
366 """
382 """
367 create a revlog object
383 create a revlog object
368
384
369 opener is a function that abstracts the file opening operation
385 opener is a function that abstracts the file opening operation
370 and can be used to implement COW semantics or the like.
386 and can be used to implement COW semantics or the like.
371 """
387 """
372 self.indexfile = indexfile
388 self.indexfile = indexfile
373 self.datafile = datafile or (indexfile[:-2] + ".d")
389 self.datafile = datafile or (indexfile[:-2] + ".d")
374 self.opener = opener
390 self.opener = opener
375 # When True, indexfile is opened with checkambig=True at writing, to
391 # When True, indexfile is opened with checkambig=True at writing, to
376 # avoid file stat ambiguity.
392 # avoid file stat ambiguity.
377 self._checkambig = checkambig
393 self._checkambig = checkambig
378 self._censorable = censorable
394 self._censorable = censorable
379 # 3-tuple of (node, rev, text) for a raw revision.
395 # 3-tuple of (node, rev, text) for a raw revision.
380 self._cache = None
396 self._cache = None
381 # Maps rev to chain base rev.
397 # Maps rev to chain base rev.
382 self._chainbasecache = util.lrucachedict(100)
398 self._chainbasecache = util.lrucachedict(100)
383 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
399 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
384 self._chunkcache = (0, '')
400 self._chunkcache = (0, '')
385 # How much data to read and cache into the raw revlog data cache.
401 # How much data to read and cache into the raw revlog data cache.
386 self._chunkcachesize = 65536
402 self._chunkcachesize = 65536
387 self._maxchainlen = None
403 self._maxchainlen = None
388 self._deltabothparents = True
404 self._deltabothparents = True
389 self.index = []
405 self.index = []
390 # Mapping of partial identifiers to full nodes.
406 # Mapping of partial identifiers to full nodes.
391 self._pcache = {}
407 self._pcache = {}
392 # Mapping of revision integer to full node.
408 # Mapping of revision integer to full node.
393 self._nodecache = {nullid: nullrev}
409 self._nodecache = {nullid: nullrev}
394 self._nodepos = None
410 self._nodepos = None
395 self._compengine = 'zlib'
411 self._compengine = 'zlib'
396 self._maxdeltachainspan = -1
412 self._maxdeltachainspan = -1
397 self._withsparseread = False
413 self._withsparseread = False
398 self._sparserevlog = False
414 self._sparserevlog = False
399 self._srdensitythreshold = 0.50
415 self._srdensitythreshold = 0.50
400 self._srmingapsize = 262144
416 self._srmingapsize = 262144
401
417
402 mmapindexthreshold = None
418 mmapindexthreshold = None
403 v = REVLOG_DEFAULT_VERSION
419 v = REVLOG_DEFAULT_VERSION
404 opts = getattr(opener, 'options', None)
420 opts = getattr(opener, 'options', None)
405 if opts is not None:
421 if opts is not None:
406 if 'revlogv2' in opts:
422 if 'revlogv2' in opts:
407 # version 2 revlogs always use generaldelta.
423 # version 2 revlogs always use generaldelta.
408 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
424 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
409 elif 'revlogv1' in opts:
425 elif 'revlogv1' in opts:
410 if 'generaldelta' in opts:
426 if 'generaldelta' in opts:
411 v |= FLAG_GENERALDELTA
427 v |= FLAG_GENERALDELTA
412 else:
428 else:
413 v = 0
429 v = 0
414 if 'chunkcachesize' in opts:
430 if 'chunkcachesize' in opts:
415 self._chunkcachesize = opts['chunkcachesize']
431 self._chunkcachesize = opts['chunkcachesize']
416 if 'maxchainlen' in opts:
432 if 'maxchainlen' in opts:
417 self._maxchainlen = opts['maxchainlen']
433 self._maxchainlen = opts['maxchainlen']
418 if 'deltabothparents' in opts:
434 if 'deltabothparents' in opts:
419 self._deltabothparents = opts['deltabothparents']
435 self._deltabothparents = opts['deltabothparents']
420 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
436 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
421 if 'compengine' in opts:
437 if 'compengine' in opts:
422 self._compengine = opts['compengine']
438 self._compengine = opts['compengine']
423 if 'maxdeltachainspan' in opts:
439 if 'maxdeltachainspan' in opts:
424 self._maxdeltachainspan = opts['maxdeltachainspan']
440 self._maxdeltachainspan = opts['maxdeltachainspan']
425 if mmaplargeindex and 'mmapindexthreshold' in opts:
441 if mmaplargeindex and 'mmapindexthreshold' in opts:
426 mmapindexthreshold = opts['mmapindexthreshold']
442 mmapindexthreshold = opts['mmapindexthreshold']
427 self._sparserevlog = bool(opts.get('sparse-revlog', False))
443 self._sparserevlog = bool(opts.get('sparse-revlog', False))
428 withsparseread = bool(opts.get('with-sparse-read', False))
444 withsparseread = bool(opts.get('with-sparse-read', False))
429 # sparse-revlog forces sparse-read
445 # sparse-revlog forces sparse-read
430 self._withsparseread = self._sparserevlog or withsparseread
446 self._withsparseread = self._sparserevlog or withsparseread
431 if 'sparse-read-density-threshold' in opts:
447 if 'sparse-read-density-threshold' in opts:
432 self._srdensitythreshold = opts['sparse-read-density-threshold']
448 self._srdensitythreshold = opts['sparse-read-density-threshold']
433 if 'sparse-read-min-gap-size' in opts:
449 if 'sparse-read-min-gap-size' in opts:
434 self._srmingapsize = opts['sparse-read-min-gap-size']
450 self._srmingapsize = opts['sparse-read-min-gap-size']
435
451
436 if self._chunkcachesize <= 0:
452 if self._chunkcachesize <= 0:
437 raise RevlogError(_('revlog chunk cache size %r is not greater '
453 raise RevlogError(_('revlog chunk cache size %r is not greater '
438 'than 0') % self._chunkcachesize)
454 'than 0') % self._chunkcachesize)
439 elif self._chunkcachesize & (self._chunkcachesize - 1):
455 elif self._chunkcachesize & (self._chunkcachesize - 1):
440 raise RevlogError(_('revlog chunk cache size %r is not a power '
456 raise RevlogError(_('revlog chunk cache size %r is not a power '
441 'of 2') % self._chunkcachesize)
457 'of 2') % self._chunkcachesize)
442
458
443 indexdata = ''
459 indexdata = ''
444 self._initempty = True
460 self._initempty = True
445 try:
461 try:
446 with self._indexfp() as f:
462 with self._indexfp() as f:
447 if (mmapindexthreshold is not None and
463 if (mmapindexthreshold is not None and
448 self.opener.fstat(f).st_size >= mmapindexthreshold):
464 self.opener.fstat(f).st_size >= mmapindexthreshold):
449 indexdata = util.buffer(util.mmapread(f))
465 indexdata = util.buffer(util.mmapread(f))
450 else:
466 else:
451 indexdata = f.read()
467 indexdata = f.read()
452 if len(indexdata) > 0:
468 if len(indexdata) > 0:
453 v = versionformat_unpack(indexdata[:4])[0]
469 v = versionformat_unpack(indexdata[:4])[0]
454 self._initempty = False
470 self._initempty = False
455 except IOError as inst:
471 except IOError as inst:
456 if inst.errno != errno.ENOENT:
472 if inst.errno != errno.ENOENT:
457 raise
473 raise
458
474
459 self.version = v
475 self.version = v
460 self._inline = v & FLAG_INLINE_DATA
476 self._inline = v & FLAG_INLINE_DATA
461 self._generaldelta = v & FLAG_GENERALDELTA
477 self._generaldelta = v & FLAG_GENERALDELTA
462 flags = v & ~0xFFFF
478 flags = v & ~0xFFFF
463 fmt = v & 0xFFFF
479 fmt = v & 0xFFFF
464 if fmt == REVLOGV0:
480 if fmt == REVLOGV0:
465 if flags:
481 if flags:
466 raise RevlogError(_('unknown flags (%#04x) in version %d '
482 raise RevlogError(_('unknown flags (%#04x) in version %d '
467 'revlog %s') %
483 'revlog %s') %
468 (flags >> 16, fmt, self.indexfile))
484 (flags >> 16, fmt, self.indexfile))
469 elif fmt == REVLOGV1:
485 elif fmt == REVLOGV1:
470 if flags & ~REVLOGV1_FLAGS:
486 if flags & ~REVLOGV1_FLAGS:
471 raise RevlogError(_('unknown flags (%#04x) in version %d '
487 raise RevlogError(_('unknown flags (%#04x) in version %d '
472 'revlog %s') %
488 'revlog %s') %
473 (flags >> 16, fmt, self.indexfile))
489 (flags >> 16, fmt, self.indexfile))
474 elif fmt == REVLOGV2:
490 elif fmt == REVLOGV2:
475 if flags & ~REVLOGV2_FLAGS:
491 if flags & ~REVLOGV2_FLAGS:
476 raise RevlogError(_('unknown flags (%#04x) in version %d '
492 raise RevlogError(_('unknown flags (%#04x) in version %d '
477 'revlog %s') %
493 'revlog %s') %
478 (flags >> 16, fmt, self.indexfile))
494 (flags >> 16, fmt, self.indexfile))
479 else:
495 else:
480 raise RevlogError(_('unknown version (%d) in revlog %s') %
496 raise RevlogError(_('unknown version (%d) in revlog %s') %
481 (fmt, self.indexfile))
497 (fmt, self.indexfile))
482
498
483 self._storedeltachains = True
499 self._storedeltachains = True
484
500
485 self._io = revlogio()
501 self._io = revlogio()
486 if self.version == REVLOGV0:
502 if self.version == REVLOGV0:
487 self._io = revlogoldio()
503 self._io = revlogoldio()
488 try:
504 try:
489 d = self._io.parseindex(indexdata, self._inline)
505 d = self._io.parseindex(indexdata, self._inline)
490 except (ValueError, IndexError):
506 except (ValueError, IndexError):
491 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
507 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
492 self.index, nodemap, self._chunkcache = d
508 self.index, nodemap, self._chunkcache = d
493 if nodemap is not None:
509 if nodemap is not None:
494 self.nodemap = self._nodecache = nodemap
510 self.nodemap = self._nodecache = nodemap
495 if not self._chunkcache:
511 if not self._chunkcache:
496 self._chunkclear()
512 self._chunkclear()
497 # revnum -> (chain-length, sum-delta-length)
513 # revnum -> (chain-length, sum-delta-length)
498 self._chaininfocache = {}
514 self._chaininfocache = {}
499 # revlog header -> revlog compressor
515 # revlog header -> revlog compressor
500 self._decompressors = {}
516 self._decompressors = {}
501
517
502 @util.propertycache
518 @util.propertycache
503 def _compressor(self):
519 def _compressor(self):
504 return util.compengines[self._compengine].revlogcompressor()
520 return util.compengines[self._compengine].revlogcompressor()
505
521
506 def _indexfp(self, mode='r'):
522 def _indexfp(self, mode='r'):
507 """file object for the revlog's index file"""
523 """file object for the revlog's index file"""
508 args = {r'mode': mode}
524 args = {r'mode': mode}
509 if mode != 'r':
525 if mode != 'r':
510 args[r'checkambig'] = self._checkambig
526 args[r'checkambig'] = self._checkambig
511 if mode == 'w':
527 if mode == 'w':
512 args[r'atomictemp'] = True
528 args[r'atomictemp'] = True
513 return self.opener(self.indexfile, **args)
529 return self.opener(self.indexfile, **args)
514
530
515 def _datafp(self, mode='r'):
531 def _datafp(self, mode='r'):
516 """file object for the revlog's data file"""
532 """file object for the revlog's data file"""
517 return self.opener(self.datafile, mode=mode)
533 return self.opener(self.datafile, mode=mode)
518
534
519 @contextlib.contextmanager
535 @contextlib.contextmanager
520 def _datareadfp(self, existingfp=None):
536 def _datareadfp(self, existingfp=None):
521 """file object suitable to read data"""
537 """file object suitable to read data"""
522 if existingfp is not None:
538 if existingfp is not None:
523 yield existingfp
539 yield existingfp
524 else:
540 else:
525 if self._inline:
541 if self._inline:
526 func = self._indexfp
542 func = self._indexfp
527 else:
543 else:
528 func = self._datafp
544 func = self._datafp
529 with func() as fp:
545 with func() as fp:
530 yield fp
546 yield fp
531
547
532 def tip(self):
548 def tip(self):
533 return self.node(len(self.index) - 1)
549 return self.node(len(self.index) - 1)
534 def __contains__(self, rev):
550 def __contains__(self, rev):
535 return 0 <= rev < len(self)
551 return 0 <= rev < len(self)
536 def __len__(self):
552 def __len__(self):
537 return len(self.index)
553 return len(self.index)
538 def __iter__(self):
554 def __iter__(self):
539 return iter(pycompat.xrange(len(self)))
555 return iter(pycompat.xrange(len(self)))
540 def revs(self, start=0, stop=None):
556 def revs(self, start=0, stop=None):
541 """iterate over all rev in this revlog (from start to stop)"""
557 """iterate over all rev in this revlog (from start to stop)"""
542 step = 1
558 step = 1
543 length = len(self)
559 length = len(self)
544 if stop is not None:
560 if stop is not None:
545 if start > stop:
561 if start > stop:
546 step = -1
562 step = -1
547 stop += step
563 stop += step
548 if stop > length:
564 if stop > length:
549 stop = length
565 stop = length
550 else:
566 else:
551 stop = length
567 stop = length
552 return pycompat.xrange(start, stop, step)
568 return pycompat.xrange(start, stop, step)
553
569
554 @util.propertycache
570 @util.propertycache
555 def nodemap(self):
571 def nodemap(self):
556 if self.index:
572 if self.index:
557 # populate mapping down to the initial node
573 # populate mapping down to the initial node
558 node0 = self.index[0][7] # get around changelog filtering
574 node0 = self.index[0][7] # get around changelog filtering
559 self.rev(node0)
575 self.rev(node0)
560 return self._nodecache
576 return self._nodecache
561
577
562 def hasnode(self, node):
578 def hasnode(self, node):
563 try:
579 try:
564 self.rev(node)
580 self.rev(node)
565 return True
581 return True
566 except KeyError:
582 except KeyError:
567 return False
583 return False
568
584
569 def candelta(self, baserev, rev):
585 def candelta(self, baserev, rev):
570 """whether two revisions (baserev, rev) can be delta-ed or not"""
586 """whether two revisions (baserev, rev) can be delta-ed or not"""
571 # Disable delta if either rev requires a content-changing flag
587 # Disable delta if either rev requires a content-changing flag
572 # processor (ex. LFS). This is because such flag processor can alter
588 # processor (ex. LFS). This is because such flag processor can alter
573 # the rawtext content that the delta will be based on, and two clients
589 # the rawtext content that the delta will be based on, and two clients
574 # could have a same revlog node with different flags (i.e. different
590 # could have a same revlog node with different flags (i.e. different
575 # rawtext contents) and the delta could be incompatible.
591 # rawtext contents) and the delta could be incompatible.
576 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
592 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
577 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
593 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
578 return False
594 return False
579 return True
595 return True
580
596
581 def clearcaches(self):
597 def clearcaches(self):
582 self._cache = None
598 self._cache = None
583 self._chainbasecache.clear()
599 self._chainbasecache.clear()
584 self._chunkcache = (0, '')
600 self._chunkcache = (0, '')
585 self._pcache = {}
601 self._pcache = {}
586
602
587 try:
603 try:
588 self._nodecache.clearcaches()
604 self._nodecache.clearcaches()
589 except AttributeError:
605 except AttributeError:
590 self._nodecache = {nullid: nullrev}
606 self._nodecache = {nullid: nullrev}
591 self._nodepos = None
607 self._nodepos = None
592
608
593 def rev(self, node):
609 def rev(self, node):
594 try:
610 try:
595 return self._nodecache[node]
611 return self._nodecache[node]
596 except TypeError:
612 except TypeError:
597 raise
613 raise
598 except RevlogError:
614 except RevlogError:
599 # parsers.c radix tree lookup failed
615 # parsers.c radix tree lookup failed
600 if node == wdirid or node in wdirfilenodeids:
616 if node == wdirid or node in wdirfilenodeids:
601 raise error.WdirUnsupported
617 raise error.WdirUnsupported
602 raise LookupError(node, self.indexfile, _('no node'))
618 raise LookupError(node, self.indexfile, _('no node'))
603 except KeyError:
619 except KeyError:
604 # pure python cache lookup failed
620 # pure python cache lookup failed
605 n = self._nodecache
621 n = self._nodecache
606 i = self.index
622 i = self.index
607 p = self._nodepos
623 p = self._nodepos
608 if p is None:
624 if p is None:
609 p = len(i) - 1
625 p = len(i) - 1
610 else:
626 else:
611 assert p < len(i)
627 assert p < len(i)
612 for r in pycompat.xrange(p, -1, -1):
628 for r in pycompat.xrange(p, -1, -1):
613 v = i[r][7]
629 v = i[r][7]
614 n[v] = r
630 n[v] = r
615 if v == node:
631 if v == node:
616 self._nodepos = r - 1
632 self._nodepos = r - 1
617 return r
633 return r
618 if node == wdirid or node in wdirfilenodeids:
634 if node == wdirid or node in wdirfilenodeids:
619 raise error.WdirUnsupported
635 raise error.WdirUnsupported
620 raise LookupError(node, self.indexfile, _('no node'))
636 raise LookupError(node, self.indexfile, _('no node'))
621
637
622 # Accessors for index entries.
638 # Accessors for index entries.
623
639
624 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
640 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
625 # are flags.
641 # are flags.
626 def start(self, rev):
642 def start(self, rev):
627 return int(self.index[rev][0] >> 16)
643 return int(self.index[rev][0] >> 16)
628
644
629 def flags(self, rev):
645 def flags(self, rev):
630 return self.index[rev][0] & 0xFFFF
646 return self.index[rev][0] & 0xFFFF
631
647
632 def length(self, rev):
648 def length(self, rev):
633 return self.index[rev][1]
649 return self.index[rev][1]
634
650
635 def rawsize(self, rev):
651 def rawsize(self, rev):
636 """return the length of the uncompressed text for a given revision"""
652 """return the length of the uncompressed text for a given revision"""
637 l = self.index[rev][2]
653 l = self.index[rev][2]
638 if l >= 0:
654 if l >= 0:
639 return l
655 return l
640
656
641 t = self.revision(rev, raw=True)
657 t = self.revision(rev, raw=True)
642 return len(t)
658 return len(t)
643
659
644 def size(self, rev):
660 def size(self, rev):
645 """length of non-raw text (processed by a "read" flag processor)"""
661 """length of non-raw text (processed by a "read" flag processor)"""
646 # fast path: if no "read" flag processor could change the content,
662 # fast path: if no "read" flag processor could change the content,
647 # size is rawsize. note: ELLIPSIS is known to not change the content.
663 # size is rawsize. note: ELLIPSIS is known to not change the content.
648 flags = self.flags(rev)
664 flags = self.flags(rev)
649 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
665 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
650 return self.rawsize(rev)
666 return self.rawsize(rev)
651
667
652 return len(self.revision(rev, raw=False))
668 return len(self.revision(rev, raw=False))
653
669
654 def chainbase(self, rev):
670 def chainbase(self, rev):
655 base = self._chainbasecache.get(rev)
671 base = self._chainbasecache.get(rev)
656 if base is not None:
672 if base is not None:
657 return base
673 return base
658
674
659 index = self.index
675 index = self.index
660 iterrev = rev
676 iterrev = rev
661 base = index[iterrev][3]
677 base = index[iterrev][3]
662 while base != iterrev:
678 while base != iterrev:
663 iterrev = base
679 iterrev = base
664 base = index[iterrev][3]
680 base = index[iterrev][3]
665
681
666 self._chainbasecache[rev] = base
682 self._chainbasecache[rev] = base
667 return base
683 return base
668
684
669 def linkrev(self, rev):
685 def linkrev(self, rev):
670 return self.index[rev][4]
686 return self.index[rev][4]
671
687
672 def parentrevs(self, rev):
688 def parentrevs(self, rev):
673 try:
689 try:
674 entry = self.index[rev]
690 entry = self.index[rev]
675 except IndexError:
691 except IndexError:
676 if rev == wdirrev:
692 if rev == wdirrev:
677 raise error.WdirUnsupported
693 raise error.WdirUnsupported
678 raise
694 raise
679
695
680 return entry[5], entry[6]
696 return entry[5], entry[6]
681
697
682 def node(self, rev):
698 def node(self, rev):
683 try:
699 try:
684 return self.index[rev][7]
700 return self.index[rev][7]
685 except IndexError:
701 except IndexError:
686 if rev == wdirrev:
702 if rev == wdirrev:
687 raise error.WdirUnsupported
703 raise error.WdirUnsupported
688 raise
704 raise
689
705
690 # Derived from index values.
706 # Derived from index values.
691
707
692 def end(self, rev):
708 def end(self, rev):
693 return self.start(rev) + self.length(rev)
709 return self.start(rev) + self.length(rev)
694
710
695 def parents(self, node):
711 def parents(self, node):
696 i = self.index
712 i = self.index
697 d = i[self.rev(node)]
713 d = i[self.rev(node)]
698 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
714 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
699
715
700 def chainlen(self, rev):
716 def chainlen(self, rev):
701 return self._chaininfo(rev)[0]
717 return self._chaininfo(rev)[0]
702
718
703 def _chaininfo(self, rev):
719 def _chaininfo(self, rev):
704 chaininfocache = self._chaininfocache
720 chaininfocache = self._chaininfocache
705 if rev in chaininfocache:
721 if rev in chaininfocache:
706 return chaininfocache[rev]
722 return chaininfocache[rev]
707 index = self.index
723 index = self.index
708 generaldelta = self._generaldelta
724 generaldelta = self._generaldelta
709 iterrev = rev
725 iterrev = rev
710 e = index[iterrev]
726 e = index[iterrev]
711 clen = 0
727 clen = 0
712 compresseddeltalen = 0
728 compresseddeltalen = 0
713 while iterrev != e[3]:
729 while iterrev != e[3]:
714 clen += 1
730 clen += 1
715 compresseddeltalen += e[1]
731 compresseddeltalen += e[1]
716 if generaldelta:
732 if generaldelta:
717 iterrev = e[3]
733 iterrev = e[3]
718 else:
734 else:
719 iterrev -= 1
735 iterrev -= 1
720 if iterrev in chaininfocache:
736 if iterrev in chaininfocache:
721 t = chaininfocache[iterrev]
737 t = chaininfocache[iterrev]
722 clen += t[0]
738 clen += t[0]
723 compresseddeltalen += t[1]
739 compresseddeltalen += t[1]
724 break
740 break
725 e = index[iterrev]
741 e = index[iterrev]
726 else:
742 else:
727 # Add text length of base since decompressing that also takes
743 # Add text length of base since decompressing that also takes
728 # work. For cache hits the length is already included.
744 # work. For cache hits the length is already included.
729 compresseddeltalen += e[1]
745 compresseddeltalen += e[1]
730 r = (clen, compresseddeltalen)
746 r = (clen, compresseddeltalen)
731 chaininfocache[rev] = r
747 chaininfocache[rev] = r
732 return r
748 return r
733
749
734 def _deltachain(self, rev, stoprev=None):
750 def _deltachain(self, rev, stoprev=None):
735 """Obtain the delta chain for a revision.
751 """Obtain the delta chain for a revision.
736
752
737 ``stoprev`` specifies a revision to stop at. If not specified, we
753 ``stoprev`` specifies a revision to stop at. If not specified, we
738 stop at the base of the chain.
754 stop at the base of the chain.
739
755
740 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
756 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
741 revs in ascending order and ``stopped`` is a bool indicating whether
757 revs in ascending order and ``stopped`` is a bool indicating whether
742 ``stoprev`` was hit.
758 ``stoprev`` was hit.
743 """
759 """
744 # Try C implementation.
760 # Try C implementation.
745 try:
761 try:
746 return self.index.deltachain(rev, stoprev, self._generaldelta)
762 return self.index.deltachain(rev, stoprev, self._generaldelta)
747 except AttributeError:
763 except AttributeError:
748 pass
764 pass
749
765
750 chain = []
766 chain = []
751
767
752 # Alias to prevent attribute lookup in tight loop.
768 # Alias to prevent attribute lookup in tight loop.
753 index = self.index
769 index = self.index
754 generaldelta = self._generaldelta
770 generaldelta = self._generaldelta
755
771
756 iterrev = rev
772 iterrev = rev
757 e = index[iterrev]
773 e = index[iterrev]
758 while iterrev != e[3] and iterrev != stoprev:
774 while iterrev != e[3] and iterrev != stoprev:
759 chain.append(iterrev)
775 chain.append(iterrev)
760 if generaldelta:
776 if generaldelta:
761 iterrev = e[3]
777 iterrev = e[3]
762 else:
778 else:
763 iterrev -= 1
779 iterrev -= 1
764 e = index[iterrev]
780 e = index[iterrev]
765
781
766 if iterrev == stoprev:
782 if iterrev == stoprev:
767 stopped = True
783 stopped = True
768 else:
784 else:
769 chain.append(iterrev)
785 chain.append(iterrev)
770 stopped = False
786 stopped = False
771
787
772 chain.reverse()
788 chain.reverse()
773 return chain, stopped
789 return chain, stopped
774
790
775 def ancestors(self, revs, stoprev=0, inclusive=False):
791 def ancestors(self, revs, stoprev=0, inclusive=False):
776 """Generate the ancestors of 'revs' in reverse topological order.
792 """Generate the ancestors of 'revs' in reverse topological order.
777 Does not generate revs lower than stoprev.
793 Does not generate revs lower than stoprev.
778
794
779 See the documentation for ancestor.lazyancestors for more details."""
795 See the documentation for ancestor.lazyancestors for more details."""
780
796
781 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
797 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
782 inclusive=inclusive)
798 inclusive=inclusive)
783
799
784 def descendants(self, revs):
800 def descendants(self, revs):
785 """Generate the descendants of 'revs' in revision order.
801 """Generate the descendants of 'revs' in revision order.
786
802
787 Yield a sequence of revision numbers starting with a child of
803 Yield a sequence of revision numbers starting with a child of
788 some rev in revs, i.e., each revision is *not* considered a
804 some rev in revs, i.e., each revision is *not* considered a
789 descendant of itself. Results are ordered by revision number (a
805 descendant of itself. Results are ordered by revision number (a
790 topological sort)."""
806 topological sort)."""
791 first = min(revs)
807 first = min(revs)
792 if first == nullrev:
808 if first == nullrev:
793 for i in self:
809 for i in self:
794 yield i
810 yield i
795 return
811 return
796
812
797 seen = set(revs)
813 seen = set(revs)
798 for i in self.revs(start=first + 1):
814 for i in self.revs(start=first + 1):
799 for x in self.parentrevs(i):
815 for x in self.parentrevs(i):
800 if x != nullrev and x in seen:
816 if x != nullrev and x in seen:
801 seen.add(i)
817 seen.add(i)
802 yield i
818 yield i
803 break
819 break
804
820
805 def findcommonmissing(self, common=None, heads=None):
821 def findcommonmissing(self, common=None, heads=None):
806 """Return a tuple of the ancestors of common and the ancestors of heads
822 """Return a tuple of the ancestors of common and the ancestors of heads
807 that are not ancestors of common. In revset terminology, we return the
823 that are not ancestors of common. In revset terminology, we return the
808 tuple:
824 tuple:
809
825
810 ::common, (::heads) - (::common)
826 ::common, (::heads) - (::common)
811
827
812 The list is sorted by revision number, meaning it is
828 The list is sorted by revision number, meaning it is
813 topologically sorted.
829 topologically sorted.
814
830
815 'heads' and 'common' are both lists of node IDs. If heads is
831 'heads' and 'common' are both lists of node IDs. If heads is
816 not supplied, uses all of the revlog's heads. If common is not
832 not supplied, uses all of the revlog's heads. If common is not
817 supplied, uses nullid."""
833 supplied, uses nullid."""
818 if common is None:
834 if common is None:
819 common = [nullid]
835 common = [nullid]
820 if heads is None:
836 if heads is None:
821 heads = self.heads()
837 heads = self.heads()
822
838
823 common = [self.rev(n) for n in common]
839 common = [self.rev(n) for n in common]
824 heads = [self.rev(n) for n in heads]
840 heads = [self.rev(n) for n in heads]
825
841
826 # we want the ancestors, but inclusive
842 # we want the ancestors, but inclusive
827 class lazyset(object):
843 class lazyset(object):
828 def __init__(self, lazyvalues):
844 def __init__(self, lazyvalues):
829 self.addedvalues = set()
845 self.addedvalues = set()
830 self.lazyvalues = lazyvalues
846 self.lazyvalues = lazyvalues
831
847
832 def __contains__(self, value):
848 def __contains__(self, value):
833 return value in self.addedvalues or value in self.lazyvalues
849 return value in self.addedvalues or value in self.lazyvalues
834
850
835 def __iter__(self):
851 def __iter__(self):
836 added = self.addedvalues
852 added = self.addedvalues
837 for r in added:
853 for r in added:
838 yield r
854 yield r
839 for r in self.lazyvalues:
855 for r in self.lazyvalues:
840 if not r in added:
856 if not r in added:
841 yield r
857 yield r
842
858
843 def add(self, value):
859 def add(self, value):
844 self.addedvalues.add(value)
860 self.addedvalues.add(value)
845
861
846 def update(self, values):
862 def update(self, values):
847 self.addedvalues.update(values)
863 self.addedvalues.update(values)
848
864
849 has = lazyset(self.ancestors(common))
865 has = lazyset(self.ancestors(common))
850 has.add(nullrev)
866 has.add(nullrev)
851 has.update(common)
867 has.update(common)
852
868
853 # take all ancestors from heads that aren't in has
869 # take all ancestors from heads that aren't in has
854 missing = set()
870 missing = set()
855 visit = collections.deque(r for r in heads if r not in has)
871 visit = collections.deque(r for r in heads if r not in has)
856 while visit:
872 while visit:
857 r = visit.popleft()
873 r = visit.popleft()
858 if r in missing:
874 if r in missing:
859 continue
875 continue
860 else:
876 else:
861 missing.add(r)
877 missing.add(r)
862 for p in self.parentrevs(r):
878 for p in self.parentrevs(r):
863 if p not in has:
879 if p not in has:
864 visit.append(p)
880 visit.append(p)
865 missing = list(missing)
881 missing = list(missing)
866 missing.sort()
882 missing.sort()
867 return has, [self.node(miss) for miss in missing]
883 return has, [self.node(miss) for miss in missing]
868
884
869 def incrementalmissingrevs(self, common=None):
885 def incrementalmissingrevs(self, common=None):
870 """Return an object that can be used to incrementally compute the
886 """Return an object that can be used to incrementally compute the
871 revision numbers of the ancestors of arbitrary sets that are not
887 revision numbers of the ancestors of arbitrary sets that are not
872 ancestors of common. This is an ancestor.incrementalmissingancestors
888 ancestors of common. This is an ancestor.incrementalmissingancestors
873 object.
889 object.
874
890
875 'common' is a list of revision numbers. If common is not supplied, uses
891 'common' is a list of revision numbers. If common is not supplied, uses
876 nullrev.
892 nullrev.
877 """
893 """
878 if common is None:
894 if common is None:
879 common = [nullrev]
895 common = [nullrev]
880
896
881 return ancestor.incrementalmissingancestors(self.parentrevs, common)
897 return ancestor.incrementalmissingancestors(self.parentrevs, common)
882
898
883 def findmissingrevs(self, common=None, heads=None):
899 def findmissingrevs(self, common=None, heads=None):
884 """Return the revision numbers of the ancestors of heads that
900 """Return the revision numbers of the ancestors of heads that
885 are not ancestors of common.
901 are not ancestors of common.
886
902
887 More specifically, return a list of revision numbers corresponding to
903 More specifically, return a list of revision numbers corresponding to
888 nodes N such that every N satisfies the following constraints:
904 nodes N such that every N satisfies the following constraints:
889
905
890 1. N is an ancestor of some node in 'heads'
906 1. N is an ancestor of some node in 'heads'
891 2. N is not an ancestor of any node in 'common'
907 2. N is not an ancestor of any node in 'common'
892
908
893 The list is sorted by revision number, meaning it is
909 The list is sorted by revision number, meaning it is
894 topologically sorted.
910 topologically sorted.
895
911
896 'heads' and 'common' are both lists of revision numbers. If heads is
912 'heads' and 'common' are both lists of revision numbers. If heads is
897 not supplied, uses all of the revlog's heads. If common is not
913 not supplied, uses all of the revlog's heads. If common is not
898 supplied, uses nullid."""
914 supplied, uses nullid."""
899 if common is None:
915 if common is None:
900 common = [nullrev]
916 common = [nullrev]
901 if heads is None:
917 if heads is None:
902 heads = self.headrevs()
918 heads = self.headrevs()
903
919
904 inc = self.incrementalmissingrevs(common=common)
920 inc = self.incrementalmissingrevs(common=common)
905 return inc.missingancestors(heads)
921 return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]
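    # findmissing example (hypothetical linear history rev 0 -> 1 -> 2):
    # findmissingrevs(common=[0], heads=[2]) would return [1, 2], and
    # findmissing called with the corresponding node IDs returns the same
    # revisions as binary nodes instead of integers.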

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents is a
                # descendant. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
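    # nodesbetween illustration (hypothetical linear history A -> B -> C):
    # nodesbetween(roots=[B], heads=[C]) would return ([B, C], [B], [C]):
    # the topological path, the subset of roots actually reached, and the
    # subset of heads actually reachable from those roots.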

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so no rev is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]
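    # Worked example for _headrevs (hypothetical): with revs 1 and 2 both
    # children of rev 0, every rev is first marked as a candidate head and
    # each rev's parents are then unmarked, so rev 0 is cleared and the
    # result is [1, 2].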

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]
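    # The walk above keeps only childless reachable revs: each rev past
    # ``start`` whose parent is reachable becomes reachable and tentatively
    # a head, and its parent is demoted, unless either side is in stoprevs.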

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)
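    # The ``a > b`` early exit above is sound because revision numbers are
    # topologically sorted: an ancestor always has a lower rev number than
    # its descendants, so a higher-numbered rev can never be an ancestor of
    # a lower-numbered one.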

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass
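    # _match tries, in order: an integer rev, a 20-byte binary node, a
    # decimal rev string (including negative offsets such as "-1"), and a
    # 40-character hex nodeid; anything else falls through to
    # _partialmatch() via lookup().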

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids here as they should always be
        # full hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                 _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                     _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))
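    # For example, lookup('0'), lookup(<20-byte binary node>), and
    # lookup('12abc4') may each resolve to a full binary node, with exact
    # matches tried before prefix matches; an unresolvable id raises
    # LookupError.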

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
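    # For instance, given two nodes beginning 12ab34... and 12ac99... (and
    # no other collisions), the shortest unambiguous prefix of the first
    # would be '12ab'; all-'f' prefixes are lengthened so they cannot
    # collide with the virtual working-directory id.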

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d
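    # Worked arithmetic (assuming cachesize = 65536): for offset = 70000 and
    # length = 1000, realoffset = 65536 and reallength = 65536, so the read
    # is rounded out to a whole cache-aligned window and the caller gets a
    # buffer sliced at offset - realoffset = 4464.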

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)
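    # For inline revlogs, data chunks are interleaved with index entries, so
    # each revision's data offset is shifted by (rev + 1) index-entry sizes;
    # e.g. with the 64-byte v1 entry size, rev 2's chunk starts
    # 3 * 64 = 192 bytes beyond its bare data offset.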

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1
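    # With generaldelta the index records the actual delta base (often one
    # of the parents); without it, deltas always chain against the previous
    # rev, so deltaparent(5) would be 4 unless rev 5 stores a full snapshot
    # (base == rev), which maps to nullrev.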

    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if rev == nullrev:
            return True
        deltap = self.deltaparent(rev)
        if deltap == nullrev:
            return True
        p1, p2 = self.parentrevs(rev)
        if deltap in (p1, p2):
            return False
        return self.issnapshot(deltap)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text
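    # Retrieval sketch: the delta chain is resolved bottom-up (full text at
    # the chain base, then each delta applied via mdiff.patches), the result
    # is cached as rawtext, and flag processors turn rawtext into the text
    # callers see; raw=True skips those read-time transforms.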

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash
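    # A registered processor is a (readtransform, writetransform,
    # rawtransform) triple: the read/write transforms return
    # (newtext, vhash) and may rewrite the text, while the raw transform
    # only returns the vhash bool consulted when raw=True (e.g. when
    # generating changegroups).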

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise RevlogError(_("integrity check failed on %s:%s")
                                  % (self.indexfile, pycompat.bytestr(revornode)))
        except RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

        # the temp file replaces the real index when we exit the context
        # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()
1780
1796
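    # Editorial sketch of the trigger above: once the tip revision's data
    # ends at or beyond _maxinline bytes, an inline revlog is rewritten as
    # separate index and data files. For a hypothetical inline revlog `rl`:
    #
    #     tiprev = len(rl) - 1
    #     needsplit = rl._inline and (
    #         rl.start(tiprev) + rl.length(tiprev)) >= _maxinline
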
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such a
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

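    # Hypothetical usage sketch (editorial): adding a revision inside an
    # open transaction. `rl`, `tr`, `parentnode` and the linkrev value 5
    # are assumptions, not names from this module:
    #
    #     node = rl.addrevision(b'new file text', tr, 5, parentnode, nullid)
    #     assert rl.revision(node) == b'new file text'
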
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

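    # Editorial note: compress() returns a (header, data) pair that callers
    # concatenate. An empty header means the payload identifies itself (an
    # engine header such as zlib's 'x', or a leading '\0'); 'u' explicitly
    # marks data stored uncompressed. A sketch of the cases, assuming the
    # compressor declines short or incompressible input:
    #
    #     rl.compress(b'')             # -> ('', b'')
    #     rl.compress(b'\0raw bytes')  # -> ('', b'\0raw bytes')
    #     rl.compress(b'plain')        # -> ('u', b'plain')
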
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') %
                                  stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

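    # Editorial sketch of the routing above as a standalone function; only
    # zlib is spelled out here, other engines being dispatched through the
    # engine registry in the real method:
    #
    #     import zlib
    #     def sketch_decompress(chunk):
    #         t = chunk[0:1]
    #         if t == b'x':        # zlib stream header
    #             return zlib.decompress(chunk)
    #         elif t == b'\0':     # raw data that happens to start with NUL
    #             return chunk
    #         elif t == b'u':      # explicit "uncompressed" marker
    #             return chunk[1:]
    #         raise ValueError('unknown compression type %r' % t)
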
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid or node in wdirfilenodeids:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need the rawtext size, before it is changed by flag processors;
            # this is the non-raw size. use revlog explicitly to avoid
            # filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

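    # Editorial sketch of the index entry appended above; offset_type()
    # (defined earlier in this module) packs the data-file offset and the
    # flag bits into a single integer:
    #
    #     packed = (offset << 16) | flags   # what offset_type() computes
    #     # e == (packed, compressed delta length, raw text length,
    #     #       delta base rev, linkrev, p1 rev, p2 rev, node)
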
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

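    # Hypothetical usage sketch (editorial): the shape of the tuples that
    # addgroup() consumes, e.g. while applying a changegroup. `rl`, `tr`
    # and the changelog `cl` used as the linkmapper are assumptions:
    #
    #     deltas = [(node, p1, p2, linknode, deltabase, delta, 0)]
    #     added = rl.addgroup(deltas, cl.rev, tr)
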
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

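    # Editorial sketch of the full-replacement delta shape tested above: a
    # single patch hunk whose ">lll" header is three big-endian 32-bit ints
    # (start, end, new length) covering the whole base text. Assuming
    # `newtext` replaces a base of `oldlen` bytes:
    #
    #     import struct
    #     delta = struct.pack(">lll", 0, oldlen, len(newtext)) + newtext
    #     # the revision is censored iff newtext starts with "\1\ncensored:"
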
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

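    # Worked example (editorial): take two heads, rev 1 (parent rev 0,
    # linkrev 2) and rev 2 (parent rev 0, linkrev 1). getstrippoint(2)
    # returns (1, {2}): rev 1 is the lowest rev whose linkrev is >= 2, so
    # revs 1 and 2 are truncated, and rev 2 is reported broken because its
    # linkrev 1 is below minlink yet it sits above the strip point.
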
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

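    # Editorial note: checksize() returns (dd, di), the byte-count
    # differences (actual minus expected) for the data file and the index
    # file; (0, 0) means the on-disk sizes match what the index describes.
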
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisiondeltas(self, requests):
        frev = self.rev

        prevrev = None
        for request in requests:
            node = request.node
            rev = frev(node)

            if prevrev is None:
                prevrev = self.index[rev][5]

            # Requesting a full revision.
            if request.basenode == nullid:
                baserev = nullrev
            # Requesting an explicit revision.
            elif request.basenode is not None:
                baserev = frev(request.basenode)
            # Allowing us to choose.
            else:
                p1rev, p2rev = self.parentrevs(rev)
                deltaparentrev = self.deltaparent(rev)

                # Avoid sending full revisions when delta parent is null. Pick
                # prev in that case. It's tempting to pick p1 in this case, as
                # p1 will be smaller in the common case. However, computing a
                # delta against p1 may require resolving the raw text of p1,
                # which could be expensive. The revlog caches should have prev
                # cached, meaning less CPU for delta generation. There is
                # likely room to add a flag and/or config option to control
                # this behavior.
                if deltaparentrev == nullrev and self._storedeltachains:
                    baserev = prevrev

                # Revlog is configured to use full snapshot for a reason.
                # Stick to full snapshot.
                elif deltaparentrev == nullrev:
                    baserev = nullrev

                # Pick previous when we can't be sure the base is available
                # on consumer.
                elif deltaparentrev not in (p1rev, p2rev, prevrev):
                    baserev = prevrev
                else:
                    baserev = deltaparentrev

                if baserev != nullrev and not self.candelta(baserev, rev):
                    baserev = nullrev

            revision = None
            delta = None
            baserevisionsize = None

            if self.iscensored(baserev) or self.iscensored(rev):
                try:
                    revision = self.revision(node, raw=True)
                except error.CensoredNodeError as e:
                    revision = e.tombstone

                if baserev != nullrev:
                    baserevisionsize = self.rawsize(baserev)

            elif baserev == nullrev:
                revision = self.revision(node, raw=True)
            else:
                delta = self.revdiff(baserev, rev)

            extraflags = REVIDX_ELLIPSIS if request.ellipsis else 0

            yield revlogrevisiondelta(
                node=node,
                p1node=request.p1node,
                p2node=request.p2node,
                linknode=request.linknode,
                basenode=self.node(baserev),
                flags=self.flags(rev) | extraflags,
                baserevisionsize=baserevisionsize,
                revision=revision,
                delta=delta)

            prevrev = rev

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        When unset, the destination revlog's current setting is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
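
    # Hypothetical usage sketch (editorial): recomputing every delta while
    # copying into a fresh revlog, e.g. during a format upgrade. `src`,
    # `dst` and `tr` are assumptions:
    #
    #     src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)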