revlog: fix pure version of _partialmatch() to include nullid...
Martin von Zweigbergk
r39227:5517d62c default
@@ -1,3073 +1,3076 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import heapq
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
+    nullhex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .utils import (
    stringutil,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS
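
# Illustrative note (not part of the original file): the version header
# packs the revlog format in the low 16 bits and feature flags above them,
# so the default v1 + inline-data header works out to:
#
#   >>> REVLOG_DEFAULT_VERSION == 0x10001
#   True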

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
# bitmask for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)
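
# Illustrative sketch (not part of the original file): packmeta() frames the
# sorted metadata between "\1\n" markers in front of the raw text, and
# parsemeta() recovers it along with the total metadata size:
#
#   >>> blob = packmeta({'censored': 'deadbeef'}, 'file content')
#   >>> parsemeta(blob)
#   ({'censored': 'deadbeef'}, 23)
#   >>> parsemeta('file content')  # no leading marker, no metadata
#   (None, None)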

def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate the hash. If
      "write" returns True, that basically means the "rawtext" returned by
      "write" should be used to generate the hash. Usually, "write" and "read"
      return different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor
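
# Illustrative sketch (not part of the original file): registering a
# processor for the REVIDX_EXTSTORED flag. The function names and the
# helpers they call are hypothetical; the booleans follow the
# (read, write, raw) contract documented above.
#
#   def _readext(self, rawtext):
#       return _resolveexternal(rawtext), False  # hypothetical helper
#   def _writeext(self, text):
#       return _storeexternal(text), False       # hypothetical helper
#   def _rawext(self, rawtext):
#       return False  # rawtext cannot be hash-checked directly
#   addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))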

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
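
# Illustrative note (not part of the original file): the index packs the
# data offset and the 16-bit flag field into a single integer, so
# offset_type() and getoffset()/gettype() round-trip:
#
#   >>> q = offset_type(1024, REVIDX_ISCENSORED)
#   >>> getoffset(q), gettype(q) == REVIDX_ISCENSORED
#   (1024, True)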

_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()
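
# Illustrative note (not part of the original file): both branches hash the
# sorted parents followed by the text. The p2 == nullid shortcut preserves
# that order because nullid (twenty zero bytes) sorts before any other node,
# so for a root revision:
#
#   >>> hash('data', nullid, nullid) == hashlib.sha1(
#   ...     nullid + nullid + 'data').digest()
#   True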

class _testrevlog(object):
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0):
        """data is a list of revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap

    def start(self, rev):
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # If we have a non-empty delta candidate, there is nothing to trim
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, except the very first revision of a chain
        while (endidx > 1
               and endidx > startidx
               and length(revs[endidx - 1]) == 0):
            endidx -= 1

    return revs[startidx:endidx]

def _segmentspan(revlog, revs, deltainfo=None):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> _segmentspan(revlog, [0, 4])
    17
    >>> _segmentspan(revlog, [3, 4])
    5
    >>> _segmentspan(revlog, [1, 2, 3,])
    7
    >>> _segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    if deltainfo is not None and len(revlog) <= revs[-1]:
        if len(revs) == 1:
            return deltainfo.deltalen
        offset = revlog.end(len(revlog) - 1)
        end = deltainfo.deltalen + offset
    else:
        end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])

def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revision chunks are larger than this limit, they will still
    be yielded individually.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # * deltainfo is used to pick a good delta chain when writing.
    if not (deltainfo is None or targetsize is None):
        msg = 'cannot use `targetsize` with a `deltainfo`'
        raise error.ProgrammingError(msg)
    for chunk in _slicechunktodensity(revlog, revs,
                                      deltainfo,
                                      revlog._srdensitythreshold,
                                      revlog._srmingapsize):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk

def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected but
    that are still too large compared to the read guarantee of revlog. This
    might happen when the "minimal gap size" interrupted the slicing or when
    chains are built in a way that creates large blocks next to each other.

    >>> revlog = _testrevlog([
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    startrevidx = 0
    startdata = revlog.start(revs[0])
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        if span <= targetsize:
            endrevidx = idx
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx
    yield _trimchunk(revlog, revs, startrevidx)

def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    ``deltainfo`` is a _deltainfo instance of a revision that we would append
    to the top of the revlog.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    nextrev = len(revlog)
    nextoffset = revlog.end(nextrev - 1)

    if deltainfo is None:
        deltachainspan = _segmentspan(revlog, revs)
        chainpayload = sum(length(r) for r in revs)
    else:
        deltachainspan = deltainfo.distance
        chainpayload = deltainfo.compresseddeltalen

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    if deltainfo is not None and deltainfo.deltalen:
        revs = list(revs)
        revs.append(nextrev)

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        if rev < nextrev:
            revstart = start(rev)
            revlen = length(rev)
        else:
            revstart = nextoffset
            revlen = deltainfo.deltalen

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < targetdensity:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()
    snapshotdepth = attr.ib()

class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides revisions that are interesting to diff against,
        grouped by level of easiness.
        """
        revlog = self.revlog
        gdelta = revlog._generaldelta
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and gdelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if gdelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]

                if not revlog._deltabothparents and len(parents) == 2:
                    parents.sort()
                    # To minimize the chance of having to build a fulltext,
                    # pick first whichever parent is closest to us (max rev)
                    yield (parents[1],)
                    # then the other one (min rev) if the first did not fit
                    yield (parents[0],)
                    tested.update(parents)
                elif len(parents) > 0:
                    # Test all parents (1 or 2), and keep the best candidate
                    yield parents
                    tested.update(parents)

            if prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                yield (prev,)
                tested.add(prev)
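
    # Illustrative note (not part of the original file): for a typical
    # generaldelta pull, the candidate groups above are tried in order --
    # the changegroup's cached base first, then the closer parent (max rev),
    # then the other parent, and finally the previous revision;
    # finddeltainfo() below stops at the first group that yields an
    # acceptable delta.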

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        revlog = self.revlog
        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        if not revinfo.textlen:
            return None # empty files do not need deltas

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltalength = self.revlog.length
        deltaparent = self.revlog.deltaparent

        deltainfo = None
        deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            # filter out delta bases that will never produce a good delta
            candidaterevs = [r for r in candidaterevs
                             if self.revlog.length(r) <= deltas_limit]
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # skip over empty deltas (no need to include them in a chain)
                while candidaterev != nullrev and not deltalength(candidaterev):
                    candidaterev = deltaparent(candidaterev)
                # no need to try a delta against nullid, this will be handled
                # by fulltext later.
                if candidaterev == nullrev:
                    continue
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
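
# Illustrative note (not part of the original file): the v0 record is
# fixed-size -- four 4-byte integers plus three 20-byte nodeids:
#
#   >>> indexformatv0.size
#   76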

class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

# maximum <delta-chain-data>/<revision-text-length> ratio
LIMIT_DELTA2TEXT = 2

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
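
# Illustrative note (not part of the original file): ">Qiiiiii20s12x" packs
# to 8 + 6 * 4 + 20 + 12 = 64 bytes per entry; the 20-byte nodeid is padded
# to 32 bytes. For rev 0, packentry() below overwrites the first 4 bytes of
# the entry with the version header, which works because the offset of the
# first revision is always zero.
#
#   >>> indexformatng.size
#   64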
889
890
890 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
891 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
891 # signed integer)
892 # signed integer)
892 _maxentrysize = 0x7fffffff
893 _maxentrysize = 0x7fffffff
893
894
894 class revlogio(object):
895 class revlogio(object):
895 def __init__(self):
896 def __init__(self):
896 self.size = indexformatng.size
897 self.size = indexformatng.size
897
898
898 def parseindex(self, data, inline):
899 def parseindex(self, data, inline):
899 # call the C implementation to parse the index data
900 # call the C implementation to parse the index data
900 index, cache = parsers.parse_index2(data, inline)
901 index, cache = parsers.parse_index2(data, inline)
901 return index, getattr(index, 'nodemap', None), cache
902 return index, getattr(index, 'nodemap', None), cache
902
903
903 def packentry(self, entry, node, version, rev):
904 def packentry(self, entry, node, version, rev):
904 p = indexformatng_pack(*entry)
905 p = indexformatng_pack(*entry)
905 if rev == 0:
906 if rev == 0:
906 p = versionformat_pack(version) + p[4:]
907 p = versionformat_pack(version) + p[4:]
907 return p
908 return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of full node to revision integer.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)
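
        # Editorial note: ``x & (x - 1)`` clears the lowest set bit of x,
        # so combined with the x > 0 check above it is zero exactly when x
        # is a power of two (e.g. 65536 & 65535 == 0, but 65537 & 65536
        # != 0).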

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))
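
        # Illustrative sketch (editorial note): the 32-bit version word
        # splits into a 16-bit format number and 16 feature-flag bits,
        # e.g. for the default header:
        #
        #   >>> v = REVLOG_DEFAULT_VERSION   # REVLOGV1 | FLAG_INLINE_DATA
        #   >>> (v & 0xFFFF, bool(v & FLAG_INLINE_DATA),
        #   ...  bool(v & FLAG_GENERALDELTA))
        #   (1, True, False)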

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable for reading data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        length = len(self)
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
            if stop > length:
                stop = length
        else:
            stop = length
        return pycompat.xrange(start, stop, step)
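
    # Illustrative sketch (editorial note): given a hypothetical revlog
    # ``rl`` with five revisions, revs() walks backwards when start > stop
    # and treats both bounds as inclusive:
    #
    #   >>> list(rl.revs(start=1, stop=3))
    #   [1, 2, 3]
    #   >>> list(rl.revs(start=3, stop=1))
    #   [3, 2, 1]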

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7] # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and
        # two clients could have the same revlog node with different flags
        # (i.e. different rawtext contents), making the delta incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
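
    # Illustrative sketch (editorial note): the first index field is
    # ``(offset << 16) | flags`` (built by offset_type() earlier in this
    # module), so the two accessors above are plain bit operations. With
    # made-up values:
    #
    #   >>> packed = (1000 << 16) | 0x1
    #   >>> (packed >> 16, packed & 0xFFFF)
    #   (1000, 1)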

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
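
    # Illustrative sketch (editorial note): with generaldelta each entry's
    # base (e[3]) may be any earlier rev; without it the base is implicitly
    # rev - 1. For a hypothetical revlog where rev 5 deltas against rev 3
    # and rev 3 is its own base, _deltachain(5) walks 5 -> 3 and returns
    # ([3, 5], False), while _deltachain(5, stoprev=3) stops early and
    # returns ([5], True).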

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
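
    # Illustrative sketch (editorial note): for a hypothetical linear
    # history 0-1-2-3 with common=[node 1] and heads=[node 3], 'has'
    # contains revs {-1, 0, 1} and the missing list is [node 2, node 3],
    # matching the revset reading ::common, (::heads) - (::common).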

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]
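
    # Editorial note: ishead deliberately has count + 1 slots so that a
    # nullrev (-1) parent indexes the extra trailing slot; clearing it is
    # harmless, and since that slot starts at 0 the final enumerate()
    # never reports it as a head.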

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)
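
    # Editorial note: a <= b is a necessary condition for ancestry (revs
    # are topologically ordered), hence the early a > b exit; otherwise a
    # is an ancestor of b exactly when it survives as a head of the common
    # ancestors of {a, b}. E.g. in a hypothetical linear history 0-1-2-3,
    # _commonancestorsheads(1, 3) is {1}, so isancestorrev(1, 3) is True.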
1772
1773
1773 def ancestor(self, a, b):
1774 def ancestor(self, a, b):
1774 """calculate the "best" common ancestor of nodes a and b"""
1775 """calculate the "best" common ancestor of nodes a and b"""
1775
1776
1776 a, b = self.rev(a), self.rev(b)
1777 a, b = self.rev(a), self.rev(b)
1777 try:
1778 try:
1778 ancs = self.index.ancestors(a, b)
1779 ancs = self.index.ancestors(a, b)
1779 except (AttributeError, OverflowError):
1780 except (AttributeError, OverflowError):
1780 ancs = ancestor.ancestors(self.parentrevs, a, b)
1781 ancs = ancestor.ancestors(self.parentrevs, a, b)
1781 if ancs:
1782 if ancs:
1782 # choose a consistent winner when there's a tie
1783 # choose a consistent winner when there's a tie
1783 return min(map(self.node, ancs))
1784 return min(map(self.node, ancs))
1784 return nullid
1785 return nullid
1785
1786
1786 def _match(self, id):
1787 def _match(self, id):
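        # try the cheap interpretations of id in turn: an integer rev, a
        # 20-byte binary node, a stringified rev, and finally a full
        # 40-character hex nodeid; return None when nothing matches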
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                 _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

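        # slow path for the pure-python (or filtered) case: consult the
        # partial-match cache, then scan the whole index for nodes whose hex
        # form starts with id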
        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
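                # the null revision is never in the index itself, so the
                # pure-python path must offer it as a candidate explicitly
                # (matching what the C radix tree version does)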
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                     _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

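        # general case: probe increasingly long prefixes until _partialmatch
        # reports a unique valid match, then make sure the result cannot be
        # confused with the wdir pseudo-node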
        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
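        # _chunkcachesize is enforced to be a power of two, so the masking
        # rounds the window to cachesize boundaries; e.g. with a 64KiB cache
        # size, a 10-byte read at offset 70000 becomes a 64KiB read at
        # offset 65536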
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

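        # an inline revlog interleaves index entries with data: revision r's
        # data is preceded by r + 1 index entries, hence the extra offset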
        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs, targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if rev == nullrev:
            return True
        deltap = self.deltaparent(rev)
        if deltap == nullrev:
            return True
        p1, p2 = self.parentrevs(rev)
        if deltap in (p1, p2):
            return False
        return self.issnapshot(deltap)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

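            # passing stoprev lets _deltachain stop at the cached revision so
            # the cached rawtext can serve as the base of the delta chain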
            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

        # the temp file replaces the real index when we exit the context
        # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses
            might use a different hashing method (and override checkhash()
            in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

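        # flag processors may transform the stored text below, so when any
        # flags are set the node must be computed from the caller-supplied
        # text up front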
        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') %
                                  stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

    def _isgooddeltainfo(self, deltainfo, revinfo):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if deltainfo is None:
            return False

        # - 'deltainfo.distance' is the distance from the base revision --
        #   bounding it limits the amount of I/O we need to do.
        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
        #   deltas we need to apply -- bounding it limits the amount of CPU
        #   we consume.

        if self._sparserevlog:
            # As sparse-read will be used, we can consider that the distance,
            # instead of being the span of the whole chunk,
            # is the span of the largest read chunk
            base = deltainfo.base

            if base != nullrev:
                deltachain = self._deltachain(base)[0]
            else:
                deltachain = []

            # search for the first non-snapshot revision
            for idx, r in enumerate(deltachain):
                if not self.issnapshot(r):
                    break
            deltachain = deltachain[idx:]
            chunks = _slicechunk(self, deltachain, deltainfo)
            all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
            distance = max(all_span)
        else:
            distance = deltainfo.distance

        textlen = revinfo.textlen
        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = distance # ensure the conditional below passes
        maxdist = max(maxdist, defaultmax)
        if self._sparserevlog and maxdist < self._srmingapsize:
            # In multiple places, we ignore irrelevant data ranges below a
            # certain size. We also apply this tradeoff here and relax the
            # span constraint for small enough content.
            maxdist = self._srmingapsize

        # Bad delta from read span:
        #
        # If the span of data read is larger than the maximum allowed.
        if maxdist < distance:
            return False

        # Bad delta from new delta size:
        #
        # If the delta size is larger than the target text, storing the
        # delta will be inefficient.
        if textlen < deltainfo.deltalen:
            return False

        # Bad delta from cumulated payload size:
        #
        # If the sum of the deltas gets larger than K * the target text length.
        if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
            return False

        # Bad delta from chain length:
        #
        # If the number of deltas in the chain gets too high.
        if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
            return False

        # bad delta from intermediate snapshot size limit
        #
        # If an intermediate snapshot size is higher than the limit. The
        # limit exists to prevent endless chains of intermediate deltas from
        # being created.
        if (deltainfo.snapshotdepth is not None and
            (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
            return False

        # bad delta if the new intermediate snapshot is larger than the
        # previous snapshot
        if (deltainfo.snapshotdepth
            and self.length(deltainfo.base) < deltainfo.deltalen):
            return False

        return True

2582 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2585 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2583 cachedelta, ifh, dfh, alwayscache=False,
2586 cachedelta, ifh, dfh, alwayscache=False,
2584 deltacomputer=None):
2587 deltacomputer=None):
2585 """internal function to add revisions to the log
2588 """internal function to add revisions to the log
2586
2589
2587 see addrevision for argument descriptions.
2590 see addrevision for argument descriptions.
2588
2591
2589 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2592 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2590
2593
2591 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2594 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2592 be used.
2595 be used.
2593
2596
2594 invariants:
2597 invariants:
2595 - rawtext is optional (can be None); if not set, cachedelta must be set.
2598 - rawtext is optional (can be None); if not set, cachedelta must be set.
2596 if both are set, they must correspond to each other.
2599 if both are set, they must correspond to each other.
2597 """
2600 """
2598 if node == nullid:
2601 if node == nullid:
2599 raise RevlogError(_("%s: attempt to add null revision") %
2602 raise RevlogError(_("%s: attempt to add null revision") %
2600 (self.indexfile))
2603 (self.indexfile))
2601 if node == wdirid or node in wdirfilenodeids:
2604 if node == wdirid or node in wdirfilenodeids:
2602 raise RevlogError(_("%s: attempt to add wdir revision") %
2605 raise RevlogError(_("%s: attempt to add wdir revision") %
2603 (self.indexfile))
2606 (self.indexfile))
2604
2607
2605 if self._inline:
2608 if self._inline:
2606 fh = ifh
2609 fh = ifh
2607 else:
2610 else:
2608 fh = dfh
2611 fh = dfh
2609
2612
        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = None
        else:
            deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

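        # Each index entry is an 8-tuple: (packed offset and flags, compressed
        # length, rawtext length, delta base rev, linkrev, p1 rev, p2 rev,
        # node); offset_type() packs the file offset and the 16 flag bits
        # into the first field.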
        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
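        # ``data`` is the (header, chunk) pair returned by compress(). For
        # inline revlogs, index entries and revision data are interleaved in
        # the .i file, so everything is written through ifh; otherwise the
        # entry goes to the .i file and the chunk to the .d file.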
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

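        # Each element of ``deltas`` is a 7-tuple:
        #   (node, p1, p2, linknode, deltabase, delta, flags)
        # as unpacked at the top of the loop below.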
        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
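        # flush() is handed to _peek_iscensored() below so that an
        # implementation needing to read back freshly appended data can
        # force buffered writes out first (the base implementation in this
        # class ignores it).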
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
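                    # (a bdiff delta is a sequence of hunks, each headed by
                    # three big-endian 32-bit integers: start, end and the
                    # length of the replacement data)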
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip all revisions
        with a linkrev >= minlink

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

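        # the index keeps a trailing sentinel entry for nullrev, which is why
        # the slice below stops at -1 rather than at the end of the list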
        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
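        """Check that the index and data file sizes match the index entries.

        Returns a (dd, di) pair giving the size discrepancy of the data
        file and of the index file, respectively; (0, 0) means both files
        are consistent with the recorded revisions.
        """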
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2957
2960
2958 def clone(self, tr, destrevlog, addrevisioncb=None,
2961 def clone(self, tr, destrevlog, addrevisioncb=None,
2959 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2962 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2960 """Copy this revlog to another, possibly with format changes.
2963 """Copy this revlog to another, possibly with format changes.
2961
2964
2962 The destination revlog will contain the same revisions and nodes.
2965 The destination revlog will contain the same revisions and nodes.
2963 However, it may not be bit-for-bit identical due to e.g. delta encoding
2966 However, it may not be bit-for-bit identical due to e.g. delta encoding
2964 differences.
2967 differences.
2965
2968
2966 The ``deltareuse`` argument control how deltas from the existing revlog
2969 The ``deltareuse`` argument control how deltas from the existing revlog
2967 are preserved in the destination revlog. The argument can have the
2970 are preserved in the destination revlog. The argument can have the
2968 following values:
2971 following values:
2969
2972
2970 DELTAREUSEALWAYS
2973 DELTAREUSEALWAYS
2971 Deltas will always be reused (if possible), even if the destination
2974 Deltas will always be reused (if possible), even if the destination
2972 revlog would not select the same revisions for the delta. This is the
2975 revlog would not select the same revisions for the delta. This is the
2973 fastest mode of operation.
2976 fastest mode of operation.
2974 DELTAREUSESAMEREVS
2977 DELTAREUSESAMEREVS
2975 Deltas will be reused if the destination revlog would pick the same
2978 Deltas will be reused if the destination revlog would pick the same
2976 revisions for the delta. This mode strikes a balance between speed
2979 revisions for the delta. This mode strikes a balance between speed
2977 and optimization.
2980 and optimization.
2978 DELTAREUSENEVER
2981 DELTAREUSENEVER
2979 Deltas will never be reused. This is the slowest mode of execution.
2982 Deltas will never be reused. This is the slowest mode of execution.
2980 This mode can be used to recompute deltas (e.g. if the diff/delta
2983 This mode can be used to recompute deltas (e.g. if the diff/delta
2981 algorithm changes).
2984 algorithm changes).
2982
2985
2983 Delta computation can be slow, so the choice of delta reuse policy can
2986 Delta computation can be slow, so the choice of delta reuse policy can
2984 significantly affect run time.
2987 significantly affect run time.
2985
2988
2986 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2989 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2987 two extremes. Deltas will be reused if they are appropriate. But if the
2990 two extremes. Deltas will be reused if they are appropriate. But if the
2988 delta could choose a better revision, it will do so. This means if you
2991 delta could choose a better revision, it will do so. This means if you
2989 are converting a non-generaldelta revlog to a generaldelta revlog,
2992 are converting a non-generaldelta revlog to a generaldelta revlog,
2990 deltas will be recomputed if the delta's parent isn't a parent of the
2993 deltas will be recomputed if the delta's parent isn't a parent of the
2991 revision.
2994 revision.
2992
2995
2993 In addition to the delta policy, the ``deltabothparents`` argument
2996 In addition to the delta policy, the ``deltabothparents`` argument
2994 controls whether to compute deltas against both parents for merges.
2997 controls whether to compute deltas against both parents for merges.
2995 By default, the current default is used.
2998 By default, the current default is used.
2996 """
2999 """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
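                # (index tuple layout, as in _addrevision: field 0 packs
                # offset and flags, 4 is the linkrev, 5 and 6 the parent
                # revs, and 7 the node)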
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd