revlog: fix typo in 'buildtext' name...
Boris Feld
r39228:dbc5ead9 default
@@ -1,3076 +1,3076 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import heapq
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .utils import (
    stringutil,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
# bitmask for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)

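# Editor's note: a minimal usage sketch (not part of the original module)
# showing how metadata round-trips through packmeta()/parsemeta(); the key
# and payload are made-up examples:
#
#     blob = packmeta({'copy': 'source-file'}, 'file content')
#     meta, offset = parsemeta(blob)
#     assert meta == {'copy': 'source-file'}
#     assert blob[offset:] == 'file content'
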
def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans, and "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor

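# Editor's note: a hedged sketch (not part of the original module) of
# registering a 3-tuple of transforms on an already-known flag; the identity
# transforms below are illustrative only:
#
#     def _readext(self, rawtext):
#         return rawtext, True   # (text, usable for hash checking)
#     def _writeext(self, text):
#         return text, True      # (rawtext, usable for hash checking)
#     def _rawext(self, rawtext):
#         return True
#     addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))
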
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

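# Editor's note: a small sketch (not part of the original module) of how one
# index field packs the byte offset into the high 48 bits and the flags into
# the low 16 bits:
#
#     packed = offset_type(1024, REVIDX_ISCENSORED)
#     assert getoffset(packed) == 1024
#     assert gettype(packed) == REVIDX_ISCENSORED
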
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()

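# Editor's note: an illustrative sketch (not part of the original module).
# For a root revision both parents are nullid, so its node is the SHA-1 of
# the two null parents followed by the text:
#
#     import hashlib
#     node = hash('file content', nullid, nullid)
#     assert node == hashlib.sha1(nullid + nullid + 'file content').digest()
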
class _testrevlog(object):
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0):
        """data is a list of revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap

    def start(self, rev):
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # If we have a non-empty delta candidate, there is nothing to trim
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, except the very first revision of a chain
        while (endidx > 1
               and endidx > startidx
               and length(revs[endidx - 1]) == 0):
            endidx -= 1

    return revs[startidx:endidx]

def _segmentspan(revlog, revs, deltainfo=None):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> _segmentspan(revlog, [0, 4])
    17
    >>> _segmentspan(revlog, [3, 4])
    5
    >>> _segmentspan(revlog, [1, 2, 3,])
    7
    >>> _segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    if deltainfo is not None and len(revlog) <= revs[-1]:
        if len(revs) == 1:
            return deltainfo.deltalen
        offset = revlog.end(len(revlog) - 1)
        end = deltainfo.deltalen + offset
    else:
        end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])

def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revision chunks are larger than this limit, they will still
    be yielded individually.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # * deltainfo is used to pick a good delta chain when writing.
    if not (deltainfo is None or targetsize is None):
        msg = 'cannot use `targetsize` with a `deltainfo`'
        raise error.ProgrammingError(msg)
    for chunk in _slicechunktodensity(revlog, revs,
                                      deltainfo,
                                      revlog._srdensitythreshold,
                                      revlog._srmingapsize):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk

def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected but
    that are still too large compared to the read guarantee of revlogs. This
    might happen when the "minimal gap size" interrupted the slicing, or when
    chains are built in a way that creates large blocks next to each other.

    >>> revlog = _testrevlog([
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    startrevidx = 0
    startdata = revlog.start(revs[0])
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        if span <= targetsize:
            endrevidx = idx
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx
    yield _trimchunk(revlog, revs, startrevidx)

def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    ``deltainfo`` is a _deltainfo instance of a revision that we would append
    to the top of the revlog.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    nextrev = len(revlog)
    nextoffset = revlog.end(nextrev - 1)

    if deltainfo is None:
        deltachainspan = _segmentspan(revlog, revs)
        chainpayload = sum(length(r) for r in revs)
    else:
        deltachainspan = deltainfo.distance
        chainpayload = deltainfo.compresseddeltalen

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    if deltainfo is not None and deltainfo.deltalen:
        revs = list(revs)
        revs.append(nextrev)

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        if rev < nextrev:
            revstart = start(rev)
            revlen = length(rev)
        else:
            revstart = nextoffset
            revlen = deltainfo.deltalen

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < targetdensity:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()
    snapshotdepth = attr.ib()

class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides revisions that are worth diffing against, grouped by
        level of ease.
        """
        revlog = self.revlog
        gdelta = revlog._generaldelta
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and gdelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if gdelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]

                if not revlog._deltabothparents and len(parents) == 2:
                    parents.sort()
                    # To minimize the chance of having to build a fulltext,
                    # pick first whichever parent is closest to us (max rev)
                    yield (parents[1],)
                    # then the other one (min rev) if the first did not fit
                    yield (parents[0],)
                    tested.update(parents)
                elif len(parents) > 0:
                    # Test all parents (1 or 2), and keep the best candidate
                    yield parents
                    tested.update(parents)

            if prev not in tested:
                # other approaches failed; try against prev to hopefully
                # save us a fulltext.
                yield (prev,)
                tested.add(prev)

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

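    # Editor's note: an illustrative sketch (not part of the original class)
    # of the "delta replaces entire base" special case above, assuming
    # mdiff's hunk layout of three big-endian int32s (start, end, new
    # length). A hunk that rewrites the whole base carries the fulltext
    # verbatim after the 12-byte header, so the base never needs decoding:
    #
    #     import struct
    #     base, new = 'old fulltext', 'new fulltext'
    #     delta = struct.pack(">lll", 0, len(base), len(new)) + new
    #     hlen = struct.calcsize(">lll")
    #     assert delta[hlen:] == new
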
    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        revlog = self.revlog
        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        if not revinfo.textlen:
            return None # empty files do not need deltas

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltalength = self.revlog.length
        deltaparent = self.revlog.deltaparent

        deltainfo = None
        deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            # filter out delta base that will never produce good delta
            candidaterevs = [r for r in candidaterevs
                             if self.revlog.length(r) <= deltas_limit]
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # skip over empty delta (no need to include them in a chain)
                while candidaterev != nullrev and not deltalength(candidaterev):
                    candidaterev = deltaparent(candidaterev)
                # no need to try a delta against nullid, this will be handled
                # by fulltext later.
                if candidaterev == nullrev:
                    continue
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo

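    # Editor's note: a simplified sketch (not part of the original class) of
    # the search loop above; candidate groups arrive ordered by increasing
    # cost, and the first group containing an acceptable delta wins:
    #
    #     def _pickdelta(groups, build, isgood):
    #         for group in groups:
    #             good = [d for d in map(build, group) if isgood(d)]
    #             if good:
    #                 return min(good, key=lambda d: d.deltalen)
    #         return None
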
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated with the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

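# Editor's note: an illustrative sketch (not part of the original module) of
# packing one v0 index entry; the field values are made up:
#
#     entry = indexformatv0_pack(0,          # offset in the data file
#                                11,         # compressed length
#                                0,          # base rev
#                                0,          # link rev
#                                nullid,     # parent 1 nodeid
#                                nullid,     # parent 2 nodeid
#                                'x' * 20)   # this revision's nodeid
#     assert len(entry) == indexformatv0.size == 76
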
class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

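# Editor's note: a small illustration (not part of the original module).
# Unlike a plain list, index[-1] is reserved for the null revision rather
# than meaning "last element":
#
#     index = revlogoldindex([(0, 11, 12, 0, 0, -1, -1, 'x' * 20)])
#     assert index[-1] == (0, 0, 0, -1, -1, -1, -1, nullid)
#     assert index[0][1] == 11   # compressed length of rev 0
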
# maximum <delta-chain-data>/<revision-text-length> ratio
LIMIT_DELTA2TEXT = 2

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

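# Editor's note: an illustrative sketch (not part of the original module).
# The first index entry doubles as the file header: revlogio.packentry()
# below overwrites its leading 4 bytes with the version field, which is
# lossless because rev 0 always starts at offset 0:
#
#     entry = indexformatng_pack(offset_type(0, 0), 11, 12, 0, 0,
#                                -1, -1, 'x' * 20)
#     header = versionformat_pack(REVLOG_DEFAULT_VERSION) + entry[4:]
#     assert versionformat_unpack(header[:4])[0] == REVLOG_DEFAULT_VERSION
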
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

910 class revlog(object):
910 class revlog(object):
911 """
911 """
912 the underlying revision storage object
912 the underlying revision storage object
913
913
914 A revlog consists of two parts, an index and the revision data.
914 A revlog consists of two parts, an index and the revision data.
915
915
916 The index is a file with a fixed record size containing
916 The index is a file with a fixed record size containing
917 information on each revision, including its nodeid (hash), the
917 information on each revision, including its nodeid (hash), the
918 nodeids of its parents, the position and offset of its data within
918 nodeids of its parents, the position and offset of its data within
919 the data file, and the revision it's based on. Finally, each entry
919 the data file, and the revision it's based on. Finally, each entry
920 contains a linkrev entry that can serve as a pointer to external
920 contains a linkrev entry that can serve as a pointer to external
921 data.
921 data.
922
922
923 The revision data itself is a linear collection of data chunks.
923 The revision data itself is a linear collection of data chunks.
924 Each chunk represents a revision and is usually represented as a
924 Each chunk represents a revision and is usually represented as a
925 delta against the previous chunk. To bound lookup time, runs of
925 delta against the previous chunk. To bound lookup time, runs of
926 deltas are limited to about 2 times the length of the original
926 deltas are limited to about 2 times the length of the original
927 version data. This makes retrieval of a version proportional to
927 version data. This makes retrieval of a version proportional to
928 its size, or O(1) relative to the number of revisions.
928 its size, or O(1) relative to the number of revisions.
929
929
930 Both pieces of the revlog are written to in an append-only
930 Both pieces of the revlog are written to in an append-only
931 fashion, which means we never need to rewrite a file to insert or
931 fashion, which means we never need to rewrite a file to insert or
932 remove data, and can use some simple techniques to avoid the need
932 remove data, and can use some simple techniques to avoid the need
933 for locking while reading.
933 for locking while reading.
934
934
935 If checkambig, indexfile is opened with checkambig=True at
935 If checkambig, indexfile is opened with checkambig=True at
936 writing, to avoid file stat ambiguity.
936 writing, to avoid file stat ambiguity.
937
937
938 If mmaplargeindex is True, and an mmapindexthreshold is set, the
938 If mmaplargeindex is True, and an mmapindexthreshold is set, the
939 index will be mmapped rather than read if it is larger than the
939 index will be mmapped rather than read if it is larger than the
940 configured threshold.
940 configured threshold.
941
941
942 If censorable is True, the revlog can have censored revisions.
942 If censorable is True, the revlog can have censored revisions.
943 """
943 """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                        self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}
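
    # Illustrative sketch, not part of the original source: how the version
    # word parsed above decomposes. A typical modern header combines v1 with
    # the inline-data and generaldelta flags:
    #
    #   v = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA  # 0x00030001
    #   v & 0xFFFF            == REVLOGV1  (format number)
    #   v & FLAG_INLINE_DATA  != 0         (data interleaved in the .i file)
    #   v & FLAG_GENERALDELTA != 0         (deltas may target either parent)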

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        length = len(self)
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
            if stop > length:
                stop = length
        else:
            stop = length
        return pycompat.xrange(start, stop, step)
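
    # Illustrative sketch, not part of the original source: revs() treats
    # `stop` as inclusive and can walk in either direction (`rl` is a
    # hypothetical revlog instance):
    #
    #   list(rl.revs(2, 5))  ->  [2, 3, 4, 5]
    #   list(rl.revs(5, 2))  ->  [5, 4, 3, 2]
    #   list(rl.revs(3))     ->  [3, 4, ..., len(rl) - 1]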

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and
        # two clients could have the same revlog node with different flags
        # (i.e. different rawtext contents) and the delta could be
        # incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]
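
    # Illustrative sketch, not part of the original source: decomposing a
    # hypothetical packed first entry with the accessors above.
    #
    #   v = (720 << 16) | REVIDX_ISCENSORED  # 0x02d08000
    #   v >> 16     == 720     (byte position of the chunk in the data)
    #   v & 0xFFFF  == 0x8000  (flag bits; here only REVIDX_ISCENSORED)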

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
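
    # Illustrative sketch, not part of the original source: if rev 7 deltas
    # against rev 5 and rev 5 against a full snapshot at rev 2 (whose base is
    # itself), _chaininfo(7) returns
    #
    #   (2, length(7) + length(5) + length(2))
    #
    # i.e. two deltas to apply, plus the on-disk (compressed) bytes of the
    # whole chain including the base text.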

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
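
    # Illustrative sketch, not part of the original source, using the same
    # hypothetical 7 -> 5 -> 2 chain as above (`rl` is a hypothetical
    # revlog instance):
    #
    #   rl._deltachain(7)             ->  ([2, 5, 7], False)
    #   rl._deltachain(7, stoprev=5)  ->  ([7], True)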

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
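
    # Illustrative sketch, not part of the original source: lazyset unions a
    # small mutable set with a lazily-evaluated membership source, so the
    # ancestors of `common` are only walked when actually queried:
    #
    #   s = lazyset({0, 1, 2})  # stands in for self.ancestors(common)
    #   s.add(5)
    #   1 in s and 5 in s       # True
    #   7 in s                  # False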

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]
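
    # Illustrative sketch, not part of the original source: in a hypothetical
    # revlog with parent edges 0 <- 1, 1 <- 2 and 1 <- 3, every rev is first
    # marked as a head, then 0 and 1 are cleared as parents, so _headrevs()
    # returns [2, 3]. (The list has one extra slot so that writes to
    # ishead[-1], i.e. nullrev parents, land harmlessly at the end.)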

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]
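
    # Illustrative sketch, not part of the original source: with the
    # hypothetical graph 0 <- 1, 1 <- 2 and 1 <- 3 from above,
    #
    #   rl.heads()                  -> nodes of revs [2, 3]
    #   rl.heads(start=rl.node(2))  -> [rl.node(2)]  (only heads above rev 2)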

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)
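
    # Note, added for clarity and not part of the original source: the early
    # `a > b` return is sound because revision numbers are a topological
    # order, so an ancestor always has a smaller rev than its descendants.
    # The final membership test works because if a is an ancestor of b, then
    # a is necessarily one of the heads of the common ancestors of (a, b).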

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid
1786
1786
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                 _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                     _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

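    # Usage sketch (editorial, not part of the original source; assumes an
    # already-initialized revlog instance ``rl``):
    #
    #   rl.lookup(0)            # revision number -> binary node
    #   rl.lookup('0')          # str(revision number) works too
    #   rl.lookup('d82ea0a1')   # unambiguous hex prefix of a nodeid
    #
    # An ambiguous prefix raises AmbiguousPrefixLookupError; an unknown
    # identifier raises LookupError.
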
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

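    # Illustrative note (editorial, not part of the original source): for a
    # node whose hex form starts with 'd82ea0a1', ``shortest(node)`` returns
    # the shortest prefix ('d', 'd8', ...) that no other node in the index
    # shares, never shorter than ``minlength`` and never consisting only of
    # 'f' characters, which could be confused with the virtual wdir id.
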
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

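    # Worked example of the window rounding above (editorial, not part of
    # the original source): with a chunk cache size of 65536 bytes, a
    # request for offset=70000, length=1000 gives
    #   realoffset = 70000 & ~65535                     = 65536
    #   reallength = ((71000 + 65536) & ~65535) - 65536 = 65536
    # so one aligned 64KiB window is read and cached, and a util.buffer()
    # view of just the requested 1000 bytes is returned.
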
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

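    # Illustrative note (editorial, not part of the original source): in an
    # inline revlog, index entries and revision data are interleaved in a
    # single file (entry 0, data 0, entry 1, data 1, ...). The data of
    # revision r is therefore preceded by r + 1 fixed-size index entries in
    # addition to all earlier data, which is what the
    # ``(rev + 1) * self._io.size`` adjustment above accounts for.
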
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs, targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

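    # Illustrative note (editorial, not part of the original source): with
    # sparse-read enabled, _slicechunk splits the requested revisions into
    # groups so that large gaps of unrelated data between them are skipped
    # rather than read, trading a few extra read() calls for much less I/O
    # on long delta chains.
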
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

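    # Illustrative note (editorial, not part of the original source): with
    # general delta enabled, the base field of the index entry names the
    # delta parent directly; in the legacy layout a delta always applies on
    # top of the previous revision, hence ``rev - 1``. A base equal to the
    # revision itself marks a full snapshot, reported as a delta against
    # nullrev.
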
    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if rev == nullrev:
            return True
        deltap = self.deltaparent(rev)
        if deltap == nullrev:
            return True
        p1, p2 = self.parentrevs(rev)
        if deltap in (p1, p2):
            return False
        return self.issnapshot(deltap)

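    # Illustrative note (editorial, not part of the original source): a
    # revision is a snapshot when it is stored as a full text (delta parent
    # is nullrev) or, recursively, as an "intermediate" snapshot: a delta
    # taken against another snapshot that is not one of the revision's own
    # parents. A plain delta against a parent is never a snapshot.
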
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

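    # Reconstruction sketch (editorial, not part of the original source):
    # reading a revision boils down to
    #   chain, stopped = self._deltachain(rev)   # base rev plus deltas
    #   bins = self._chunks(chain)               # decompressed chunks
    #   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
    # followed by the flag-processor 'read' transforms and, when requested,
    # a hash check of the result against the stored node.
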
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

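    # Illustrative example (editorial, not part of the original source): a
    # flag processor is registered as a (readtransform, writetransform,
    # rawtransform) triple; for instance the lfs extension registers one for
    # REVIDX_EXTSTORED roughly like:
    #
    #   addflagprocessor(REVIDX_EXTSTORED,
    #                    (readfromstore, writetostore, bypasscheckhash))
    #
    # Read and write transforms return (newtext, validatehash); the raw
    # transform returns only the validatehash boolean, as used above.
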
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise RevlogError(_("integrity check failed on %s:%s")
                                  % (self.indexfile,
                                     pycompat.bytestr(revornode)))
        except RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such a
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

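    # Illustrative note (editorial, not part of the original source): the
    # return value is a (header, data) pair that callers concatenate. An
    # empty header means the compressor's own header (e.g. zlib's 'x')
    # already leads the payload, or the data starts with '\0' and can be
    # stored as-is; the 'u' header explicitly marks data stored uncompressed.
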
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') %
                                  stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

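    # Illustrative summary (editorial, not part of the original source) of
    # the header bytes handled above:
    #   'x'  - zlib-compressed chunk (the zlib stream header itself)
    #   '\0' - raw data that happens to begin with a NUL byte, stored as-is
    #   'u'  - explicitly uncompressed data; the marker byte is stripped
    # Any other leading byte is resolved through util.compengines, for
    # example the zstd engine when the repository was created with a zstd
    # requirement.
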
    def _isgooddeltainfo(self, deltainfo, revinfo):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if deltainfo is None:
            return False

        # - 'deltainfo.distance' is the distance from the base revision --
        #   bounding it limits the amount of I/O we need to do.
        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
        #   deltas we need to apply -- bounding it limits the amount of CPU
        #   we consume.

        if self._sparserevlog:
            # As sparse-read will be used, we can consider that the distance,
            # instead of being the span of the whole chunk,
            # is the span of the largest read chunk
            base = deltainfo.base

            if base != nullrev:
                deltachain = self._deltachain(base)[0]
            else:
                deltachain = []

            # search for the first non-snapshot revision
            for idx, r in enumerate(deltachain):
                if not self.issnapshot(r):
                    break
            deltachain = deltachain[idx:]
            chunks = _slicechunk(self, deltachain, deltainfo)
            all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
            distance = max(all_span)
        else:
            distance = deltainfo.distance

        textlen = revinfo.textlen
        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = distance # ensure the conditional passes
        maxdist = max(maxdist, defaultmax)
        if self._sparserevlog and maxdist < self._srmingapsize:
            # In multiple places, we ignore irrelevant data ranges below a
            # certain size. We also apply this tradeoff here and relax the
            # span constraint for small enough content.
            maxdist = self._srmingapsize

        # Bad delta from read span:
        #
        # If the span of data read is larger than the maximum allowed.
        if maxdist < distance:
            return False

        # Bad delta from new delta size:
        #
        # If the delta size is larger than the target text, storing the
        # delta will be inefficient.
        if textlen < deltainfo.deltalen:
            return False

        # Bad delta from cumulated payload size:
        #
        # If the sum of deltas gets larger than K * target text length.
        if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
            return False

        # Bad delta from chain length:
        #
        # If the number of deltas in the chain gets too high.
        if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
            return False

        # bad delta from intermediate snapshot size limit
        #
        # If an intermediate snapshot size is higher than the limit. The
        # limit exists to prevent endless chains of intermediate deltas from
        # being created.
        if (deltainfo.snapshotdepth is not None and
                (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
            return False

        # bad delta if new intermediate snapshot is larger than the previous
        # snapshot
        if (deltainfo.snapshotdepth
                and self.length(deltainfo.base) < deltainfo.deltalen):
            return False

        return True

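    # Worked example (editorial, not part of the original source; assumes
    # the module-level LIMIT_DELTA2TEXT constant is 2): for a 1000-byte
    # target text, a candidate delta is rejected when the data span to read
    # exceeds max(self._maxdeltachainspan, 4 * 1000) bytes, when the delta
    # itself is larger than 1000 bytes, or when the cumulated deltas to
    # apply exceed 2 * 1000 bytes.
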
2585 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2585 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2586 cachedelta, ifh, dfh, alwayscache=False,
2586 cachedelta, ifh, dfh, alwayscache=False,
2587 deltacomputer=None):
2587 deltacomputer=None):
2588 """internal function to add revisions to the log
2588 """internal function to add revisions to the log
2589
2589
        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a default deltacomputer
        will be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid or node in wdirfilenodeids:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = None
        else:
            deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

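    # Illustrative sketch (not part of revlog.py): the two call shapes
    # permitted by the invariant documented above, using hypothetical
    # variables for the node, transaction and file handles. Both forms
    # appear later in this file (addgroup() passes a cached delta,
    # clone() may pass full raw text):
    #
    #   rl._addrevision(node, rawtext, tr, link, p1, p2, flags,
    #                   None, ifh, dfh)              # full text, no delta
    #   rl._addrevision(node, None, tr, link, p1, p2, flags,
    #                   (baserev, delta), ifh, dfh)  # cached delta only
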
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

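    # Illustrative sketch (not part of revlog.py) of the a+ workaround
    # described in _writeentry(), with a hypothetical path; without the
    # explicit seek, some platforms may write at the current read offset
    # instead of appending:
    #
    #   fh = open(path, 'ab+')      # handle may have been read from earlier
    #   fh.seek(0, os.SEEK_END)     # force the position to EOF...
    #   fh.write(entry)             # ...so the write really appends
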
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

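    # Illustrative sketch: each element of the ``deltas`` iterable consumed
    # by addgroup() unpacks as the 7-tuple used above (hypothetical values):
    #
    #   deltas = [
    #       (node, p1, p2, linknode, deltabase, delta, flags),
    #       ...
    #   ]
    #   rl.addgroup(deltas, linkmapper, transaction,
    #               addrevisioncb=lambda rl, node: None)
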
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

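    # Worked illustration of the heuristic above, assuming (as the code
    # does) that mdiff.replacediffheader() packs a single ">lll" patch
    # header covering the whole old text:
    #
    #   hlen = struct.calcsize(">lll")     # 12 bytes: start, end, newlen
    #   header, body = delta[:hlen], delta[hlen:]
    #   # a full-replacement delta whose body starts with "\1\ncensored:"
    #   # is taken to produce a censored revision
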
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

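    # Usage sketch (hypothetical revlog ``rl``): stripping everything whose
    # linkrev is >= 5 requires truncating from ``rev`` onward, and any revs
    # in ``broken`` below that point will be left with dangling linkrevs:
    #
    #   rev, broken = rl.getstrippoint(5)
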
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

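    # Sketch of the truncation offsets computed above for a revlog keeping
    # ``rev`` revisions; a separate data file is cut at start(rev), while an
    # inline revlog interleaves index entries and data in one file:
    #
    #   end = rev * self._io.size                      # separate index file
    #   end = self.start(rev) + rev * self._io.size    # inline index file
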
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

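    # Usage sketch: checksize() reports how many unexpected trailing bytes
    # each file has; (0, 0) means the data and index files have exactly the
    # sizes implied by the index entries:
    #
    #   dd, di = rl.checksize()
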
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        If it is None, the destination revlog's existing setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
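
    # Usage sketch (hypothetical source/destination revlogs and an open
    # transaction ``tr``): recompute every delta while converting:
    #
    #   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)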