revlog: only consider the span of the delta section...
Paul Morelle - r39193:a98e926b default
@@ -1,3068 +1,3073 @@
1 # revlog.py - storage back-end for mercurial
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7
8 """Storage back-end for Mercurial.
9
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
12 """
13
14 from __future__ import absolute_import
15
16 import collections
17 import contextlib
18 import errno
19 import hashlib
20 import heapq
21 import os
22 import re
23 import struct
24 import zlib
25
26 # import stuff from node for others to import from revlog
27 from .node import (
28     bin,
29     hex,
30     nullid,
31     nullrev,
32     wdirfilenodeids,
33     wdirhex,
34     wdirid,
35     wdirrev,
36 )
37 from .i18n import _
38 from .thirdparty import (
39     attr,
40 )
41 from . import (
42     ancestor,
43     error,
44     mdiff,
45     policy,
46     pycompat,
47     templatefilters,
48     util,
49 )
50 from .utils import (
51     stringutil,
52 )
53
54 parsers = policy.importmod(r'parsers')
55
56 # Aliased for performance.
57 _zlibdecompress = zlib.decompress
58
59 # revlog header flags
60 REVLOGV0 = 0
61 REVLOGV1 = 1
62 # Dummy value until file format is finalized.
63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 REVLOGV2 = 0xDEAD
65 FLAG_INLINE_DATA = (1 << 16)
66 FLAG_GENERALDELTA = (1 << 17)
67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72
73 # revlog index flags
74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 REVIDX_DEFAULT_FLAGS = 0
78 # stable order in which flags need to be processed and their processors applied
79 REVIDX_FLAGS_ORDER = [
80     REVIDX_ISCENSORED,
81     REVIDX_ELLIPSIS,
82     REVIDX_EXTSTORED,
83 ]
84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 # bitmask for flags that could cause rawdata content change
86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87
88 # max size of revlog with inline data
89 _maxinline = 131072
90 _chunksize = 1048576
91
92 RevlogError = error.RevlogError
93 LookupError = error.LookupError
94 AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
95 CensoredNodeError = error.CensoredNodeError
96 ProgrammingError = error.ProgrammingError
97
98 # Store flag processors (cf. 'addflagprocessor()' to register)
99 _flagprocessors = {
100     REVIDX_ISCENSORED: None,
101 }
102
103 _mdre = re.compile('\1\n')
104 def parsemeta(text):
105     """return (metadatadict, metadatasize)"""
106     # text can be buffer, so we can't use .startswith or .index
107     if text[:2] != '\1\n':
108         return None, None
109     s = _mdre.search(text, 2).start()
110     mtext = text[2:s]
111     meta = {}
112     for l in mtext.splitlines():
113         k, v = l.split(": ", 1)
114         meta[k] = v
115     return meta, (s + 2)
116
117 def packmeta(meta, text):
118     keys = sorted(meta)
119     metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
120     return "\1\n%s\1\n%s" % (metatext, text)
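# Illustration: a round-trip of the metadata envelope above, doctest-style.
# The sample key/value is hypothetical; only packmeta/parsemeta are used:
#
# >>> packmeta({'copy': 'a'}, 'data')
# '\x01\ncopy: a\n\x01\ndata'
# >>> parsemeta(packmeta({'copy': 'a'}, 'data'))
# ({'copy': 'a'}, 12)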
121
122 def _censoredtext(text):
123     m, offs = parsemeta(text)
124     return m and "censored" in m
125
126 def addflagprocessor(flag, processor):
127     """Register a flag processor on a revision data flag.
128
129     Invariant:
130     - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
131       and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
132     - Only one flag processor can be registered on a specific flag.
133     - flagprocessors must be 3-tuples of functions (read, write, raw) with the
134       following signatures:
135           - (read)  f(self, rawtext) -> text, bool
136           - (write) f(self, text) -> rawtext, bool
137           - (raw)   f(self, rawtext) -> bool
138       "text" is presented to the user. "rawtext" is stored in revlog data, not
139       directly visible to the user.
140       The boolean returned by these transforms is used to determine whether
141       the returned text can be used for hash integrity checking. For example,
142       if "write" returns False, then "text" is used to generate the hash. If
143       "write" returns True, that basically means the "rawtext" returned by
144       "write" should be used to generate the hash. Usually, "write" and "read"
145       return different booleans. And "raw" returns the same boolean as "write".
146
147     Note: The 'raw' transform is used for changegroup generation and in some
148     debug commands. In this case the transform only indicates whether the
149     contents can be used for hash integrity checks.
150     """
151     if not flag & REVIDX_KNOWN_FLAGS:
152         msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
153         raise ProgrammingError(msg)
154     if flag not in REVIDX_FLAGS_ORDER:
155         msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
156         raise ProgrammingError(msg)
157     if flag in _flagprocessors:
158         msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
159         raise error.Abort(msg)
160     _flagprocessors[flag] = processor
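# Illustration: a minimal sketch of the (read, write, raw) contract above,
# registering a no-op processor on the existing REVIDX_EXTSTORED flag. The
# _noop* names are hypothetical, and the lines stay commented out so the
# module's behaviour is unchanged:
#
# def _noopread(rl, rawtext):
#     return rawtext, True        # text == rawtext, usable for hash checks
# def _noopwrite(rl, text):
#     return text, True           # hash is generated from this rawtext
# def _noopraw(rl, rawtext):
#     return True                 # same boolean as the "write" transform
# addflagprocessor(REVIDX_EXTSTORED, (_noopread, _noopwrite, _noopraw))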
161
162 def getoffset(q):
163     return int(q >> 16)
164
165 def gettype(q):
166     return int(q & 0xFFFF)
167
168 def offset_type(offset, type):
169     if (type & ~REVIDX_KNOWN_FLAGS) != 0:
170         raise ValueError('unknown revlog index flags')
171     return int(int(offset) << 16 | type)
172
173 _nullhash = hashlib.sha1(nullid)
174
175 def hash(text, p1, p2):
176     """generate a hash from the given text and its parent hashes
177
178     This hash combines both the current file contents and its history
179     in a manner that makes it easy to distinguish nodes with the same
180     content in the revision graph.
181     """
182     # As of now, if one of the parent nodes is null, p2 is null
183     if p2 == nullid:
184         # deep copy of a hash is faster than creating one
185         s = _nullhash.copy()
186         s.update(p1)
187     else:
188         # none of the parent nodes are nullid
189         if p1 < p2:
190             a = p1
191             b = p2
192         else:
193             a = p2
194             b = p1
195         s = hashlib.sha1(a)
196         s.update(b)
197     s.update(text)
198     return s.digest()
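# Parent order does not affect the result, since parents are sorted before
# hashing. A doctest-style illustration with hypothetical 20-byte parents:
#
# >>> p, q = b'\x11' * 20, b'\x22' * 20
# >>> hash(b'data', p, q) == hash(b'data', q, p)
# True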
199
200 class _testrevlog(object):
201     """minimalist fake revlog to use in doctests"""
202
203     def __init__(self, data, density=0.5, mingap=0):
204         """data is a list of revision payload boundaries"""
205         self._data = data
206         self._srdensitythreshold = density
207         self._srmingapsize = mingap
208
209     def start(self, rev):
210         if rev == 0:
211             return 0
212         return self._data[rev - 1]
213
214     def end(self, rev):
215         return self._data[rev]
216
217     def length(self, rev):
218         return self.end(rev) - self.start(rev)
219
220     def __len__(self):
221         return len(self._data)
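# The constructor argument is a list of cumulative end offsets, so for
# instance (doctest-style illustration with hypothetical boundaries):
#
# >>> rl = _testrevlog([5, 10, 12])
# >>> rl.start(1), rl.end(1), rl.length(1)
# (5, 10, 5)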
222
223 def _trimchunk(revlog, revs, startidx, endidx=None):
224     """returns revs[startidx:endidx] without empty trailing revs
225
226     Doctest Setup
227     >>> revlog = _testrevlog([
228     ... 5, #0
229     ... 10, #1
230     ... 12, #2
231     ... 12, #3 (empty)
232     ... 17, #4
233     ... 21, #5
234     ... 21, #6 (empty)
235     ... ])
236
237     Contiguous cases:
238     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
239     [0, 1, 2, 3, 4, 5]
240     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
241     [0, 1, 2, 3, 4]
242     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
243     [0, 1, 2]
244     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
245     [2]
246     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
247     [3, 4, 5]
248     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
249     [3, 4]
250
251     Discontiguous cases:
252     >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
253     [1, 3, 5]
254     >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
255     [1]
256     >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
257     [3, 5]
258     >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
259     [3, 5]
260     """
261     length = revlog.length
262
263     if endidx is None:
264         endidx = len(revs)
265
266     # If we have a non-empty delta candidate, there is nothing to trim
267     if revs[endidx - 1] < len(revlog):
268         # Trim empty revs at the end, except the very first revision of a chain
269         while (endidx > 1
270                and endidx > startidx
271                and length(revs[endidx - 1]) == 0):
272             endidx -= 1
273
274     return revs[startidx:endidx]
275
276 def _segmentspan(revlog, revs, deltainfo=None):
277     """Get the byte span of a segment of revisions
278
279     revs is a sorted array of revision numbers
280
281     >>> revlog = _testrevlog([
282     ... 5, #0
283     ... 10, #1
284     ... 12, #2
285     ... 12, #3 (empty)
286     ... 17, #4
287     ... ])
288
289     >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
290     17
291     >>> _segmentspan(revlog, [0, 4])
292     17
293     >>> _segmentspan(revlog, [3, 4])
294     5
295     >>> _segmentspan(revlog, [1, 2, 3,])
296     7
297     >>> _segmentspan(revlog, [1, 3])
298     7
299     """
300     if not revs:
301         return 0
302     if deltainfo is not None and len(revlog) <= revs[-1]:
303         if len(revs) == 1:
304             return deltainfo.deltalen
305         offset = revlog.end(len(revlog) - 1)
306         end = deltainfo.deltalen + offset
307     else:
308         end = revlog.end(revs[-1])
309     return end - revlog.start(revs[0])
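# When ``deltainfo`` describes a revision about to be appended (its number
# equals len(revlog)), the span extends past the current data end by
# deltainfo.deltalen. With the doctest revlog above (data ends at byte 17)
# and a hypothetical deltainfo whose deltalen is 6:
#
#   _segmentspan(revlog, [3, 5], deltainfo)
#   # end = 17 + 6 = 23, start(3) = 12, so the span is 23 - 12 = 11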
310
311 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
312     """slice revs to reduce the amount of unrelated data to be read from disk.
313
314     ``revs`` is sliced into groups that should be read in one time.
315     Assume that revs are sorted.
316
317     The initial chunk is sliced until the overall density (payload/chunks-span
318     ratio) is above `revlog._srdensitythreshold`. No gap smaller than
319     `revlog._srmingapsize` is skipped.
320
321     If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
322     For consistency with other slicing choices, this limit won't go lower than
323     `revlog._srmingapsize`.
324
325     If an individual revision chunk is larger than this limit, it will still
326     be yielded individually.
327
328     >>> revlog = _testrevlog([
329     ... 5, #00 (5)
330     ... 10, #01 (5)
331     ... 12, #02 (2)
332     ... 12, #03 (empty)
333     ... 27, #04 (15)
334     ... 31, #05 (4)
335     ... 31, #06 (empty)
336     ... 42, #07 (11)
337     ... 47, #08 (5)
338     ... 47, #09 (empty)
339     ... 48, #10 (1)
340     ... 51, #11 (3)
341     ... 74, #12 (23)
342     ... 85, #13 (11)
343     ... 86, #14 (1)
344     ... 91, #15 (5)
345     ... ])
346
347     >>> list(_slicechunk(revlog, list(range(16))))
348     [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
349     >>> list(_slicechunk(revlog, [0, 15]))
350     [[0], [15]]
351     >>> list(_slicechunk(revlog, [0, 11, 15]))
352     [[0], [11], [15]]
353     >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
354     [[0], [11, 13, 15]]
355     >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
356     [[1, 2], [5, 8, 10, 11], [14]]
357
358     Slicing with a maximum chunk size
359     >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
360     [[0], [11], [13], [15]]
361     >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
362     [[0], [11], [13, 15]]
363     """
364     if targetsize is not None:
365         targetsize = max(targetsize, revlog._srmingapsize)
366     # targetsize should not be specified when evaluating delta candidates:
367     # * targetsize is used to ensure we stay within specification when reading,
368     # * deltainfo is used to pick a good delta chain when writing.
369     if not (deltainfo is None or targetsize is None):
370         msg = 'cannot use `targetsize` with a `deltainfo`'
371         raise error.ProgrammingError(msg)
372     for chunk in _slicechunktodensity(revlog, revs,
373                                       deltainfo,
374                                       revlog._srdensitythreshold,
375                                       revlog._srmingapsize):
376         for subchunk in _slicechunktosize(revlog, chunk, targetsize):
377             yield subchunk
378
379 def _slicechunktosize(revlog, revs, targetsize=None):
380     """slice revs to match the target size
381
382     This is intended to be used on chunks that density slicing selected,
383     but that are still too large compared to the read guarantee of revlog.
384     This might happen when the "minimal gap size" interrupted the slicing,
385     or when chains are built in a way that creates large blocks next to each other.
386
387     >>> revlog = _testrevlog([
388     ... 3, #0 (3)
389     ... 5, #1 (2)
390     ... 6, #2 (1)
391     ... 8, #3 (2)
392     ... 8, #4 (empty)
393     ... 11, #5 (3)
394     ... 12, #6 (1)
395     ... 13, #7 (1)
396     ... 14, #8 (1)
397     ... ])
398
399     Cases where chunk is already small enough
400     >>> list(_slicechunktosize(revlog, [0], 3))
401     [[0]]
402     >>> list(_slicechunktosize(revlog, [6, 7], 3))
403     [[6, 7]]
404     >>> list(_slicechunktosize(revlog, [0], None))
405     [[0]]
406     >>> list(_slicechunktosize(revlog, [6, 7], None))
407     [[6, 7]]
408
409     cases where we need actual slicing
410     >>> list(_slicechunktosize(revlog, [0, 1], 3))
411     [[0], [1]]
412     >>> list(_slicechunktosize(revlog, [1, 3], 3))
413     [[1], [3]]
414     >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
415     [[1, 2], [3]]
416     >>> list(_slicechunktosize(revlog, [3, 5], 3))
417     [[3], [5]]
418     >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
419     [[3], [5]]
420     >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
421     [[5], [6, 7, 8]]
422     >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
423     [[0], [1, 2], [3], [5], [6, 7, 8]]
424
425     Case with too large individual chunk (must return valid chunk)
426     >>> list(_slicechunktosize(revlog, [0, 1], 2))
427     [[0], [1]]
428     >>> list(_slicechunktosize(revlog, [1, 3], 1))
429     [[1], [3]]
430     >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
431     [[3], [5]]
432     """
433     assert targetsize is None or 0 <= targetsize
434     if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
435         yield revs
436         return
437
438     startrevidx = 0
439     startdata = revlog.start(revs[0])
440     endrevidx = 0
441     iterrevs = enumerate(revs)
442     next(iterrevs) # skip first rev.
443     for idx, r in iterrevs:
444         span = revlog.end(r) - startdata
445         if span <= targetsize:
446             endrevidx = idx
447         else:
448             chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
449             if chunk:
450                 yield chunk
451             startrevidx = idx
452             startdata = revlog.start(r)
453             endrevidx = idx
454     yield _trimchunk(revlog, revs, startrevidx)
455
456 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
457                          mingapsize=0):
458     """slice revs to reduce the amount of unrelated data to be read from disk.
459
460     ``revs`` is sliced into groups that should be read in one time.
461     Assume that revs are sorted.
462
463     ``deltainfo`` is a _deltainfo instance of a revision that we would append
464     to the top of the revlog.
465
466     The initial chunk is sliced until the overall density (payload/chunks-span
467     ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
468     skipped.
469
470     >>> revlog = _testrevlog([
471     ... 5, #00 (5)
472     ... 10, #01 (5)
473     ... 12, #02 (2)
474     ... 12, #03 (empty)
475     ... 27, #04 (15)
476     ... 31, #05 (4)
477     ... 31, #06 (empty)
478     ... 42, #07 (11)
479     ... 47, #08 (5)
480     ... 47, #09 (empty)
481     ... 48, #10 (1)
482     ... 51, #11 (3)
483     ... 74, #12 (23)
484     ... 85, #13 (11)
485     ... 86, #14 (1)
486     ... 91, #15 (5)
487     ... ])
488
489     >>> list(_slicechunktodensity(revlog, list(range(16))))
490     [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
491     >>> list(_slicechunktodensity(revlog, [0, 15]))
492     [[0], [15]]
493     >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
494     [[0], [11], [15]]
495     >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
496     [[0], [11, 13, 15]]
497     >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
498     [[1, 2], [5, 8, 10, 11], [14]]
499     >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
500     ...                           mingapsize=20))
501     [[1, 2, 3, 5, 8, 10, 11], [14]]
502     >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
503     ...                           targetdensity=0.95))
504     [[1, 2], [5], [8, 10, 11], [14]]
505     >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
506     ...                           targetdensity=0.95, mingapsize=12))
507     [[1, 2], [5, 8, 10, 11], [14]]
508     """
509     start = revlog.start
510     length = revlog.length
511
512     if len(revs) <= 1:
513         yield revs
514         return
515
516     nextrev = len(revlog)
517     nextoffset = revlog.end(nextrev - 1)
518
519     if deltainfo is None:
520         deltachainspan = _segmentspan(revlog, revs)
521         chainpayload = sum(length(r) for r in revs)
522     else:
523         deltachainspan = deltainfo.distance
524         chainpayload = deltainfo.compresseddeltalen
525
526     if deltachainspan < mingapsize:
527         yield revs
528         return
529
530     readdata = deltachainspan
531
532     if deltachainspan:
533         density = chainpayload / float(deltachainspan)
534     else:
535         density = 1.0
536
537     if density >= targetdensity:
538         yield revs
539         return
540
541     if deltainfo is not None and deltainfo.deltalen:
542         revs = list(revs)
543         revs.append(nextrev)
544
545     # Store the gaps in a heap to have them sorted by decreasing size
546     gapsheap = []
547     heapq.heapify(gapsheap)
548     prevend = None
549     for i, rev in enumerate(revs):
550         if rev < nextrev:
551             revstart = start(rev)
552             revlen = length(rev)
553         else:
554             revstart = nextoffset
555             revlen = deltainfo.deltalen
556
557         # Skip empty revisions to form larger holes
558         if revlen == 0:
559             continue
560
561         if prevend is not None:
562             gapsize = revstart - prevend
563             # only consider holes that are large enough
564             if gapsize > mingapsize:
565                 heapq.heappush(gapsheap, (-gapsize, i))
566
567         prevend = revstart + revlen
568
569     # Collect the indices of the largest holes until the density is acceptable
570     indicesheap = []
571     heapq.heapify(indicesheap)
572     while gapsheap and density < targetdensity:
573         oppgapsize, gapidx = heapq.heappop(gapsheap)
574
575         heapq.heappush(indicesheap, gapidx)
576
577         # the gap sizes are stored as negatives to be sorted decreasingly
578         # by the heap
579         readdata -= (-oppgapsize)
580         if readdata > 0:
581             density = chainpayload / float(readdata)
582         else:
583             density = 1.0
584
585     # Cut the revs at collected indices
586     previdx = 0
587     while indicesheap:
588         idx = heapq.heappop(indicesheap)
589
590         chunk = _trimchunk(revlog, revs, previdx, idx)
591         if chunk:
592             yield chunk
593
594         previdx = idx
595
596     chunk = _trimchunk(revlog, revs, previdx)
597     if chunk:
598         yield chunk
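# Worked example for the density cut, using the doctest revlog above with
# revs = [0, 15]: chainpayload = 5 + 5 = 10 bytes, while the initial
# readdata covers the full 91-byte span, so density = 10/91.0 ~ 0.11, below
# the 0.5 target. The single 81-byte gap (between the end of revision 0 at
# offset 5 and the start of revision 15 at offset 86) is then removed,
# readdata drops to 10, density reaches 1.0, and the revs are cut into
# [[0], [15]].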
599
600 @attr.s(slots=True, frozen=True)
601 class _deltainfo(object):
602     distance = attr.ib()
603     deltalen = attr.ib()
604     data = attr.ib()
605     base = attr.ib()
606     chainbase = attr.ib()
607     chainlen = attr.ib()
608     compresseddeltalen = attr.ib()
609     snapshotdepth = attr.ib()
610
611 class _deltacomputer(object):
612     def __init__(self, revlog):
613         self.revlog = revlog
614
615     def _getcandidaterevs(self, p1, p2, cachedelta):
616         """
617         Provides candidate revisions that are worth diffing against,
618         grouped by level of ease.
619         """
620         revlog = self.revlog
621         gdelta = revlog._generaldelta
622         curr = len(revlog)
623         prev = curr - 1
624         p1r, p2r = revlog.rev(p1), revlog.rev(p2)
625
626         # should we try to build a delta?
627         if prev != nullrev and revlog.storedeltachains:
628             tested = set()
629             # This condition is true most of the time when processing
630             # changegroup data into a generaldelta repo. The only time it
631             # isn't true is if this is the first revision in a delta chain
632             # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
633             if cachedelta and gdelta and revlog._lazydeltabase:
634                 # Assume what we received from the server is a good choice
635                 # build delta will reuse the cache
636                 yield (cachedelta[0],)
637                 tested.add(cachedelta[0])
638
639             if gdelta:
640                 # exclude already lazy tested base if any
641                 parents = [p for p in (p1r, p2r)
642                            if p != nullrev and p not in tested]
643
644                 if not revlog._deltabothparents and len(parents) == 2:
645                     parents.sort()
646                     # To minimize the chance of having to build a fulltext,
647                     # pick first whichever parent is closest to us (max rev)
648                     yield (parents[1],)
649                     # then the other one (min rev) if the first did not fit
650                     yield (parents[0],)
651                     tested.update(parents)
652                 elif len(parents) > 0:
653                     # Test all parents (1 or 2), and keep the best candidate
654                     yield parents
655                     tested.update(parents)
656
657             if prev not in tested:
658                 # other approaches failed; try against prev to hopefully save
659                 # us a fulltext.
660                 yield (prev,)
661                 tested.add(prev)
662
663     def buildtext(self, revinfo, fh):
664         """Builds a fulltext version of a revision
665
666         revinfo: _revisioninfo instance that contains all needed info
667         fh: file handle to either the .i or the .d revlog file,
668             depending on whether it is inlined or not
669         """
670         btext = revinfo.btext
671         if btext[0] is not None:
672             return btext[0]
673
674         revlog = self.revlog
675         cachedelta = revinfo.cachedelta
676         flags = revinfo.flags
677         node = revinfo.node
678
679         baserev = cachedelta[0]
680         delta = cachedelta[1]
681         # special case deltas which replace entire base; no need to decode
682         # base revision. this neatly avoids censored bases, which throw when
683         # they're decoded.
684         hlen = struct.calcsize(">lll")
685         if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
686                                                    len(delta) - hlen):
687             btext[0] = delta[hlen:]
688         else:
689             # deltabase is rawtext before being changed by flag processors,
690             # which is equivalent to non-raw text
691             basetext = revlog.revision(baserev, _df=fh, raw=False)
692             btext[0] = mdiff.patch(basetext, delta)
693
694         try:
695             res = revlog._processflags(btext[0], flags, 'read', raw=True)
696             btext[0], validatehash = res
697             if validatehash:
698                 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
699             if flags & REVIDX_ISCENSORED:
700                 raise RevlogError(_('node %s is not censored') % node)
701         except CensoredNodeError:
702             # must pass the censored index flag to add censored revisions
703             if not flags & REVIDX_ISCENSORED:
704                 raise
705         return btext[0]
706
707     def _builddeltadiff(self, base, revinfo, fh):
708         revlog = self.revlog
709         t = self.buildtext(revinfo, fh)
710         if revlog.iscensored(base):
711             # deltas based on a censored revision must replace the
712             # full content in one patch, so delta works everywhere
713             header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
714             delta = header + t
715         else:
716             ptext = revlog.revision(base, _df=fh, raw=True)
717             delta = mdiff.textdiff(ptext, t)
718
719         return delta
720
721     def _builddeltainfo(self, revinfo, base, fh):
722         # can we use the cached delta?
723         if revinfo.cachedelta and revinfo.cachedelta[0] == base:
724             delta = revinfo.cachedelta[1]
725         else:
726             delta = self._builddeltadiff(base, revinfo, fh)
727         revlog = self.revlog
728         header, data = revlog.compress(delta)
729         deltalen = len(header) + len(data)
730         chainbase = revlog.chainbase(base)
731         offset = revlog.end(len(revlog) - 1)
732         dist = deltalen + offset - revlog.start(chainbase)
733         if revlog._generaldelta:
734             deltabase = base
735         else:
736             deltabase = chainbase
737         chainlen, compresseddeltalen = revlog._chaininfo(base)
738         chainlen += 1
739         compresseddeltalen += deltalen
740
741         revlog = self.revlog
742         snapshotdepth = None
743         if deltabase == nullrev:
744             snapshotdepth = 0
745         elif revlog._sparserevlog and revlog.issnapshot(deltabase):
746             # A delta chain should always be one full snapshot,
747             # zero or more semi-snapshots, and zero or more deltas
748             p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
749             if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
750                 snapshotdepth = len(revlog._deltachain(deltabase)[0])
751
752         return _deltainfo(dist, deltalen, (header, data), deltabase,
753                           chainbase, chainlen, compresseddeltalen,
754                           snapshotdepth)
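    # "dist" above counts the bytes a reader would cross from the start of
    # the chain base to the end of the new delta. With hypothetical numbers:
    # a chain base starting at byte 100, a data file currently ending at
    # byte 500, and a 40-byte compressed delta give
    # dist = 40 + 500 - 100 = 440.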
755
756     def finddeltainfo(self, revinfo, fh):
757         """Find an acceptable delta against a candidate revision
758
759         revinfo: information about the revision (instance of _revisioninfo)
760         fh: file handle to either the .i or the .d revlog file,
761             depending on whether it is inlined or not
762
763         Returns the first acceptable candidate revision, as ordered by
764         _getcandidaterevs
765         """
766         if not revinfo.textlen:
767             return None # empty files do not need deltas
768
769         cachedelta = revinfo.cachedelta
770         p1 = revinfo.p1
771         p2 = revinfo.p2
772         revlog = self.revlog
773
774         deltalength = self.revlog.length
775         deltaparent = self.revlog.deltaparent
776
777         deltainfo = None
778         deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
779         for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
780             # filter out delta bases that will never produce a good delta
781             candidaterevs = [r for r in candidaterevs
782                              if self.revlog.length(r) <= deltas_limit]
783             nominateddeltas = []
784             for candidaterev in candidaterevs:
785                 # skip over empty deltas (no need to include them in a chain)
786                 while candidaterev != nullrev and not deltalength(candidaterev):
787                     candidaterev = deltaparent(candidaterev)
788                 # no need to try a delta against nullid, this will be handled
789                 # by fulltext later.
790                 if candidaterev == nullrev:
791                     continue
792                 # no delta for rawtext-changing revs (see "candelta" for why)
793                 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
794                     continue
795                 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
796                 if revlog._isgooddeltainfo(candidatedelta, revinfo):
797                     nominateddeltas.append(candidatedelta)
798             if nominateddeltas:
799                 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
800                 break
801
802         return deltainfo
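    # With LIMIT_DELTA2TEXT = 2 (defined below), a revision whose fulltext
    # is, say, 1000 bytes gets deltas_limit = 2000, so any candidate base
    # whose own stored delta already exceeds 2000 bytes is filtered out
    # above: a chain built on it could not stay within twice the text
    # length.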
803
804 @attr.s(slots=True, frozen=True)
805 class _revisioninfo(object):
806     """Information about a revision that allows building its fulltext
807     node: expected hash of the revision
808     p1, p2: parent revs of the revision
809     btext: built text cache consisting of a one-element list
810     cachedelta: (baserev, uncompressed_delta) or None
811     flags: flags associated with the revision storage
812
813     One of btext[0] or cachedelta must be set.
814     """
815     node = attr.ib()
816     p1 = attr.ib()
817     p2 = attr.ib()
818     btext = attr.ib()
819     textlen = attr.ib()
820     cachedelta = attr.ib()
821     flags = attr.ib()
822
823 # index v0:
824 #  4 bytes: offset
825 #  4 bytes: compressed length
826 #  4 bytes: base rev
827 #  4 bytes: link rev
828 # 20 bytes: parent 1 nodeid
829 # 20 bytes: parent 2 nodeid
830 # 20 bytes: nodeid
831 indexformatv0 = struct.Struct(">4l20s20s20s")
832 indexformatv0_pack = indexformatv0.pack
833 indexformatv0_unpack = indexformatv0.unpack
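# Each v0 index entry is fixed-size: struct.calcsize(">4l20s20s20s")
# = 4 * 4 + 3 * 20 = 76 bytes, i.e. indexformatv0.size == 76.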
834
835 class revlogoldindex(list):
836     def __getitem__(self, i):
837         if i == -1:
838             return (0, 0, 0, -1, -1, -1, -1, nullid)
839         return list.__getitem__(self, i)
840
841 # maximum <delta-chain-data>/<revision-text-length> ratio
842 LIMIT_DELTA2TEXT = 2
843
844 class revlogoldio(object):
845     def __init__(self):
846         self.size = indexformatv0.size
847
848     def parseindex(self, data, inline):
849         s = self.size
850         index = []
851         nodemap = {nullid: nullrev}
852         n = off = 0
853         l = len(data)
854         while off + s <= l:
855             cur = data[off:off + s]
856             off += s
857             e = indexformatv0_unpack(cur)
858             # transform to revlogv1 format
859             e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
860                   nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
861             index.append(e2)
862             nodemap[e[6]] = n
863             n += 1
864
865         return revlogoldindex(index), nodemap, None
866
867     def packentry(self, entry, node, version, rev):
868         if gettype(entry[0]):
869             raise RevlogError(_('index entry flags need revlog version 1'))
870         e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
871               node(entry[5]), node(entry[6]), entry[7])
872         return indexformatv0_pack(*e2)
873
874 # index ng:
875 #  6 bytes: offset
876 #  2 bytes: flags
877 #  4 bytes: compressed length
878 #  4 bytes: uncompressed length
879 #  4 bytes: base rev
880 #  4 bytes: link rev
881 #  4 bytes: parent 1 rev
882 #  4 bytes: parent 2 rev
883 # 32 bytes: nodeid
884 indexformatng = struct.Struct(">Qiiiiii20s12x")
885 indexformatng_pack = indexformatng.pack
886 versionformat = struct.Struct(">I")
887 versionformat_pack = versionformat.pack
888 versionformat_unpack = versionformat.unpack
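# Each "ng" index entry is 8 + 6 * 4 + 20 + 12 = 64 bytes
# (indexformatng.size == struct.calcsize(">Qiiiiii20s12x")); the 20-byte
# nodeid plus 12 pad bytes fill the 32 bytes the format reserves for the
# hash.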
889
890 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
891 # signed integer)
892 _maxentrysize = 0x7fffffff
893
894 class revlogio(object):
895     def __init__(self):
896         self.size = indexformatng.size
897
898     def parseindex(self, data, inline):
899         # call the C implementation to parse the index data
900         index, cache = parsers.parse_index2(data, inline)
901         return index, getattr(index, 'nodemap', None), cache
902
903     def packentry(self, entry, node, version, rev):
904         p = indexformatng_pack(*entry)
905         if rev == 0:
906             p = versionformat_pack(version) + p[4:]
907         return p
908
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        #  When True, indexfile is opened with checkambig=True at writing, to
        #  avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

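        # Worked note (illustrative numbers): the bitwise test above is the
        # standard power-of-two check -- a power of two has exactly one bit
        # set, so subtracting 1 flips all lower bits and the AND is zero:
        #   65536 & 65535 == 0  -> accepted
        #   65000 & 64999 != 0  -> rejected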
        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

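        # Worked example (illustrative header value): a plain v1 inline
        # revlog stores 0x00010001 in its first four bytes, so
        #   fmt   = 0x00010001 & 0xFFFF   == 1        (REVLOGV1)
        #   flags = 0x00010001 & ~0xFFFF  == 0x10000  (FLAG_INLINE_DATA)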
        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        length = len(self)
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
            if stop > length:
                stop = length
        else:
            stop = length
        return pycompat.xrange(start, stop, step)

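    # Illustration of the bounds handling in revs() (arbitrary values):
    #   revs(start=2, stop=5) yields 2, 3, 4, 5 -- stop is inclusive
    #   revs(start=5, stop=2) yields 5, 4, 3, 2 -- descending when start > stop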
    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7] # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

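    # Worked example: an entry storing offset 0x1234 with flags 0x0001 holds
    # (0x1234 << 16) | 0x0001 == 0x12340001 in its first field, so
    #   start(): 0x12340001 >> 16    == 0x1234
    #   flags(): 0x12340001 & 0xFFFF == 0x0001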
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

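    # Walked example (hypothetical index): with generaldelta and base
    # pointers rev -> base of {0: 0, 1: 0, 2: 1, 3: 2}, _deltachain(3)
    # follows 3 -> 2 -> 1 -> 0, stops where base == rev, and returns
    # ([0, 1, 2, 3], False); _deltachain(3, stoprev=1) returns ([2, 3], True).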
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

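    # Note on lazyset above: membership consults both the eagerly-added revs
    # and the lazy ancestor generator, so after has.add(nullrev) and
    # has.update(common) a query like `r in has` can succeed without
    # materializing the full ancestor set of common.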
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

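    # Walked example (hypothetical graph): with parent revs {0: none, 1: 0,
    # 2: 0, 3: 1}, every rev first marks itself with ishead[r] = 1, then
    # clears its parents' slots; revs 0 and 1 get cleared, leaving [2, 3].
    # The extra slot in ishead absorbs the writes for nullrev (-1) parents.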
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

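    # Note on the a > b shortcut above: revision numbers are assigned in
    # topological order, so an ancestor always has a lower number than its
    # descendants; a higher-numbered rev can never be an ancestor of a
    # lower-numbered one.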
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

1820 def _partialmatch(self, id):
1821 # we don't care about wdirfilenodeids as they should always be full hashes
1822 maybewdir = wdirhex.startswith(id)
1823 try:
1824 partial = self.index.partialmatch(id)
1825 if partial and self.hasnode(partial):
1826 if maybewdir:
1827 # single 'ff...' match in radix tree, ambiguous with wdir
1828 raise RevlogError
1829 return partial
1830 if maybewdir:
1831 # no 'ff...' match in radix tree, wdir identified
1832 raise error.WdirUnsupported
1833 return None
1834 except RevlogError:
1835 # parsers.c radix tree lookup gave multiple matches
1836 # fast path: for unfiltered changelog, radix tree is accurate
1837 if not getattr(self, 'filteredrevs', None):
1838 raise AmbiguousPrefixLookupError(id, self.indexfile,
1839 _('ambiguous identifier'))
1840 # fall through to slow path that filters hidden revisions
1841 except (AttributeError, ValueError):
1842 # we are pure python, or key was too short to search radix tree
1843 pass
1844
1845 if id in self._pcache:
1846 return self._pcache[id]
1847
1848 if len(id) <= 40:
1849 try:
1850 # hex(node)[:...]
1851 l = len(id) // 2 # grab an even number of digits
1852 prefix = bin(id[:l * 2])
1853 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1854 nl = [n for n in nl if hex(n).startswith(id) and
1855 self.hasnode(n)]
1856 if len(nl) > 0:
1857 if len(nl) == 1 and not maybewdir:
1858 self._pcache[id] = nl[0]
1859 return nl[0]
1860 raise AmbiguousPrefixLookupError(id, self.indexfile,
1861 _('ambiguous identifier'))
1862 if maybewdir:
1863 raise error.WdirUnsupported
1864 return None
1865 except TypeError:
1866 pass
1867
1868 def lookup(self, id):
1869 """locate a node based on:
1870 - revision number or str(revision number)
1871 - nodeid or subset of hex nodeid
1872 """
1873 n = self._match(id)
1874 if n is not None:
1875 return n
1876 n = self._partialmatch(id)
1877 if n:
1878 return n
1879
1880 raise LookupError(id, self.indexfile, _('no match found'))
1881
1882 def shortest(self, node, minlength=1):
1883 """Find the shortest unambiguous prefix that matches node."""
1884 def isvalid(prefix):
1885 try:
1886 node = self._partialmatch(prefix)
1887 except error.RevlogError:
1888 return False
1889 except error.WdirUnsupported:
1890 # single 'ff...' match
1891 return True
1892 if node is None:
1893 raise LookupError(node, self.indexfile, _('no node'))
1894 return True
1895
1896 def maybewdir(prefix):
1897 return all(c == 'f' for c in prefix)
1898
1899 hexnode = hex(node)
1900
1901 def disambiguate(hexnode, minlength):
1902 """Disambiguate against wdirid."""
1903 for length in range(minlength, 41):
1904 prefix = hexnode[:length]
1905 if not maybewdir(prefix):
1906 return prefix
1907
1908 if not getattr(self, 'filteredrevs', None):
1909 try:
1910 length = max(self.index.shortest(node), minlength)
1911 return disambiguate(hexnode, length)
1912 except RevlogError:
1913 if node != wdirid:
1914 raise LookupError(node, self.indexfile, _('no node'))
1915 except AttributeError:
1916 # Fall through to pure code
1917 pass
1918
1919 if node == wdirid:
1920 for length in range(minlength, 41):
1921 prefix = hexnode[:length]
1922 if isvalid(prefix):
1923 return prefix
1924
1925 for length in range(minlength, 41):
1926 prefix = hexnode[:length]
1927 if isvalid(prefix):
1928 return disambiguate(hexnode, length)
1929
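# Editor's note: a self-contained sketch of the prefix-disambiguation idea
# behind shortest() above, over a plain list of hex ids instead of the
# index/radix tree. The 'nodes' values and 'toyshortest' are invented for
# illustration; all-'f' prefixes stay ambiguous with the virtual wdir id.
nodes = ['a98e926b1f8d', 'a9214faa2875', 'ffffffffffff']

def toyshortest(hexnode, minlength=1):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        matches = [n for n in nodes if n.startswith(prefix)]
        if len(matches) == 1 and not all(c == 'f' for c in prefix):
            return prefix
    return hexnode

assert toyshortest('a98e926b1f8d') == 'a98'
assert toyshortest('a9214faa2875') == 'a92'
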
1930 def cmp(self, node, text):
1931 """compare text with a given file revision
1932
1933 returns True if text is different than what is stored.
1934 """
1935 p1, p2 = self.parents(node)
1936 return hash(text, p1, p2) != node
1937
1938 def _cachesegment(self, offset, data):
1939 """Add a segment to the revlog cache.
1940
1941 Accepts an absolute offset and the data that is at that location.
1942 """
1943 o, d = self._chunkcache
1944 # try to add to existing cache
1945 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1946 self._chunkcache = o, d + data
1947 else:
1948 self._chunkcache = offset, data
1949
1950 def _readsegment(self, offset, length, df=None):
1951 """Load a segment of raw data from the revlog.
1952
1953 Accepts an absolute offset, length to read, and an optional existing
1954 file handle to read from.
1955
1956 If an existing file handle is passed, it will be seeked and the
1957 original seek position will NOT be restored.
1958
1959 Returns a str or buffer of raw byte data.
1960 """
1961 # Cache data both forward and backward around the requested
1962 # data, in a fixed size window. This helps speed up operations
1963 # involving reading the revlog backwards.
1964 cachesize = self._chunkcachesize
1965 realoffset = offset & ~(cachesize - 1)
1966 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1967 - realoffset)
1968 with self._datareadfp(df) as df:
1969 df.seek(realoffset)
1970 d = df.read(reallength)
1971 self._cachesegment(realoffset, d)
1972 if offset != realoffset or reallength != length:
1973 return util.buffer(d, offset - realoffset, length)
1974 return d
1975
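# Editor's note: the bitmask arithmetic in _readsegment() rounds the read
# window outward to cache-size boundaries. A standalone sketch with made-up
# numbers (the cache size must be a power of two for the masks to work):
cachesize = 65536
offset, length = 70000, 1000
realoffset = offset & ~(cachesize - 1)             # round down: 65536
reallength = (((offset + length + cachesize)
               & ~(cachesize - 1)) - realoffset)   # round up past the end
assert realoffset == 65536
assert realoffset + reallength >= offset + length  # window covers the request
assert reallength % cachesize == 0                 # whole cache-sized blocks
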
1976 def _getsegment(self, offset, length, df=None):
1977 """Obtain a segment of raw data from the revlog.
1978
1979 Accepts an absolute offset, length of bytes to obtain, and an
1980 optional file handle to the already-opened revlog. If the file
1981 handle is used, its original seek position will not be preserved.
1982
1983 Requests for data may be returned from a cache.
1984
1985 Returns a str or a buffer instance of raw byte data.
1986 """
1987 o, d = self._chunkcache
1988 l = len(d)
1989
1990 # is it in the cache?
1991 cachestart = offset - o
1992 cacheend = cachestart + length
1993 if cachestart >= 0 and cacheend <= l:
1994 if cachestart == 0 and cacheend == l:
1995 return d # avoid a copy
1996 return util.buffer(d, cachestart, cacheend - cachestart)
1997
1998 return self._readsegment(offset, length, df=df)
1999
2000 def _getsegmentforrevs(self, startrev, endrev, df=None):
2001 """Obtain a segment of raw data corresponding to a range of revisions.
2002
2003 Accepts the start and end revisions and an optional already-open
2004 file handle to be used for reading. If the file handle is read, its
2005 seek position will not be preserved.
2006
2007 Requests for data may be satisfied by a cache.
2008
2009 Returns a 2-tuple of (offset, data) for the requested range of
2010 revisions. Offset is the integer offset from the beginning of the
2011 revlog and data is a str or buffer of the raw byte data.
2012
2013 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
2014 to determine where each revision's data begins and ends.
2015 """
2016 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
2017 # (functions are expensive).
2018 index = self.index
2019 istart = index[startrev]
2020 start = int(istart[0] >> 16)
2021 if startrev == endrev:
2022 end = start + istart[1]
2023 else:
2024 iend = index[endrev]
2025 end = int(iend[0] >> 16) + iend[1]
2026
2027 if self._inline:
2028 start += (startrev + 1) * self._io.size
2029 end += (endrev + 1) * self._io.size
2030 length = end - start
2031
2032 return start, self._getsegment(start, length, df=df)
2033
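# Editor's note: for inline revlogs the revision data is interleaved with
# index entries, so _getsegmentforrevs() shifts offsets by one index-entry
# size per revision. A toy check of that shift; the values here are assumed
# for illustration (iosize=64 matches the v1 index entry size):
iosize = 64
datastart = 128          # made-up data offset of revision r
r = 2
inlinestart = datastart + (r + 1) * iosize
assert inlinestart == 128 + 3 * 64
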
2034 def _chunk(self, rev, df=None):
2035 """Obtain a single decompressed chunk for a revision.
2036
2037 Accepts an integer revision and an optional already-open file handle
2038 to be used for reading. If used, the seek position of the file will not
2039 be preserved.
2040
2041 Returns a str holding uncompressed data for the requested revision.
2042 """
2043 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
2044
2045 def _chunks(self, revs, df=None, targetsize=None):
2046 """Obtain decompressed chunks for the specified revisions.
2047
2048 Accepts an iterable of numeric revisions that are assumed to be in
2049 ascending order. Also accepts an optional already-open file handle
2050 to be used for reading. If used, the seek position of the file will
2051 not be preserved.
2052
2053 This function is similar to calling ``self._chunk()`` multiple times,
2054 but is faster.
2055
2056 Returns a list with decompressed data for each requested revision.
2057 """
2058 if not revs:
2059 return []
2060 start = self.start
2061 length = self.length
2062 inline = self._inline
2063 iosize = self._io.size
2064 buffer = util.buffer
2065
2066 l = []
2067 ladd = l.append
2068
2069 if not self._withsparseread:
2070 slicedchunks = (revs,)
2071 else:
2072 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
2073
2074 for revschunk in slicedchunks:
2075 firstrev = revschunk[0]
2076 # Skip trailing revisions with empty diff
2077 for lastrev in revschunk[::-1]:
2078 if length(lastrev) != 0:
2079 break
2080
2081 try:
2082 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2083 except OverflowError:
2084 # issue4215 - we can't cache a run of chunks greater than
2085 # 2G on Windows
2086 return [self._chunk(rev, df=df) for rev in revschunk]
2087
2088 decomp = self.decompress
2089 for rev in revschunk:
2090 chunkstart = start(rev)
2091 if inline:
2092 chunkstart += (rev + 1) * iosize
2093 chunklength = length(rev)
2094 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2095
2096 return l
2097
2098 def _chunkclear(self):
2099 """Clear the raw chunk cache."""
2100 self._chunkcache = (0, '')
2101
2102 def deltaparent(self, rev):
2103 """return deltaparent of the given revision"""
2104 base = self.index[rev][3]
2105 if base == rev:
2106 return nullrev
2107 elif self._generaldelta:
2108 return base
2109 else:
2110 return rev - 1
2111
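# Editor's note: a sketch of the two delta-base conventions handled by
# deltaparent() above. With generaldelta the stored base field points at an
# arbitrary earlier revision; without it the base is implicitly rev - 1.
# 'basefield' and 'toydeltaparent' are stand-ins for self.index[rev][3] and
# the real method, invented for illustration.
def toydeltaparent(rev, basefield, generaldelta, nullrev=-1):
    if basefield == rev:       # a full snapshot delta-chains to nothing
        return nullrev
    elif generaldelta:
        return basefield
    else:
        return rev - 1

assert toydeltaparent(5, 5, True) == -1    # snapshot
assert toydeltaparent(5, 2, True) == 2     # generaldelta: explicit base
assert toydeltaparent(5, 2, False) == 4    # legacy: always the previous rev
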
2112 def issnapshot(self, rev):
2113 """tells whether rev is a snapshot
2114 """
2115 if rev == nullrev:
2116 return True
2117 deltap = self.deltaparent(rev)
2118 if deltap == nullrev:
2119 return True
2120 p1, p2 = self.parentrevs(rev)
2121 if deltap in (p1, p2):
2122 return False
2123 return self.issnapshot(deltap)
2124
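# Editor's note: issnapshot() recurses up the delta chain; a revision whose
# delta parent is one of its own parents is a regular delta, while an
# unbroken chain of non-parent bases ends in an (intermediate) snapshot. A
# toy model with invented stand-in tables, not revlog data:
nullrev = -1
deltaparents = {0: nullrev, 1: 0, 2: 0}
parentpairs = {0: (nullrev, nullrev), 1: (nullrev, nullrev), 2: (0, nullrev)}

def toyissnapshot(rev):
    if rev == nullrev:
        return True
    deltap = deltaparents[rev]
    if deltap == nullrev:
        return True               # full snapshot
    if deltap in parentpairs[rev]:
        return False              # plain delta against a parent
    return toyissnapshot(deltap)  # maybe an intermediate snapshot

assert toyissnapshot(0)           # delta against null: full snapshot
assert toyissnapshot(1)           # delta against a non-parent: snapshot
assert not toyissnapshot(2)       # delta against its own parent
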
2125 def snapshotdepth(self, rev):
2126 """number of snapshots in the chain before this one"""
2127 if not self.issnapshot(rev):
2128 raise ProgrammingError('revision %d not a snapshot' % rev)
2129 return len(self._deltachain(rev)[0]) - 1
2130
2131 def revdiff(self, rev1, rev2):
2132 """return or calculate a delta between two revisions
2133
2134 The delta calculated is in binary form and is intended to be written to
2135 revlog data directly. So this function needs raw revision data.
2136 """
2137 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2138 return bytes(self._chunk(rev2))
2139
2140 return mdiff.textdiff(self.revision(rev1, raw=True),
2141 self.revision(rev2, raw=True))
2142
2143 def revision(self, nodeorrev, _df=None, raw=False):
2144 """return an uncompressed revision of a given node or revision
2145 number.
2146
2147 _df - an existing file handle to read from. (internal-only)
2148 raw - an optional argument specifying if the revision data is to be
2149 treated as raw data when applying flag transforms. 'raw' should be set
2150 to True when generating changegroups or in debug commands.
2151 """
2152 if isinstance(nodeorrev, int):
2153 rev = nodeorrev
2154 node = self.node(rev)
2155 else:
2156 node = nodeorrev
2157 rev = None
2158
2159 cachedrev = None
2160 flags = None
2161 rawtext = None
2162 if node == nullid:
2163 return ""
2164 if self._cache:
2165 if self._cache[0] == node:
2166 # _cache only stores rawtext
2167 if raw:
2168 return self._cache[2]
2169 # duplicated, but good for perf
2170 if rev is None:
2171 rev = self.rev(node)
2172 if flags is None:
2173 flags = self.flags(rev)
2174 # no extra flags set, no flag processor runs, text = rawtext
2175 if flags == REVIDX_DEFAULT_FLAGS:
2176 return self._cache[2]
2177 # rawtext is reusable. need to run flag processor
2178 rawtext = self._cache[2]
2179
2180 cachedrev = self._cache[1]
2181
2182 # look up what we need to read
2183 if rawtext is None:
2184 if rev is None:
2185 rev = self.rev(node)
2186
2187 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2188 if stopped:
2189 rawtext = self._cache[2]
2190
2191 # drop cache to save memory
2192 self._cache = None
2193
2194 targetsize = None
2195 rawsize = self.index[rev][2]
2196 if 0 <= rawsize:
2197 targetsize = 4 * rawsize
2198
2199 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2200 if rawtext is None:
2201 rawtext = bytes(bins[0])
2202 bins = bins[1:]
2203
2204 rawtext = mdiff.patches(rawtext, bins)
2205 self._cache = (node, rev, rawtext)
2206
2207 if flags is None:
2208 if rev is None:
2209 rev = self.rev(node)
2210 flags = self.flags(rev)
2211
2212 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2213 if validatehash:
2214 self.checkhash(text, node, rev=rev)
2215
2216 return text
2217
2218 def hash(self, text, p1, p2):
2219 """Compute a node hash.
2220
2221 Available as a function so that subclasses can replace the hash
2222 as needed.
2223 """
2224 return hash(text, p1, p2)
2225
2226 def _processflags(self, text, flags, operation, raw=False):
2227 """Inspect revision data flags and apply transforms defined by
2228 registered flag processors.
2229
2230 ``text`` - the revision data to process
2231 ``flags`` - the revision flags
2232 ``operation`` - the operation being performed (read or write)
2233 ``raw`` - an optional argument describing if the raw transform should be
2234 applied.
2235
2236 This method processes the flags in the order (or reverse order if
2237 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2238 flag processors registered for present flags. The order of flags defined
2239 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
2240
2241 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2242 processed text and ``validatehash`` is a bool indicating whether the
2243 returned text should be checked for hash integrity.
2244
2245 Note: If the ``raw`` argument is set, it has precedence over the
2246 operation and will only update the value of ``validatehash``.
2247 """
2248 # fast path: no flag processors will run
2249 if flags == 0:
2250 return text, True
2251 if not operation in ('read', 'write'):
2252 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
2253 # Check all flags are known.
2254 if flags & ~REVIDX_KNOWN_FLAGS:
2255 raise RevlogError(_("incompatible revision flag '%#x'") %
2256 (flags & ~REVIDX_KNOWN_FLAGS))
2257 validatehash = True
2258 # Depending on the operation (read or write), the order might be
2259 # reversed due to non-commutative transforms.
2260 orderedflags = REVIDX_FLAGS_ORDER
2261 if operation == 'write':
2262 orderedflags = reversed(orderedflags)
2263
2264 for flag in orderedflags:
2265 # If a flagprocessor has been registered for a known flag, apply the
2266 # related operation transform and update result tuple.
2267 if flag & flags:
2268 vhash = True
2269
2270 if flag not in _flagprocessors:
2271 message = _("missing processor for flag '%#x'") % (flag)
2272 raise RevlogError(message)
2273
2274 processor = _flagprocessors[flag]
2275 if processor is not None:
2276 readtransform, writetransform, rawtransform = processor
2277
2278 if raw:
2279 vhash = rawtransform(self, text)
2280 elif operation == 'read':
2281 text, vhash = readtransform(self, text)
2282 else: # write operation
2283 text, vhash = writetransform(self, text)
2284 validatehash = validatehash and vhash
2285
2286 return text, validatehash
2287
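# Editor's note: a minimal sketch of the (read, write, raw) processor triple
# that _processflags() dispatches on. The flag value, the transforms and the
# '_toyprocessors' registry here are all invented for illustration; real
# processors are registered elsewhere, not via this dict.
TOYFLAG = 1 << 3

def toyread(self, text):
    return text.replace(b'stored:', b''), True   # decode, keep hash check

def toywrite(self, text):
    return b'stored:' + text, True               # encode on the way in

def toyraw(self, text):
    return True                                  # raw bytes are hash-safe

_toyprocessors = {TOYFLAG: (toyread, toywrite, toyraw)}

encoded, _ = _toyprocessors[TOYFLAG][1](None, b'payload')   # write transform
decoded, _ = _toyprocessors[TOYFLAG][0](None, encoded)      # read transform
assert decoded == b'payload'
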
2288 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2289 """Check node hash integrity.
2290
2291 Available as a function so that subclasses can extend hash mismatch
2292 behaviors as needed.
2293 """
2294 try:
2295 if p1 is None and p2 is None:
2296 p1, p2 = self.parents(node)
2297 if node != self.hash(text, p1, p2):
2298 revornode = rev
2299 if revornode is None:
2300 revornode = templatefilters.short(hex(node))
2301 raise RevlogError(_("integrity check failed on %s:%s")
2302 % (self.indexfile, pycompat.bytestr(revornode)))
2303 except RevlogError:
2304 if self._censorable and _censoredtext(text):
2305 raise error.CensoredNodeError(self.indexfile, node, text)
2306 raise
2307
2308 def _enforceinlinesize(self, tr, fp=None):
2309 """Check if the revlog is too big for inline and convert if so.
2310
2311 This should be called after revisions are added to the revlog. If the
2312 revlog has grown too large to be an inline revlog, it will convert it
2313 to use multiple index and data files.
2314 """
2315 tiprev = len(self) - 1
2316 if (not self._inline or
2317 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
2318 return
2319
2320 trinfo = tr.find(self.indexfile)
2321 if trinfo is None:
2322 raise RevlogError(_("%s not found in the transaction")
2323 % self.indexfile)
2324
2325 trindex = trinfo[2]
2326 if trindex is not None:
2327 dataoff = self.start(trindex)
2328 else:
2329 # revlog was stripped at start of transaction, use all leftover data
2330 trindex = len(self) - 1
2331 dataoff = self.end(tiprev)
2332
2333 tr.add(self.datafile, dataoff)
2334
2335 if fp:
2336 fp.flush()
2337 fp.close()
2338
2339 with self._datafp('w') as df:
2340 for r in self:
2341 df.write(self._getsegmentforrevs(r, r)[1])
2342
2343 with self._indexfp('w') as fp:
2344 self.version &= ~FLAG_INLINE_DATA
2345 self._inline = False
2346 io = self._io
2347 for i in self:
2348 e = io.packentry(self.index[i], self.node, self.version, i)
2349 fp.write(e)
2350
2351 # the temp file replaces the real index when we exit the context
2352 # manager
2353
2354 tr.replace(self.indexfile, trindex * self._io.size)
2355 self._chunkclear()
2356
2357 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2358 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2359 """add a revision to the log
2360
2361 text - the revision data to add
2362 transaction - the transaction object used for rollback
2363 link - the linkrev data to add
2364 p1, p2 - the parent nodeids of the revision
2365 cachedelta - an optional precomputed delta
2366 node - nodeid of revision; typically node is not specified, and it is
2367 computed by default as hash(text, p1, p2), however subclasses might
2368 use a different hashing method (and override checkhash() in that case)
2369 flags - the known flags to set on the revision
2370 deltacomputer - an optional _deltacomputer instance shared between
2371 multiple calls
2372 """
2373 if link == nullrev:
2374 raise RevlogError(_("attempted to add linkrev -1 to %s")
2375 % self.indexfile)
2376
2377 if flags:
2378 node = node or self.hash(text, p1, p2)
2379
2380 rawtext, validatehash = self._processflags(text, flags, 'write')
2381
2382 # If the flag processor modifies the revision data, ignore any provided
2383 # cachedelta.
2384 if rawtext != text:
2385 cachedelta = None
2386
2387 if len(rawtext) > _maxentrysize:
2388 raise RevlogError(
2389 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2390 % (self.indexfile, len(rawtext)))
2391
2392 node = node or self.hash(rawtext, p1, p2)
2393 if node in self.nodemap:
2394 return node
2395
2396 if validatehash:
2397 self.checkhash(rawtext, node, p1=p1, p2=p2)
2398
2399 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2400 flags, cachedelta=cachedelta,
2401 deltacomputer=deltacomputer)
2402
2403 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2404 cachedelta=None, deltacomputer=None):
2405 """add a raw revision with known flags, node and parents
2406 useful when reusing a revision not stored in this revlog (e.g. received
2407 over the wire, or read from an external bundle).
2408 """
2409 dfh = None
2410 if not self._inline:
2411 dfh = self._datafp("a+")
2412 ifh = self._indexfp("a+")
2413 try:
2414 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2415 flags, cachedelta, ifh, dfh,
2416 deltacomputer=deltacomputer)
2417 finally:
2418 if dfh:
2419 dfh.close()
2420 ifh.close()
2421
2422 def compress(self, data):
2423 """Generate a possibly-compressed representation of data."""
2424 if not data:
2425 return '', data
2426
2427 compressed = self._compressor.compress(data)
2428
2429 if compressed:
2430 # The revlog compressor added the header in the returned data.
2431 return '', compressed
2432
2433 if data[0:1] == '\0':
2434 return '', data
2435 return 'u', data
2436
2437 def decompress(self, data):
2438 """Decompress a revlog chunk.
2439
2440 The chunk is expected to begin with a header identifying the
2441 format type so it can be routed to an appropriate decompressor.
2442 """
2443 if not data:
2444 return data
2445
2446 # Revlogs are read much more frequently than they are written and many
2447 # chunks only take microseconds to decompress, so performance is
2448 # important here.
2449 #
2450 # We can make a few assumptions about revlogs:
2451 #
2452 # 1) the majority of chunks will be compressed (as opposed to inline
2453 # raw data).
2454 # 2) decompressing *any* data will likely be at least 10x slower than
2455 # returning raw inline data.
2456 # 3) we want to prioritize common and officially supported compression
2457 # engines
2458 #
2459 # It follows that we want to optimize for "decompress compressed data
2460 # when encoded with common and officially supported compression engines"
2461 # case over "raw data" and "data encoded by less common or non-official
2462 # compression engines." That is why we have the inline lookup first
2463 # followed by the compengines lookup.
2464 #
2465 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2466 # compressed chunks. And this matters for changelog and manifest reads.
2467 t = data[0:1]
2468
2469 if t == 'x':
2470 try:
2471 return _zlibdecompress(data)
2472 except zlib.error as e:
2473 raise RevlogError(_('revlog decompress error: %s') %
2474 stringutil.forcebytestr(e))
2475 # '\0' is more common than 'u' so it goes first.
2476 elif t == '\0':
2477 return data
2478 elif t == 'u':
2479 return util.buffer(data, 1)
2480
2481 try:
2482 compressor = self._decompressors[t]
2483 except KeyError:
2484 try:
2485 engine = util.compengines.forrevlogheader(t)
2486 compressor = engine.revlogcompressor()
2487 self._decompressors[t] = compressor
2488 except KeyError:
2489 raise RevlogError(_('unknown compression type %r') % t)
2490
2491 return compressor.decompress(data)
2492
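# Editor's note: a standalone check of the one-byte chunk headers that
# compress()/decompress() rely on: zlib output starts with 'x', 'u' marks
# data stored uncompressed, and '\0' marks data whose first byte is already
# NUL and is therefore stored literally. The sample payloads are made up.
import zlib

chunk = zlib.compress(b'some revision data' * 10)
assert chunk[0:1] == b'x'                     # routed to _zlibdecompress
assert zlib.decompress(chunk) == b'some revision data' * 10

stored = b'u' + b'incompressible'
assert stored[0:1] == b'u'                    # body returned minus the header
assert bytes(memoryview(stored)[1:]) == b'incompressible'
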
2493 def _isgooddeltainfo(self, deltainfo, revinfo):
2494 """Returns True if the given delta is good. Good means that it is within
2495 the disk span, disk size, and chain length bounds that we know to be
2496 performant."""
2497 if deltainfo is None:
2498 return False
2499
2500 # - 'deltainfo.distance' is the distance from the base revision --
2501 # bounding it limits the amount of I/O we need to do.
2502 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2503 # deltas we need to apply -- bounding it limits the amount of CPU
2504 # we consume.
2505
2506 if self._sparserevlog:
2507 # As sparse-read will be used, we can consider that the distance,
2508 # instead of being the span of the whole chunk,
2509 # is the span of the largest read chunk
2510 base = deltainfo.base
2511
2512 if base != nullrev:
2513 deltachain = self._deltachain(base)[0]
2514 else:
2515 deltachain = []
2516
2517 # search for the first non-snapshot revision
2518 for idx, r in enumerate(deltachain):
2519 if not self.issnapshot(r):
2520 break
2521 deltachain = deltachain[idx:]
2522 chunks = _slicechunk(self, deltachain, deltainfo)
2523 all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
2524 distance = max(all_span)
2525 else:
2526 distance = deltainfo.distance
2527
2528 textlen = revinfo.textlen
2529 defaultmax = textlen * 4
2530 maxdist = self._maxdeltachainspan
2531 if not maxdist:
2532 maxdist = distance # ensure the conditional pass
2533 maxdist = max(maxdist, defaultmax)
2534 if self._sparserevlog and maxdist < self._srmingapsize:
2535 # In multiple places, we ignore irrelevant data ranges below a
2536 # certain size. We also apply this tradeoff here and relax the
2537 # span constraint for small enough content.
2538 maxdist = self._srmingapsize
2539
2540 # Bad delta from read span:
2541 #
2542 # If the span of data read is larger than the maximum allowed.
2543 if maxdist < distance:
2544 return False
2545
2546 # Bad delta from new delta size:
2547 #
2548 # If the delta size is larger than the target text, storing the
2549 # delta will be inefficient.
2550 if textlen < deltainfo.deltalen:
2551 return False
2552
2553 # Bad delta from cumulated payload size:
2554 #
2555 # If the sum of the deltas gets larger than K * target text length.
2556 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
2557 return False
2558
2559 # Bad delta from chain length:
2560 #
2561 # If the number of deltas in the chain gets too high.
2562 if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
2563 return False
2564
2565 # bad delta from intermediate snapshot size limit
2566 #
2567 # An intermediate snapshot size higher than the limit is bad. The
2568 # limit exists to prevent endless chains of intermediate deltas
2569 # from being created.
2570 if (deltainfo.snapshotdepth is not None and
2571 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
2572 return False
2573
2574 # bad delta if new intermediate snapshot is larger than the previous
2575 # snapshot
2576 if (deltainfo.snapshotdepth
2577 and self.length(deltainfo.base) < deltainfo.deltalen):
2578 return False
2579
2580 return True
2581
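# Editor's note: a toy rendering of the acceptance bounds applied in
# _isgooddeltainfo() above, with invented numbers. 'toyisgooddelta' and its
# parameters are stand-ins; limitdelta2text=2 is assumed here to play the
# role of LIMIT_DELTA2TEXT. The sparse-revlog span refinement is omitted.
def toyisgooddelta(distance, deltalen, chainlen, textlen, compresseddeltalen,
                   maxdeltachainspan=0, maxchainlen=None, limitdelta2text=2):
    maxdist = maxdeltachainspan or distance   # 0 disables the span bound
    maxdist = max(maxdist, textlen * 4)       # always allow 4x the text size
    if maxdist < distance:
        return False                          # read span too large
    if textlen < deltalen:
        return False                          # delta bigger than the text
    if textlen * limitdelta2text < compresseddeltalen:
        return False                          # cumulated payload too large
    if maxchainlen and maxchainlen < chainlen:
        return False                          # chain too long
    return True

assert toyisgooddelta(1000, 10, 3, 500, 40)
assert not toyisgooddelta(99999, 10, 3, 500, 40, maxdeltachainspan=4096)
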
2577 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2582 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2578 cachedelta, ifh, dfh, alwayscache=False,
2583 cachedelta, ifh, dfh, alwayscache=False,
2579 deltacomputer=None):
2584 deltacomputer=None):
2580 """internal function to add revisions to the log
2585 """internal function to add revisions to the log
2581
2586
2582 see addrevision for argument descriptions.
2587 see addrevision for argument descriptions.
2583
2588
2584 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2589 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2585
2590
2586 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2591 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2587 be used.
2592 be used.
2588
2593
2589 invariants:
2594 invariants:
2590 - rawtext is optional (can be None); if not set, cachedelta must be set.
2595 - rawtext is optional (can be None); if not set, cachedelta must be set.
2591 if both are set, they must correspond to each other.
2596 if both are set, they must correspond to each other.
2592 """
2597 """
2593 if node == nullid:
2598 if node == nullid:
2594 raise RevlogError(_("%s: attempt to add null revision") %
2599 raise RevlogError(_("%s: attempt to add null revision") %
2595 (self.indexfile))
2600 (self.indexfile))
2596 if node == wdirid or node in wdirfilenodeids:
2601 if node == wdirid or node in wdirfilenodeids:
2597 raise RevlogError(_("%s: attempt to add wdir revision") %
2602 raise RevlogError(_("%s: attempt to add wdir revision") %
2598 (self.indexfile))
2603 (self.indexfile))
2599
2604
2600 if self._inline:
2605 if self._inline:
2601 fh = ifh
2606 fh = ifh
2602 else:
2607 else:
2603 fh = dfh
2608 fh = dfh
2604
2609
2605 btext = [rawtext]
2610 btext = [rawtext]
2606
2611
2607 curr = len(self)
2612 curr = len(self)
2608 prev = curr - 1
2613 prev = curr - 1
2609 offset = self.end(prev)
2614 offset = self.end(prev)
2610 p1r, p2r = self.rev(p1), self.rev(p2)
2615 p1r, p2r = self.rev(p1), self.rev(p2)
2611
2616
2612 # full versions are inserted when the needed deltas
2617 # full versions are inserted when the needed deltas
2613 # become comparable to the uncompressed text
2618 # become comparable to the uncompressed text
2614 if rawtext is None:
2619 if rawtext is None:
2615 # need rawtext size, before changed by flag processors, which is
2620 # need rawtext size, before changed by flag processors, which is
2616 # the non-raw size. use revlog explicitly to avoid filelog's extra
2621 # the non-raw size. use revlog explicitly to avoid filelog's extra
2617 # logic that might remove metadata size.
2622 # logic that might remove metadata size.
2618 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2623 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2619 cachedelta[1])
2624 cachedelta[1])
2620 else:
2625 else:
2621 textlen = len(rawtext)
2626 textlen = len(rawtext)
2622
2627
2623 if deltacomputer is None:
2628 if deltacomputer is None:
2624 deltacomputer = _deltacomputer(self)
2629 deltacomputer = _deltacomputer(self)
2625
2630
2626 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2631 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2627
2632
2628 # no delta for flag processor revision (see "candelta" for why)
2633 # no delta for flag processor revision (see "candelta" for why)
2629 # not calling candelta since only one revision needs test, also to
2634 # not calling candelta since only one revision needs test, also to
2630 # avoid overhead fetching flags again.
2635 # avoid overhead fetching flags again.
2631 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2636 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2632 deltainfo = None
2637 deltainfo = None
2633 else:
2638 else:
2634 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2639 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2635
2640
2636 if deltainfo is not None:
2641 if deltainfo is not None:
2637 base = deltainfo.base
2642 base = deltainfo.base
2638 chainbase = deltainfo.chainbase
2643 chainbase = deltainfo.chainbase
2639 data = deltainfo.data
2644 data = deltainfo.data
2640 l = deltainfo.deltalen
2645 l = deltainfo.deltalen
2641 else:
2646 else:
2642 rawtext = deltacomputer.buildtext(revinfo, fh)
2647 rawtext = deltacomputer.buildtext(revinfo, fh)
2643 data = self.compress(rawtext)
2648 data = self.compress(rawtext)
2644 l = len(data[1]) + len(data[0])
2649 l = len(data[1]) + len(data[0])
2645 base = chainbase = curr
2650 base = chainbase = curr
2646
2651
2647 e = (offset_type(offset, flags), l, textlen,
2652 e = (offset_type(offset, flags), l, textlen,
2648 base, link, p1r, p2r, node)
2653 base, link, p1r, p2r, node)
2649 self.index.append(e)
2654 self.index.append(e)
2650 self.nodemap[node] = curr
2655 self.nodemap[node] = curr
2651
2656
2652 entry = self._io.packentry(e, self.node, self.version, curr)
2657 entry = self._io.packentry(e, self.node, self.version, curr)
2653 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2658 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2654
2659
2655 if alwayscache and rawtext is None:
2660 if alwayscache and rawtext is None:
2656 rawtext = deltacomputer._buildtext(revinfo, fh)
2661 rawtext = deltacomputer._buildtext(revinfo, fh)
2657
2662
2658 if type(rawtext) == bytes: # only accept immutable objects
2663 if type(rawtext) == bytes: # only accept immutable objects
2659 self._cache = (node, curr, rawtext)
2664 self._cache = (node, curr, rawtext)
2660 self._chainbasecache[curr] = chainbase
2665 self._chainbasecache[curr] = chainbase
2661 return node
2666 return node
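    # A sketch (not part of revlog.py) of how the first index-entry field
    # built above packs the data offset together with the 16-bit flag space;
    # offset_type() and the REVIDX_* flags are defined earlier in this file,
    # and the concrete numbers here are hypothetical:
    #
    #   packed = offset_type(1024, REVIDX_ISCENSORED)
    #   assert packed >> 16 == 1024            # the byte offset
    #   assert packed & 0xFFFF == (1 << 15)    # the censored flag bit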

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)
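    # Layout note, restating the two branches above rather than adding new
    # behavior: a non-inline revlog appends index entries to the .i file and
    # compressed chunks to the .d file, while an inline revlog interleaves
    # both in the .i file. That is why the inline branch shifts the data
    # offset by the room occupied by the 'curr' preceding index entries:
    #
    #   physical offset in .i == logical data offset + curr * self._io.size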

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes
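    # Hedged usage sketch for addgroup(); the revlog 'rl', changelog 'cl' and
    # transaction 'tr' are hypothetical, and each element of 'deltas' is the
    # 7-tuple unpacked by the loop above:
    #
    #   def linkmapper(linknode):
    #       return cl.rev(linknode)   # changelog node -> local linkrev
    #
    #   deltas = [(node, p1, p2, linknode, deltabase, delta, 0)]
    #   added = rl.addgroup(deltas, linkmapper, tr,
    #                       addrevisioncb=lambda rl, node: None)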

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add
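    # The delta shape assumed above, as a sketch: a full-replacement delta is
    # a single mdiff patch, a 12-byte ">lll" header (start, end, newlength)
    # followed by the replacement text, and mdiff.replacediffheader() builds
    # that header with start == 0 and end == oldlen. Hypothetical construction:
    #
    #   newtext = "\1\ncensored: reason\1\n"
    #   delta = struct.pack(">lll", 0, oldlen, len(newtext)) + newtext
    #   assert delta[:12] == mdiff.replacediffheader(oldlen, len(newtext))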

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs
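    # Worked example (hypothetical branching revlog): revs 0..4, where revs 2
    # and 3 are both children of rev 1 and rev 4's parent is rev 3, with
    # linkrevs [0, 1, 4, 2, 3]. getstrippoint(3) returns (2, {3}): rev 2
    # (linkrev 4) forces the strip point down to 2, and rev 3 lands in
    # brokenrevs because its own linkrev (2) is below minlink - it is only
    # stripped because it sits above the strip point.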

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None
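    # Truncation offsets used above, as a sketch: with the 64-byte revlogv1
    # index entries (self._io.size), a non-inline revlog cuts the .d file at
    # start(rev) and the .i file at rev * 64, while an inline revlog cuts its
    # single .i file at start(rev) + rev * 64: the surviving data bytes plus
    # the surviving index entries.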

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
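    # Reading the result, under the definitions above: checksize() returns
    # (dd, di), where dd is the number of bytes by which the data file differs
    # from what the last index entry predicts, and di is the number of
    # trailing index bytes that do not form whole entries (for inline revlogs,
    # bytes not accounted for by entries plus chunks). (0, 0) means the sizes
    # are consistent; verification code treats anything else as damage.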

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        When it is None, the destination revlog's existing setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
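    # Hedged usage sketch for clone(); the repo/transaction wiring and the
    # 'src'/'dest' revlogs are hypothetical, e.g. rewriting a revlog while
    # upgrading a repository's storage format:
    #
    #   with repo.transaction('upgrade') as tr:
    #       src.clone(tr, dest, deltareuse=src.DELTAREUSESAMEREVS)
    #
    # DELTAREUSEFULLADD instead routes every revision through the public
    # addrevision() path (applying write-side flag processing), while the
    # other policies call _addrevision() directly, as above.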