sparse-revlog: implement algorithm to write sparse delta chains (issue5480)...
Paul Morelle
r38740:f8762ea7 default
@@ -1,2910 +1,2960
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import hashlib
19 import hashlib
20 import heapq
20 import heapq
21 import os
21 import os
22 import re
22 import re
23 import struct
23 import struct
24 import zlib
24 import zlib
25
25
26 # import stuff from node for others to import from revlog
26 # import stuff from node for others to import from revlog
27 from .node import (
27 from .node import (
28 bin,
28 bin,
29 hex,
29 hex,
30 nullid,
30 nullid,
31 nullrev,
31 nullrev,
32 wdirfilenodeids,
32 wdirfilenodeids,
33 wdirhex,
33 wdirhex,
34 wdirid,
34 wdirid,
35 wdirrev,
35 wdirrev,
36 )
36 )
37 from .i18n import _
37 from .i18n import _
38 from .thirdparty import (
38 from .thirdparty import (
39 attr,
39 attr,
40 )
40 )
41 from . import (
41 from . import (
42 ancestor,
42 ancestor,
43 error,
43 error,
44 mdiff,
44 mdiff,
45 policy,
45 policy,
46 pycompat,
46 pycompat,
47 templatefilters,
47 templatefilters,
48 util,
48 util,
49 )
49 )
50 from .utils import (
50 from .utils import (
51 stringutil,
51 stringutil,
52 )
52 )
53
53
54 parsers = policy.importmod(r'parsers')
54 parsers = policy.importmod(r'parsers')
55
55
56 # Aliased for performance.
56 # Aliased for performance.
57 _zlibdecompress = zlib.decompress
57 _zlibdecompress = zlib.decompress
58
58
59 # revlog header flags
59 # revlog header flags
60 REVLOGV0 = 0
60 REVLOGV0 = 0
61 REVLOGV1 = 1
61 REVLOGV1 = 1
62 # Dummy value until file format is finalized.
62 # Dummy value until file format is finalized.
63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 REVLOGV2 = 0xDEAD
64 REVLOGV2 = 0xDEAD
65 FLAG_INLINE_DATA = (1 << 16)
65 FLAG_INLINE_DATA = (1 << 16)
66 FLAG_GENERALDELTA = (1 << 17)
66 FLAG_GENERALDELTA = (1 << 17)
67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 REVLOG_DEFAULT_FORMAT = REVLOGV1
68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72
72
73 # revlog index flags
73 # revlog index flags
74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 REVIDX_DEFAULT_FLAGS = 0
77 REVIDX_DEFAULT_FLAGS = 0
78 # stable order in which flags need to be processed and their processors applied
78 # stable order in which flags need to be processed and their processors applied
79 REVIDX_FLAGS_ORDER = [
79 REVIDX_FLAGS_ORDER = [
80 REVIDX_ISCENSORED,
80 REVIDX_ISCENSORED,
81 REVIDX_ELLIPSIS,
81 REVIDX_ELLIPSIS,
82 REVIDX_EXTSTORED,
82 REVIDX_EXTSTORED,
83 ]
83 ]
84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 # bitmask for flags that could cause rawdata content change
85 # bitmask for flags that could cause rawdata content change
86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87
87
88 # max size of revlog with inline data
88 # max size of revlog with inline data
89 _maxinline = 131072
89 _maxinline = 131072
90 _chunksize = 1048576
90 _chunksize = 1048576
91
91
92 RevlogError = error.RevlogError
92 RevlogError = error.RevlogError
93 LookupError = error.LookupError
93 LookupError = error.LookupError
94 CensoredNodeError = error.CensoredNodeError
94 CensoredNodeError = error.CensoredNodeError
95 ProgrammingError = error.ProgrammingError
95 ProgrammingError = error.ProgrammingError
96
96
97 # Store flag processors (cf. 'addflagprocessor()' to register)
97 # Store flag processors (cf. 'addflagprocessor()' to register)
98 _flagprocessors = {
98 _flagprocessors = {
99 REVIDX_ISCENSORED: None,
99 REVIDX_ISCENSORED: None,
100 }
100 }
101
101
102 _mdre = re.compile('\1\n')
102 _mdre = re.compile('\1\n')
103 def parsemeta(text):
103 def parsemeta(text):
104 """return (metadatadict, metadatasize)"""
104 """return (metadatadict, metadatasize)"""
105 # text can be buffer, so we can't use .startswith or .index
105 # text can be buffer, so we can't use .startswith or .index
106 if text[:2] != '\1\n':
106 if text[:2] != '\1\n':
107 return None, None
107 return None, None
108 s = _mdre.search(text, 2).start()
108 s = _mdre.search(text, 2).start()
109 mtext = text[2:s]
109 mtext = text[2:s]
110 meta = {}
110 meta = {}
111 for l in mtext.splitlines():
111 for l in mtext.splitlines():
112 k, v = l.split(": ", 1)
112 k, v = l.split(": ", 1)
113 meta[k] = v
113 meta[k] = v
114 return meta, (s + 2)
114 return meta, (s + 2)
115
115
116 def packmeta(meta, text):
116 def packmeta(meta, text):
117 keys = sorted(meta)
117 keys = sorted(meta)
118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
119 return "\1\n%s\1\n%s" % (metatext, text)
119 return "\1\n%s\1\n%s" % (metatext, text)
120
120
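For illustration, here is a minimal standalone sketch of the framing that packmeta() and parsemeta() implement: the metadata block is wrapped between two "\1\n" markers and prepended to the revision text. The 'copy'/'copyrev' keys are only an example of typical filelog metadata.

meta = {'copy': 'a.txt', 'copyrev': '0' * 40}      # example keys only
metatext = "".join("%s: %s\n" % (k, meta[k]) for k in sorted(meta))
packed = "\1\n%s\1\n%s" % (metatext, "file content\n")
# parsemeta() finds the closing "\1\n" marker and returns (meta, offset):
assert packed[:2] == "\1\n"
end = packed.index("\1\n", 2)
parsed = dict(l.split(": ", 1) for l in packed[2:end].splitlines())
assert parsed == meta
assert packed[end + 2:] == "file content\n"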
121 def _censoredtext(text):
121 def _censoredtext(text):
122 m, offs = parsemeta(text)
122 m, offs = parsemeta(text)
123 return m and "censored" in m
123 return m and "censored" in m
124
124
125 def addflagprocessor(flag, processor):
125 def addflagprocessor(flag, processor):
126 """Register a flag processor on a revision data flag.
126 """Register a flag processor on a revision data flag.
127
127
128 Invariant:
128 Invariant:
129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
131 - Only one flag processor can be registered on a specific flag.
131 - Only one flag processor can be registered on a specific flag.
132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
133 following signatures:
133 following signatures:
134 - (read) f(self, rawtext) -> text, bool
134 - (read) f(self, rawtext) -> text, bool
135 - (write) f(self, text) -> rawtext, bool
135 - (write) f(self, text) -> rawtext, bool
136 - (raw) f(self, rawtext) -> bool
136 - (raw) f(self, rawtext) -> bool
137 "text" is presented to the user. "rawtext" is stored in revlog data, not
137 "text" is presented to the user. "rawtext" is stored in revlog data, not
138 directly visible to the user.
138 directly visible to the user.
139 The boolean returned by these transforms is used to determine whether
139 The boolean returned by these transforms is used to determine whether
140 the returned text can be used for hash integrity checking. For example,
140 the returned text can be used for hash integrity checking. For example,
141 if "write" returns False, then "text" is used to generate hash. If
141 if "write" returns False, then "text" is used to generate hash. If
142 "write" returns True, that basically means "rawtext" returned by "write"
142 "write" returns True, that basically means "rawtext" returned by "write"
143 should be used to generate hash. Usually, "write" and "read" return
143 should be used to generate hash. Usually, "write" and "read" return
144 different booleans. And "raw" returns the same boolean as "write".
144 different booleans. And "raw" returns the same boolean as "write".
145
145
146 Note: The 'raw' transform is used for changegroup generation and in some
146 Note: The 'raw' transform is used for changegroup generation and in some
147 debug commands. In this case the transform only indicates whether the
147 debug commands. In this case the transform only indicates whether the
148 contents can be used for hash integrity checks.
148 contents can be used for hash integrity checks.
149 """
149 """
150 if not flag & REVIDX_KNOWN_FLAGS:
150 if not flag & REVIDX_KNOWN_FLAGS:
151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
152 raise ProgrammingError(msg)
152 raise ProgrammingError(msg)
153 if flag not in REVIDX_FLAGS_ORDER:
153 if flag not in REVIDX_FLAGS_ORDER:
154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
155 raise ProgrammingError(msg)
155 raise ProgrammingError(msg)
156 if flag in _flagprocessors:
156 if flag in _flagprocessors:
157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
158 raise error.Abort(msg)
158 raise error.Abort(msg)
159 _flagprocessors[flag] = processor
159 _flagprocessors[flag] = processor
160
160
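As a hedged sketch of the contract documented above (not code from this module), an extension that stores large revisions externally might register a processor on REVIDX_EXTSTORED roughly as follows; storeexternally() and resolveexternally() are hypothetical helpers standing in for the extension's own storage layer.

def _readext(rl, rawtext):
    # rawtext is the stored pointer; return the real content. True means the
    # returned text is what the node hash was computed from.
    return resolveexternally(rawtext), True    # hypothetical helper

def _writeext(rl, text):
    # store the real content elsewhere and keep only a pointer in the revlog.
    # False: the pointer must not be used to compute the node hash.
    return storeexternally(text), False        # hypothetical helper

def _rawext(rl, rawtext):
    # same boolean as "write": the stored rawtext cannot be hash-checked.
    return False

addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))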
161 def getoffset(q):
161 def getoffset(q):
162 return int(q >> 16)
162 return int(q >> 16)
163
163
164 def gettype(q):
164 def gettype(q):
165 return int(q & 0xFFFF)
165 return int(q & 0xFFFF)
166
166
167 def offset_type(offset, type):
167 def offset_type(offset, type):
168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
169 raise ValueError('unknown revlog index flags')
169 raise ValueError('unknown revlog index flags')
170 return int(int(offset) << 16 | type)
170 return int(int(offset) << 16 | type)
171
171
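A quick worked example of the packing performed by offset_type() and undone by getoffset()/gettype(): the byte offset occupies the high bits of a single integer, while the 16 flag bits stay in the low 16 bits.

packed = offset_type(1048576, REVIDX_ISCENSORED)   # 1048576 << 16 | 0x8000
assert getoffset(packed) == 1048576
assert gettype(packed) == REVIDX_ISCENSORED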
172 _nullhash = hashlib.sha1(nullid)
172 _nullhash = hashlib.sha1(nullid)
173
173
174 def hash(text, p1, p2):
174 def hash(text, p1, p2):
175 """generate a hash from the given text and its parent hashes
175 """generate a hash from the given text and its parent hashes
176
176
177 This hash combines both the current file contents and its history
177 This hash combines both the current file contents and its history
178 in a manner that makes it easy to distinguish nodes with the same
178 in a manner that makes it easy to distinguish nodes with the same
179 content in the revision graph.
179 content in the revision graph.
180 """
180 """
181 # As of now, if one of the parent nodes is null, p2 is null
181 # As of now, if one of the parent nodes is null, p2 is null
182 if p2 == nullid:
182 if p2 == nullid:
183 # deep copy of a hash is faster than creating one
183 # deep copy of a hash is faster than creating one
184 s = _nullhash.copy()
184 s = _nullhash.copy()
185 s.update(p1)
185 s.update(p1)
186 else:
186 else:
187 # none of the parent nodes are nullid
187 # none of the parent nodes are nullid
188 if p1 < p2:
188 if p1 < p2:
189 a = p1
189 a = p1
190 b = p2
190 b = p2
191 else:
191 else:
192 a = p2
192 a = p2
193 b = p1
193 b = p1
194 s = hashlib.sha1(a)
194 s = hashlib.sha1(a)
195 s.update(b)
195 s.update(b)
196 s.update(text)
196 s.update(text)
197 return s.digest()
197 return s.digest()
198
198
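Stripped of the _nullhash fast path, the node hash defined above is simply SHA-1 over the two parent nodes in sorted order followed by the text; an equivalent, unoptimized sketch (not part of the module):

import hashlib

def plainhash(text, p1, p2):
    # nullid is 20 zero bytes, so a null parent always sorts first
    s = hashlib.sha1(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()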
199 class _testrevlog(object):
199 class _testrevlog(object):
200 """minimalist fake revlog to use in doctests"""
200 """minimalist fake revlog to use in doctests"""
201
201
202 def __init__(self, data, density=0.5, mingap=0):
202 def __init__(self, data, density=0.5, mingap=0):
203 """data is an list of revision payload boundaries"""
203 """data is an list of revision payload boundaries"""
204 self._data = data
204 self._data = data
205 self._srdensitythreshold = density
205 self._srdensitythreshold = density
206 self._srmingapsize = mingap
206 self._srmingapsize = mingap
207
207
208 def start(self, rev):
208 def start(self, rev):
209 if rev == 0:
209 if rev == 0:
210 return 0
210 return 0
211 return self._data[rev - 1]
211 return self._data[rev - 1]
212
212
213 def end(self, rev):
213 def end(self, rev):
214 return self._data[rev]
214 return self._data[rev]
215
215
216 def length(self, rev):
216 def length(self, rev):
217 return self.end(rev) - self.start(rev)
217 return self.end(rev) - self.start(rev)
218
218
219 def __len__(self):
220 return len(self._data)
221
219 def _trimchunk(revlog, revs, startidx, endidx=None):
222 def _trimchunk(revlog, revs, startidx, endidx=None):
220 """returns revs[startidx:endidx] without empty trailing revs
223 """returns revs[startidx:endidx] without empty trailing revs
221
224
222 Doctest Setup
225 Doctest Setup
223 >>> revlog = _testrevlog([
226 >>> revlog = _testrevlog([
224 ... 5, #0
227 ... 5, #0
225 ... 10, #1
228 ... 10, #1
226 ... 12, #2
229 ... 12, #2
227 ... 12, #3 (empty)
230 ... 12, #3 (empty)
228 ... 17, #4
231 ... 17, #4
229 ... 21, #5
232 ... 21, #5
230 ... 21, #6 (empty)
233 ... 21, #6 (empty)
231 ... ])
234 ... ])
232
235
233 Contiguous cases:
236 Contiguous cases:
234 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
237 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
235 [0, 1, 2, 3, 4, 5]
238 [0, 1, 2, 3, 4, 5]
236 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
239 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
237 [0, 1, 2, 3, 4]
240 [0, 1, 2, 3, 4]
238 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
241 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
239 [0, 1, 2]
242 [0, 1, 2]
240 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
243 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
241 [2]
244 [2]
242 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
245 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
243 [3, 4, 5]
246 [3, 4, 5]
244 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
247 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
245 [3, 4]
248 [3, 4]
246
249
247 Discontiguous cases:
250 Discontiguous cases:
248 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
251 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
249 [1, 3, 5]
252 [1, 3, 5]
250 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
253 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
251 [1]
254 [1]
252 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
255 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
253 [3, 5]
256 [3, 5]
254 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
257 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
255 [3, 5]
258 [3, 5]
256 """
259 """
257 length = revlog.length
260 length = revlog.length
258
261
259 if endidx is None:
262 if endidx is None:
260 endidx = len(revs)
263 endidx = len(revs)
261
264
262 # Trim empty revs at the end, but never the very first revision of a chain
265 # Trim empty revs at the end, but never the very first revision of a chain
263 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
266 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
264 endidx -= 1
267 endidx -= 1
265
268
266 return revs[startidx:endidx]
269 return revs[startidx:endidx]
267
270
268 def _segmentspan(revlog, revs):
271 def _segmentspan(revlog, revs):
269 """Get the byte span of a segment of revisions
272 """Get the byte span of a segment of revisions
270
273
271 revs is a sorted array of revision numbers
274 revs is a sorted array of revision numbers
272
275
273 >>> revlog = _testrevlog([
276 >>> revlog = _testrevlog([
274 ... 5, #0
277 ... 5, #0
275 ... 10, #1
278 ... 10, #1
276 ... 12, #2
279 ... 12, #2
277 ... 12, #3 (empty)
280 ... 12, #3 (empty)
278 ... 17, #4
281 ... 17, #4
279 ... ])
282 ... ])
280
283
281 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
284 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
282 17
285 17
283 >>> _segmentspan(revlog, [0, 4])
286 >>> _segmentspan(revlog, [0, 4])
284 17
287 17
285 >>> _segmentspan(revlog, [3, 4])
288 >>> _segmentspan(revlog, [3, 4])
286 5
289 5
287 >>> _segmentspan(revlog, [1, 2, 3,])
290 >>> _segmentspan(revlog, [1, 2, 3,])
288 7
291 7
289 >>> _segmentspan(revlog, [1, 3])
292 >>> _segmentspan(revlog, [1, 3])
290 7
293 7
291 """
294 """
292 if not revs:
295 if not revs:
293 return 0
296 return 0
294 return revlog.end(revs[-1]) - revlog.start(revs[0])
297 return revlog.end(revs[-1]) - revlog.start(revs[0])
295
298
296 def _slicechunk(revlog, revs, targetsize=None):
299 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
297 """slice revs to reduce the amount of unrelated data to be read from disk.
300 """slice revs to reduce the amount of unrelated data to be read from disk.
298
301
299 ``revs`` is sliced into groups that should be read in one go.
302 ``revs`` is sliced into groups that should be read in one go.
300 Assume that revs are sorted.
303 Assume that revs are sorted.
301
304
302 The initial chunk is sliced until the overall density (payload/chunks-span
305 The initial chunk is sliced until the overall density (payload/chunks-span
303 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
306 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
304 `revlog._srmingapsize` is skipped.
307 `revlog._srmingapsize` is skipped.
305
308
306 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
309 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
307 For consistency with other slicing choices, this limit won't go lower than
310 For consistency with other slicing choices, this limit won't go lower than
308 `revlog._srmingapsize`.
311 `revlog._srmingapsize`.
309
312
310 If an individual revision chunk is larger than this limit, it will still
313 If an individual revision chunk is larger than this limit, it will still
311 be yielded individually.
314 be yielded individually.
312
315
313 >>> revlog = _testrevlog([
316 >>> revlog = _testrevlog([
314 ... 5, #00 (5)
317 ... 5, #00 (5)
315 ... 10, #01 (5)
318 ... 10, #01 (5)
316 ... 12, #02 (2)
319 ... 12, #02 (2)
317 ... 12, #03 (empty)
320 ... 12, #03 (empty)
318 ... 27, #04 (15)
321 ... 27, #04 (15)
319 ... 31, #05 (4)
322 ... 31, #05 (4)
320 ... 31, #06 (empty)
323 ... 31, #06 (empty)
321 ... 42, #07 (11)
324 ... 42, #07 (11)
322 ... 47, #08 (5)
325 ... 47, #08 (5)
323 ... 47, #09 (empty)
326 ... 47, #09 (empty)
324 ... 48, #10 (1)
327 ... 48, #10 (1)
325 ... 51, #11 (3)
328 ... 51, #11 (3)
326 ... 74, #12 (23)
329 ... 74, #12 (23)
327 ... 85, #13 (11)
330 ... 85, #13 (11)
328 ... 86, #14 (1)
331 ... 86, #14 (1)
329 ... 91, #15 (5)
332 ... 91, #15 (5)
330 ... ])
333 ... ])
331
334
332 >>> list(_slicechunk(revlog, list(range(16))))
335 >>> list(_slicechunk(revlog, list(range(16))))
333 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
336 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
334 >>> list(_slicechunk(revlog, [0, 15]))
337 >>> list(_slicechunk(revlog, [0, 15]))
335 [[0], [15]]
338 [[0], [15]]
336 >>> list(_slicechunk(revlog, [0, 11, 15]))
339 >>> list(_slicechunk(revlog, [0, 11, 15]))
337 [[0], [11], [15]]
340 [[0], [11], [15]]
338 >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
341 >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
339 [[0], [11, 13, 15]]
342 [[0], [11, 13, 15]]
340 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
343 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
341 [[1, 2], [5, 8, 10, 11], [14]]
344 [[1, 2], [5, 8, 10, 11], [14]]
342
345
343 Slicing with a maximum chunk size
346 Slicing with a maximum chunk size
344 >>> list(_slicechunk(revlog, [0, 11, 13, 15], 15))
347 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
345 [[0], [11], [13], [15]]
348 [[0], [11], [13], [15]]
346 >>> list(_slicechunk(revlog, [0, 11, 13, 15], 20))
349 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
347 [[0], [11], [13, 15]]
350 [[0], [11], [13, 15]]
348 """
351 """
349 if targetsize is not None:
352 if targetsize is not None:
350 targetsize = max(targetsize, revlog._srmingapsize)
353 targetsize = max(targetsize, revlog._srmingapsize)
354 # targetsize should not be specified when evaluating delta candidates:
355 # * targetsize is used to ensure we stay within specification when reading,
356 # * deltainfo is used to pick a good delta chain when writing.
357 if not (deltainfo is None or targetsize is None):
358 msg = 'cannot use `targetsize` with a `deltainfo`'
359 raise error.ProgrammingError(msg)
351 for chunk in _slicechunktodensity(revlog, revs,
360 for chunk in _slicechunktodensity(revlog, revs,
361 deltainfo,
352 revlog._srdensitythreshold,
362 revlog._srdensitythreshold,
353 revlog._srmingapsize):
363 revlog._srmingapsize):
354 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
364 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
355 yield subchunk
365 yield subchunk
356
366
357 def _slicechunktosize(revlog, revs, targetsize):
367 def _slicechunktosize(revlog, revs, targetsize=None):
358 """slice revs to match the target size
368 """slice revs to match the target size
359
369
360 This is intended to be used on chunks that density slicing selected but that
370 This is intended to be used on chunks that density slicing selected but that
361 are still too large compared to the read guarantee of revlog. This might
371 are still too large compared to the read guarantee of revlog. This might
362 happen when "minimal gap size" interrupted the slicing or when chains are
372 happen when "minimal gap size" interrupted the slicing or when chains are
363 built in a way that creates large blocks next to each other.
373 built in a way that creates large blocks next to each other.
364
374
365 >>> revlog = _testrevlog([
375 >>> revlog = _testrevlog([
366 ... 3, #0 (3)
376 ... 3, #0 (3)
367 ... 5, #1 (2)
377 ... 5, #1 (2)
368 ... 6, #2 (1)
378 ... 6, #2 (1)
369 ... 8, #3 (2)
379 ... 8, #3 (2)
370 ... 8, #4 (empty)
380 ... 8, #4 (empty)
371 ... 11, #5 (3)
381 ... 11, #5 (3)
372 ... 12, #6 (1)
382 ... 12, #6 (1)
373 ... 13, #7 (1)
383 ... 13, #7 (1)
374 ... 14, #8 (1)
384 ... 14, #8 (1)
375 ... ])
385 ... ])
376
386
377 Cases where chunk is already small enough
387 Cases where chunk is already small enough
378 >>> list(_slicechunktosize(revlog, [0], 3))
388 >>> list(_slicechunktosize(revlog, [0], 3))
379 [[0]]
389 [[0]]
380 >>> list(_slicechunktosize(revlog, [6, 7], 3))
390 >>> list(_slicechunktosize(revlog, [6, 7], 3))
381 [[6, 7]]
391 [[6, 7]]
382 >>> list(_slicechunktosize(revlog, [0], None))
392 >>> list(_slicechunktosize(revlog, [0], None))
383 [[0]]
393 [[0]]
384 >>> list(_slicechunktosize(revlog, [6, 7], None))
394 >>> list(_slicechunktosize(revlog, [6, 7], None))
385 [[6, 7]]
395 [[6, 7]]
386
396
387 cases where we need actual slicing
397 cases where we need actual slicing
388 >>> list(_slicechunktosize(revlog, [0, 1], 3))
398 >>> list(_slicechunktosize(revlog, [0, 1], 3))
389 [[0], [1]]
399 [[0], [1]]
390 >>> list(_slicechunktosize(revlog, [1, 3], 3))
400 >>> list(_slicechunktosize(revlog, [1, 3], 3))
391 [[1], [3]]
401 [[1], [3]]
392 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
402 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
393 [[1, 2], [3]]
403 [[1, 2], [3]]
394 >>> list(_slicechunktosize(revlog, [3, 5], 3))
404 >>> list(_slicechunktosize(revlog, [3, 5], 3))
395 [[3], [5]]
405 [[3], [5]]
396 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
406 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
397 [[3], [5]]
407 [[3], [5]]
398 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
408 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
399 [[5], [6, 7, 8]]
409 [[5], [6, 7, 8]]
400 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
410 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
401 [[0], [1, 2], [3], [5], [6, 7, 8]]
411 [[0], [1, 2], [3], [5], [6, 7, 8]]
402
412
403 Case with too large individual chunk (must return valid chunk)
413 Case with too large individual chunk (must return valid chunk)
404 >>> list(_slicechunktosize(revlog, [0, 1], 2))
414 >>> list(_slicechunktosize(revlog, [0, 1], 2))
405 [[0], [1]]
415 [[0], [1]]
406 >>> list(_slicechunktosize(revlog, [1, 3], 1))
416 >>> list(_slicechunktosize(revlog, [1, 3], 1))
407 [[1], [3]]
417 [[1], [3]]
408 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
418 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
409 [[3], [5]]
419 [[3], [5]]
410 """
420 """
411 assert targetsize is None or 0 <= targetsize
421 assert targetsize is None or 0 <= targetsize
412 if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
422 if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
413 yield revs
423 yield revs
414 return
424 return
415
425
416 startrevidx = 0
426 startrevidx = 0
417 startdata = revlog.start(revs[0])
427 startdata = revlog.start(revs[0])
418 endrevidx = 0
428 endrevidx = 0
419 iterrevs = enumerate(revs)
429 iterrevs = enumerate(revs)
420 next(iterrevs) # skip first rev.
430 next(iterrevs) # skip first rev.
421 for idx, r in iterrevs:
431 for idx, r in iterrevs:
422 span = revlog.end(r) - startdata
432 span = revlog.end(r) - startdata
423 if span <= targetsize:
433 if span <= targetsize:
424 endrevidx = idx
434 endrevidx = idx
425 else:
435 else:
426 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
436 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
427 if chunk:
437 if chunk:
428 yield chunk
438 yield chunk
429 startrevidx = idx
439 startrevidx = idx
430 startdata = revlog.start(r)
440 startdata = revlog.start(r)
431 endrevidx = idx
441 endrevidx = idx
432 yield _trimchunk(revlog, revs, startrevidx)
442 yield _trimchunk(revlog, revs, startrevidx)
433
443
434 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
444 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
445 mingapsize=0):
435 """slice revs to reduce the amount of unrelated data to be read from disk.
446 """slice revs to reduce the amount of unrelated data to be read from disk.
436
447
437 ``revs`` is sliced into groups that should be read in one go.
448 ``revs`` is sliced into groups that should be read in one go.
438 Assume that revs are sorted.
449 Assume that revs are sorted.
439
450
451 ``deltainfo`` is a _deltainfo instance of a revision that we would append
452 to the top of the revlog.
453
440 The initial chunk is sliced until the overall density (payload/chunks-span
454 The initial chunk is sliced until the overall density (payload/chunks-span
441 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
455 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
442 skipped.
456 skipped.
443
457
444 >>> revlog = _testrevlog([
458 >>> revlog = _testrevlog([
445 ... 5, #00 (5)
459 ... 5, #00 (5)
446 ... 10, #01 (5)
460 ... 10, #01 (5)
447 ... 12, #02 (2)
461 ... 12, #02 (2)
448 ... 12, #03 (empty)
462 ... 12, #03 (empty)
449 ... 27, #04 (15)
463 ... 27, #04 (15)
450 ... 31, #05 (4)
464 ... 31, #05 (4)
451 ... 31, #06 (empty)
465 ... 31, #06 (empty)
452 ... 42, #07 (11)
466 ... 42, #07 (11)
453 ... 47, #08 (5)
467 ... 47, #08 (5)
454 ... 47, #09 (empty)
468 ... 47, #09 (empty)
455 ... 48, #10 (1)
469 ... 48, #10 (1)
456 ... 51, #11 (3)
470 ... 51, #11 (3)
457 ... 74, #12 (23)
471 ... 74, #12 (23)
458 ... 85, #13 (11)
472 ... 85, #13 (11)
459 ... 86, #14 (1)
473 ... 86, #14 (1)
460 ... 91, #15 (5)
474 ... 91, #15 (5)
461 ... ])
475 ... ])
462
476
463 >>> list(_slicechunktodensity(revlog, list(range(16))))
477 >>> list(_slicechunktodensity(revlog, list(range(16))))
464 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
478 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
465 >>> list(_slicechunktodensity(revlog, [0, 15]))
479 >>> list(_slicechunktodensity(revlog, [0, 15]))
466 [[0], [15]]
480 [[0], [15]]
467 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
481 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
468 [[0], [11], [15]]
482 [[0], [11], [15]]
469 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
483 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
470 [[0], [11, 13, 15]]
484 [[0], [11, 13, 15]]
471 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
485 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
472 [[1, 2], [5, 8, 10, 11], [14]]
486 [[1, 2], [5, 8, 10, 11], [14]]
473 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
487 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
474 ... mingapsize=20))
488 ... mingapsize=20))
475 [[1, 2, 3, 5, 8, 10, 11], [14]]
489 [[1, 2, 3, 5, 8, 10, 11], [14]]
476 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
490 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
477 ... targetdensity=0.95))
491 ... targetdensity=0.95))
478 [[1, 2], [5], [8, 10, 11], [14]]
492 [[1, 2], [5], [8, 10, 11], [14]]
479 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
493 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
480 ... targetdensity=0.95, mingapsize=12))
494 ... targetdensity=0.95, mingapsize=12))
481 [[1, 2], [5, 8, 10, 11], [14]]
495 [[1, 2], [5, 8, 10, 11], [14]]
482 """
496 """
483 start = revlog.start
497 start = revlog.start
484 length = revlog.length
498 length = revlog.length
485
499
486 if len(revs) <= 1:
500 if len(revs) <= 1:
487 yield revs
501 yield revs
488 return
502 return
489
503
490 readdata = deltachainspan = _segmentspan(revlog, revs)
504 nextrev = len(revlog)
505 nextoffset = revlog.end(nextrev - 1)
506
507 if deltainfo is None:
508 deltachainspan = _segmentspan(revlog, revs)
509 chainpayload = sum(length(r) for r in revs)
510 else:
511 deltachainspan = deltainfo.distance
512 chainpayload = deltainfo.compresseddeltalen
491
513
492 if deltachainspan < mingapsize:
514 if deltachainspan < mingapsize:
493 yield revs
515 yield revs
494 return
516 return
495
517
496 chainpayload = sum(length(r) for r in revs)
518 readdata = deltachainspan
497
519
498 if deltachainspan:
520 if deltachainspan:
499 density = chainpayload / float(deltachainspan)
521 density = chainpayload / float(deltachainspan)
500 else:
522 else:
501 density = 1.0
523 density = 1.0
502
524
503 if density >= targetdensity:
525 if density >= targetdensity:
504 yield revs
526 yield revs
505 return
527 return
506
528
529 if deltainfo is not None:
530 revs = list(revs)
531 revs.append(nextrev)
532
507 # Store the gaps in a heap to have them sorted by decreasing size
533 # Store the gaps in a heap to have them sorted by decreasing size
508 gapsheap = []
534 gapsheap = []
509 heapq.heapify(gapsheap)
535 heapq.heapify(gapsheap)
510 prevend = None
536 prevend = None
511 for i, rev in enumerate(revs):
537 for i, rev in enumerate(revs):
512 revstart = start(rev)
538 if rev < nextrev:
513 revlen = length(rev)
539 revstart = start(rev)
540 revlen = length(rev)
541 else:
542 revstart = nextoffset
543 revlen = deltainfo.deltalen
514
544
515 # Skip empty revisions to form larger holes
545 # Skip empty revisions to form larger holes
516 if revlen == 0:
546 if revlen == 0:
517 continue
547 continue
518
548
519 if prevend is not None:
549 if prevend is not None:
520 gapsize = revstart - prevend
550 gapsize = revstart - prevend
521 # only consider holes that are large enough
551 # only consider holes that are large enough
522 if gapsize > mingapsize:
552 if gapsize > mingapsize:
523 heapq.heappush(gapsheap, (-gapsize, i))
553 heapq.heappush(gapsheap, (-gapsize, i))
524
554
525 prevend = revstart + revlen
555 prevend = revstart + revlen
526
556
527 # Collect the indices of the largest holes until the density is acceptable
557 # Collect the indices of the largest holes until the density is acceptable
528 indicesheap = []
558 indicesheap = []
529 heapq.heapify(indicesheap)
559 heapq.heapify(indicesheap)
530 while gapsheap and density < targetdensity:
560 while gapsheap and density < targetdensity:
531 oppgapsize, gapidx = heapq.heappop(gapsheap)
561 oppgapsize, gapidx = heapq.heappop(gapsheap)
532
562
533 heapq.heappush(indicesheap, gapidx)
563 heapq.heappush(indicesheap, gapidx)
534
564
535 # the gap sizes are stored as negatives to be sorted decreasingly
565 # the gap sizes are stored as negatives to be sorted decreasingly
536 # by the heap
566 # by the heap
537 readdata -= (-oppgapsize)
567 readdata -= (-oppgapsize)
538 if readdata > 0:
568 if readdata > 0:
539 density = chainpayload / float(readdata)
569 density = chainpayload / float(readdata)
540 else:
570 else:
541 density = 1.0
571 density = 1.0
542
572
543 # Cut the revs at collected indices
573 # Cut the revs at collected indices
544 previdx = 0
574 previdx = 0
545 while indicesheap:
575 while indicesheap:
546 idx = heapq.heappop(indicesheap)
576 idx = heapq.heappop(indicesheap)
547
577
548 chunk = _trimchunk(revlog, revs, previdx, idx)
578 chunk = _trimchunk(revlog, revs, previdx, idx)
549 if chunk:
579 if chunk:
550 yield chunk
580 yield chunk
551
581
552 previdx = idx
582 previdx = idx
553
583
554 chunk = _trimchunk(revlog, revs, previdx)
584 chunk = _trimchunk(revlog, revs, previdx)
555 if chunk:
585 if chunk:
556 yield chunk
586 yield chunk
557
587
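To make the density computation concrete, take the doctest call _slicechunktodensity(revlog, [0, 11, 13, 15]) above: the payloads are 5 + 3 + 11 + 5 = 24 bytes, while the span from the start of rev 0 to the end of rev 15 is 91 bytes, so the initial density is 24/91 (about 0.26), below the 0.5 target. The largest gap, the 43 bytes covering revs 1..10, is popped from the heap first, which lowers readdata to 48 and raises the density to 24/48 = 0.5, so slicing stops after a single cut and yields [[0], [11, 13, 15]].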
558 @attr.s(slots=True, frozen=True)
588 @attr.s(slots=True, frozen=True)
559 class _deltainfo(object):
589 class _deltainfo(object):
560 distance = attr.ib()
590 distance = attr.ib()
561 deltalen = attr.ib()
591 deltalen = attr.ib()
562 data = attr.ib()
592 data = attr.ib()
563 base = attr.ib()
593 base = attr.ib()
564 chainbase = attr.ib()
594 chainbase = attr.ib()
565 chainlen = attr.ib()
595 chainlen = attr.ib()
566 compresseddeltalen = attr.ib()
596 compresseddeltalen = attr.ib()
567
597
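For reference while reading _builddeltainfo() below, which fills these fields (meanings inferred from that code, added here as a reading aid): distance is the byte span from the start of the chain base to the end of the new delta; deltalen is the size of the compressed delta itself; data is the (header, data) pair returned by compress(); base is the revision the delta applies against; chainbase is the base of the resulting delta chain; chainlen is the chain length including the new delta; and compresseddeltalen is the total compressed size of that chain.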
568 class _deltacomputer(object):
598 class _deltacomputer(object):
569 def __init__(self, revlog):
599 def __init__(self, revlog):
570 self.revlog = revlog
600 self.revlog = revlog
571
601
572 def _getcandidaterevs(self, p1, p2, cachedelta):
602 def _getcandidaterevs(self, p1, p2, cachedelta):
573 """
603 """
574 Provides revisions that are worth diffing against,
604 Provides revisions that are worth diffing against,
575 grouped by level of ease.
605 grouped by level of ease.
576 """
606 """
577 revlog = self.revlog
607 revlog = self.revlog
578 gdelta = revlog._generaldelta
608 gdelta = revlog._generaldelta
579 curr = len(revlog)
609 curr = len(revlog)
580 prev = curr - 1
610 prev = curr - 1
581 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
611 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
582
612
583 # should we try to build a delta?
613 # should we try to build a delta?
584 if prev != nullrev and revlog.storedeltachains:
614 if prev != nullrev and revlog.storedeltachains:
585 tested = set()
615 tested = set()
586 # This condition is true most of the time when processing
616 # This condition is true most of the time when processing
587 # changegroup data into a generaldelta repo. The only time it
617 # changegroup data into a generaldelta repo. The only time it
588 # isn't true is if this is the first revision in a delta chain
618 # isn't true is if this is the first revision in a delta chain
589 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
619 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
590 if cachedelta and gdelta and revlog._lazydeltabase:
620 if cachedelta and gdelta and revlog._lazydeltabase:
591 # Assume what we received from the server is a good choice
621 # Assume what we received from the server is a good choice
592 # build delta will reuse the cache
622 # build delta will reuse the cache
593 yield (cachedelta[0],)
623 yield (cachedelta[0],)
594 tested.add(cachedelta[0])
624 tested.add(cachedelta[0])
595
625
596 if gdelta:
626 if gdelta:
597 # exclude already lazy tested base if any
627 # exclude already lazy tested base if any
598 parents = [p for p in (p1r, p2r)
628 parents = [p for p in (p1r, p2r)
599 if p != nullrev and p not in tested]
629 if p != nullrev and p not in tested]
600
630
601 if not revlog._aggressivemergedeltas and len(parents) == 2:
631 if not revlog._aggressivemergedeltas and len(parents) == 2:
602 parents.sort()
632 parents.sort()
603 # To minimize the chance of having to build a fulltext,
633 # To minimize the chance of having to build a fulltext,
604 # pick first whichever parent is closest to us (max rev)
634 # pick first whichever parent is closest to us (max rev)
605 yield (parents[1],)
635 yield (parents[1],)
606 # then the other one (min rev) if the first did not fit
636 # then the other one (min rev) if the first did not fit
607 yield (parents[0],)
637 yield (parents[0],)
608 tested.update(parents)
638 tested.update(parents)
609 elif len(parents) > 0:
639 elif len(parents) > 0:
610 # Test all parents (1 or 2), and keep the best candidate
640 # Test all parents (1 or 2), and keep the best candidate
611 yield parents
641 yield parents
612 tested.update(parents)
642 tested.update(parents)
613
643
614 if prev not in tested:
644 if prev not in tested:
615 # other approaches failed; try against prev to hopefully save us a
645 # other approaches failed; try against prev to hopefully save us a
616 # fulltext.
646 # fulltext.
617 yield (prev,)
647 yield (prev,)
618 tested.add(prev)
648 tested.add(prev)
619
649
620 def buildtext(self, revinfo, fh):
650 def buildtext(self, revinfo, fh):
621 """Builds a fulltext version of a revision
651 """Builds a fulltext version of a revision
622
652
623 revinfo: _revisioninfo instance that contains all needed info
653 revinfo: _revisioninfo instance that contains all needed info
624 fh: file handle to either the .i or the .d revlog file,
654 fh: file handle to either the .i or the .d revlog file,
625 depending on whether it is inlined or not
655 depending on whether it is inlined or not
626 """
656 """
627 btext = revinfo.btext
657 btext = revinfo.btext
628 if btext[0] is not None:
658 if btext[0] is not None:
629 return btext[0]
659 return btext[0]
630
660
631 revlog = self.revlog
661 revlog = self.revlog
632 cachedelta = revinfo.cachedelta
662 cachedelta = revinfo.cachedelta
633 flags = revinfo.flags
663 flags = revinfo.flags
634 node = revinfo.node
664 node = revinfo.node
635
665
636 baserev = cachedelta[0]
666 baserev = cachedelta[0]
637 delta = cachedelta[1]
667 delta = cachedelta[1]
638 # special case deltas which replace entire base; no need to decode
668 # special case deltas which replace entire base; no need to decode
639 # base revision. this neatly avoids censored bases, which throw when
669 # base revision. this neatly avoids censored bases, which throw when
640 # they're decoded.
670 # they're decoded.
641 hlen = struct.calcsize(">lll")
671 hlen = struct.calcsize(">lll")
642 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
672 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
643 len(delta) - hlen):
673 len(delta) - hlen):
644 btext[0] = delta[hlen:]
674 btext[0] = delta[hlen:]
645 else:
675 else:
646 # deltabase is rawtext before changed by flag processors, which is
676 # deltabase is rawtext before changed by flag processors, which is
647 # equivalent to non-raw text
677 # equivalent to non-raw text
648 basetext = revlog.revision(baserev, _df=fh, raw=False)
678 basetext = revlog.revision(baserev, _df=fh, raw=False)
649 btext[0] = mdiff.patch(basetext, delta)
679 btext[0] = mdiff.patch(basetext, delta)
650
680
651 try:
681 try:
652 res = revlog._processflags(btext[0], flags, 'read', raw=True)
682 res = revlog._processflags(btext[0], flags, 'read', raw=True)
653 btext[0], validatehash = res
683 btext[0], validatehash = res
654 if validatehash:
684 if validatehash:
655 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
685 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
656 if flags & REVIDX_ISCENSORED:
686 if flags & REVIDX_ISCENSORED:
657 raise RevlogError(_('node %s is not censored') % node)
687 raise RevlogError(_('node %s is not censored') % node)
658 except CensoredNodeError:
688 except CensoredNodeError:
659 # must pass the censored index flag to add censored revisions
689 # must pass the censored index flag to add censored revisions
660 if not flags & REVIDX_ISCENSORED:
690 if not flags & REVIDX_ISCENSORED:
661 raise
691 raise
662 return btext[0]
692 return btext[0]
663
693
664 def _builddeltadiff(self, base, revinfo, fh):
694 def _builddeltadiff(self, base, revinfo, fh):
665 revlog = self.revlog
695 revlog = self.revlog
666 t = self.buildtext(revinfo, fh)
696 t = self.buildtext(revinfo, fh)
667 if revlog.iscensored(base):
697 if revlog.iscensored(base):
668 # deltas based on a censored revision must replace the
698 # deltas based on a censored revision must replace the
669 # full content in one patch, so delta works everywhere
699 # full content in one patch, so delta works everywhere
670 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
700 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
671 delta = header + t
701 delta = header + t
672 else:
702 else:
673 ptext = revlog.revision(base, _df=fh, raw=True)
703 ptext = revlog.revision(base, _df=fh, raw=True)
674 delta = mdiff.textdiff(ptext, t)
704 delta = mdiff.textdiff(ptext, t)
675
705
676 return delta
706 return delta
677
707
678 def _builddeltainfo(self, revinfo, base, fh):
708 def _builddeltainfo(self, revinfo, base, fh):
679 # can we use the cached delta?
709 # can we use the cached delta?
680 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
710 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
681 delta = revinfo.cachedelta[1]
711 delta = revinfo.cachedelta[1]
682 else:
712 else:
683 delta = self._builddeltadiff(base, revinfo, fh)
713 delta = self._builddeltadiff(base, revinfo, fh)
684 revlog = self.revlog
714 revlog = self.revlog
685 header, data = revlog.compress(delta)
715 header, data = revlog.compress(delta)
686 deltalen = len(header) + len(data)
716 deltalen = len(header) + len(data)
687 chainbase = revlog.chainbase(base)
717 chainbase = revlog.chainbase(base)
688 offset = revlog.end(len(revlog) - 1)
718 offset = revlog.end(len(revlog) - 1)
689 dist = deltalen + offset - revlog.start(chainbase)
719 dist = deltalen + offset - revlog.start(chainbase)
690 if revlog._generaldelta:
720 if revlog._generaldelta:
691 deltabase = base
721 deltabase = base
692 else:
722 else:
693 deltabase = chainbase
723 deltabase = chainbase
694 chainlen, compresseddeltalen = revlog._chaininfo(base)
724 chainlen, compresseddeltalen = revlog._chaininfo(base)
695 chainlen += 1
725 chainlen += 1
696 compresseddeltalen += deltalen
726 compresseddeltalen += deltalen
697 return _deltainfo(dist, deltalen, (header, data), deltabase,
727 return _deltainfo(dist, deltalen, (header, data), deltabase,
698 chainbase, chainlen, compresseddeltalen)
728 chainbase, chainlen, compresseddeltalen)
699
729
700 def finddeltainfo(self, revinfo, fh):
730 def finddeltainfo(self, revinfo, fh):
701 """Find an acceptable delta against a candidate revision
731 """Find an acceptable delta against a candidate revision
702
732
703 revinfo: information about the revision (instance of _revisioninfo)
733 revinfo: information about the revision (instance of _revisioninfo)
704 fh: file handle to either the .i or the .d revlog file,
734 fh: file handle to either the .i or the .d revlog file,
705 depending on whether it is inlined or not
735 depending on whether it is inlined or not
706
736
707 Returns the first acceptable candidate revision, as ordered by
737 Returns the first acceptable candidate revision, as ordered by
708 _getcandidaterevs
738 _getcandidaterevs
709 """
739 """
710 cachedelta = revinfo.cachedelta
740 cachedelta = revinfo.cachedelta
711 p1 = revinfo.p1
741 p1 = revinfo.p1
712 p2 = revinfo.p2
742 p2 = revinfo.p2
713 revlog = self.revlog
743 revlog = self.revlog
714
744
715 deltainfo = None
745 deltainfo = None
716 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
746 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
717 nominateddeltas = []
747 nominateddeltas = []
718 for candidaterev in candidaterevs:
748 for candidaterev in candidaterevs:
719 # no delta for rawtext-changing revs (see "candelta" for why)
749 # no delta for rawtext-changing revs (see "candelta" for why)
720 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
750 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
721 continue
751 continue
722 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
752 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
723 if revlog._isgooddeltainfo(candidatedelta, revinfo):
753 if revlog._isgooddeltainfo(candidatedelta, revinfo):
724 nominateddeltas.append(candidatedelta)
754 nominateddeltas.append(candidatedelta)
725 if nominateddeltas:
755 if nominateddeltas:
726 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
756 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
727 break
757 break
728
758
729 return deltainfo
759 return deltainfo
730
760
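A hedged sketch of how the write path drives this class (mirroring what revlog._addrevision does elsewhere in this file; the local variable names are illustrative):

dc = _deltacomputer(rl)                      # rl: the revlog being written to
revinfo = _revisioninfo(node, p1, p2, [rawtext], len(rawtext),
                        cachedelta, REVIDX_DEFAULT_FLAGS)
deltainfo = dc.finddeltainfo(revinfo, dfh)   # dfh: open .d (or .i) file handle
if deltainfo is None:
    # no acceptable delta was found: store the revision as a full snapshot
    pass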
731 @attr.s(slots=True, frozen=True)
761 @attr.s(slots=True, frozen=True)
732 class _revisioninfo(object):
762 class _revisioninfo(object):
733 """Information about a revision that allows building its fulltext
763 """Information about a revision that allows building its fulltext
734 node: expected hash of the revision
764 node: expected hash of the revision
735 p1, p2: parent nodes of the revision
765 p1, p2: parent nodes of the revision
736 btext: built text cache consisting of a one-element list
766 btext: built text cache consisting of a one-element list
737 cachedelta: (baserev, uncompressed_delta) or None
767 cachedelta: (baserev, uncompressed_delta) or None
738 flags: flags associated with the revision storage
768 flags: flags associated with the revision storage
739
769
740 One of btext[0] or cachedelta must be set.
770 One of btext[0] or cachedelta must be set.
741 """
771 """
742 node = attr.ib()
772 node = attr.ib()
743 p1 = attr.ib()
773 p1 = attr.ib()
744 p2 = attr.ib()
774 p2 = attr.ib()
745 btext = attr.ib()
775 btext = attr.ib()
746 textlen = attr.ib()
776 textlen = attr.ib()
747 cachedelta = attr.ib()
777 cachedelta = attr.ib()
748 flags = attr.ib()
778 flags = attr.ib()
749
779
750 # index v0:
780 # index v0:
751 # 4 bytes: offset
781 # 4 bytes: offset
752 # 4 bytes: compressed length
782 # 4 bytes: compressed length
753 # 4 bytes: base rev
783 # 4 bytes: base rev
754 # 4 bytes: link rev
784 # 4 bytes: link rev
755 # 20 bytes: parent 1 nodeid
785 # 20 bytes: parent 1 nodeid
756 # 20 bytes: parent 2 nodeid
786 # 20 bytes: parent 2 nodeid
757 # 20 bytes: nodeid
787 # 20 bytes: nodeid
758 indexformatv0 = struct.Struct(">4l20s20s20s")
788 indexformatv0 = struct.Struct(">4l20s20s20s")
759 indexformatv0_pack = indexformatv0.pack
789 indexformatv0_pack = indexformatv0.pack
760 indexformatv0_unpack = indexformatv0.unpack
790 indexformatv0_unpack = indexformatv0.unpack
761
791
762 class revlogoldio(object):
792 class revlogoldio(object):
763 def __init__(self):
793 def __init__(self):
764 self.size = indexformatv0.size
794 self.size = indexformatv0.size
765
795
766 def parseindex(self, data, inline):
796 def parseindex(self, data, inline):
767 s = self.size
797 s = self.size
768 index = []
798 index = []
769 nodemap = {nullid: nullrev}
799 nodemap = {nullid: nullrev}
770 n = off = 0
800 n = off = 0
771 l = len(data)
801 l = len(data)
772 while off + s <= l:
802 while off + s <= l:
773 cur = data[off:off + s]
803 cur = data[off:off + s]
774 off += s
804 off += s
775 e = indexformatv0_unpack(cur)
805 e = indexformatv0_unpack(cur)
776 # transform to revlogv1 format
806 # transform to revlogv1 format
777 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
807 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
778 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
808 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
779 index.append(e2)
809 index.append(e2)
780 nodemap[e[6]] = n
810 nodemap[e[6]] = n
781 n += 1
811 n += 1
782
812
783 # add the magic null revision at -1
813 # add the magic null revision at -1
784 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
814 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
785
815
786 return index, nodemap, None
816 return index, nodemap, None
787
817
788 def packentry(self, entry, node, version, rev):
818 def packentry(self, entry, node, version, rev):
789 if gettype(entry[0]):
819 if gettype(entry[0]):
790 raise RevlogError(_('index entry flags need revlog version 1'))
820 raise RevlogError(_('index entry flags need revlog version 1'))
791 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
821 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
792 node(entry[5]), node(entry[6]), entry[7])
822 node(entry[5]), node(entry[6]), entry[7])
793 return indexformatv0_pack(*e2)
823 return indexformatv0_pack(*e2)
794
824
795 # index ng:
825 # index ng:
796 # 6 bytes: offset
826 # 6 bytes: offset
797 # 2 bytes: flags
827 # 2 bytes: flags
798 # 4 bytes: compressed length
828 # 4 bytes: compressed length
799 # 4 bytes: uncompressed length
829 # 4 bytes: uncompressed length
800 # 4 bytes: base rev
830 # 4 bytes: base rev
801 # 4 bytes: link rev
831 # 4 bytes: link rev
802 # 4 bytes: parent 1 rev
832 # 4 bytes: parent 1 rev
803 # 4 bytes: parent 2 rev
833 # 4 bytes: parent 2 rev
804 # 32 bytes: nodeid
834 # 32 bytes: nodeid
805 indexformatng = struct.Struct(">Qiiiiii20s12x")
835 indexformatng = struct.Struct(">Qiiiiii20s12x")
806 indexformatng_pack = indexformatng.pack
836 indexformatng_pack = indexformatng.pack
807 versionformat = struct.Struct(">I")
837 versionformat = struct.Struct(">I")
808 versionformat_pack = versionformat.pack
838 versionformat_pack = versionformat.pack
809 versionformat_unpack = versionformat.unpack
839 versionformat_unpack = versionformat.unpack
810
840
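As a sketch only (the real parsing happens in the C module loaded as parsers above), one 64-byte RevlogNG entry of a non-inline index can be decoded with the struct defined here. Note that entry 0 has the version header overlaid on its first 4 bytes, so its offset is simply 0.

def unpackentry(indexdata, rev):
    s = indexformatng.size                      # 64 bytes per entry
    entry = indexformatng.unpack(indexdata[rev * s:(rev + 1) * s])
    offset_flags, comp, uncomp, base, link, p1, p2, nodeid = entry
    return {
        'offset': getoffset(offset_flags) if rev != 0 else 0,
        'flags': gettype(offset_flags),
        'compressedlen': comp,
        'uncompressedlen': uncomp,
        'baserev': base,
        'linkrev': link,
        'p1rev': p1,
        'p2rev': p2,
        'node': nodeid,   # 20-byte sha1; the remaining 12 bytes are padding
    }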
811 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
841 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
812 # signed integer)
842 # signed integer)
813 _maxentrysize = 0x7fffffff
843 _maxentrysize = 0x7fffffff
814
844
815 class revlogio(object):
845 class revlogio(object):
816 def __init__(self):
846 def __init__(self):
817 self.size = indexformatng.size
847 self.size = indexformatng.size
818
848
819 def parseindex(self, data, inline):
849 def parseindex(self, data, inline):
820 # call the C implementation to parse the index data
850 # call the C implementation to parse the index data
821 index, cache = parsers.parse_index2(data, inline)
851 index, cache = parsers.parse_index2(data, inline)
822 return index, getattr(index, 'nodemap', None), cache
852 return index, getattr(index, 'nodemap', None), cache
823
853
824 def packentry(self, entry, node, version, rev):
854 def packentry(self, entry, node, version, rev):
825 p = indexformatng_pack(*entry)
855 p = indexformatng_pack(*entry)
826 if rev == 0:
856 if rev == 0:
827 p = versionformat_pack(version) + p[4:]
857 p = versionformat_pack(version) + p[4:]
828 return p
858 return p
829
859
830 class revlog(object):
860 class revlog(object):
831 """
861 """
832 the underlying revision storage object
862 the underlying revision storage object
833
863
834 A revlog consists of two parts, an index and the revision data.
864 A revlog consists of two parts, an index and the revision data.
835
865
836 The index is a file with a fixed record size containing
866 The index is a file with a fixed record size containing
837 information on each revision, including its nodeid (hash), the
867 information on each revision, including its nodeid (hash), the
838 nodeids of its parents, the position and offset of its data within
868 nodeids of its parents, the position and offset of its data within
839 the data file, and the revision it's based on. Finally, each entry
869 the data file, and the revision it's based on. Finally, each entry
840 contains a linkrev entry that can serve as a pointer to external
870 contains a linkrev entry that can serve as a pointer to external
841 data.
871 data.
842
872
843 The revision data itself is a linear collection of data chunks.
873 The revision data itself is a linear collection of data chunks.
844 Each chunk represents a revision and is usually represented as a
874 Each chunk represents a revision and is usually represented as a
845 delta against the previous chunk. To bound lookup time, runs of
875 delta against the previous chunk. To bound lookup time, runs of
846 deltas are limited to about 2 times the length of the original
876 deltas are limited to about 2 times the length of the original
847 version data. This makes retrieval of a version proportional to
877 version data. This makes retrieval of a version proportional to
848 its size, or O(1) relative to the number of revisions.
878 its size, or O(1) relative to the number of revisions.
849
879
850 Both pieces of the revlog are written to in an append-only
880 Both pieces of the revlog are written to in an append-only
851 fashion, which means we never need to rewrite a file to insert or
881 fashion, which means we never need to rewrite a file to insert or
852 remove data, and can use some simple techniques to avoid the need
882 remove data, and can use some simple techniques to avoid the need
853 for locking while reading.
883 for locking while reading.
854
884
855 If checkambig, indexfile is opened with checkambig=True at
885 If checkambig, indexfile is opened with checkambig=True at
856 writing, to avoid file stat ambiguity.
886 writing, to avoid file stat ambiguity.
857
887
858 If mmaplargeindex is True, and an mmapindexthreshold is set, the
888 If mmaplargeindex is True, and an mmapindexthreshold is set, the
859 index will be mmapped rather than read if it is larger than the
889 index will be mmapped rather than read if it is larger than the
860 configured threshold.
890 configured threshold.
861
891
862 If censorable is True, the revlog can have censored revisions.
892 If censorable is True, the revlog can have censored revisions.
863 """
893 """
864 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
894 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
865 mmaplargeindex=False, censorable=False):
895 mmaplargeindex=False, censorable=False):
866 """
896 """
867 create a revlog object
897 create a revlog object
868
898
869 opener is a function that abstracts the file opening operation
899 opener is a function that abstracts the file opening operation
870 and can be used to implement COW semantics or the like.
900 and can be used to implement COW semantics or the like.
871 """
901 """
872 self.indexfile = indexfile
902 self.indexfile = indexfile
873 self.datafile = datafile or (indexfile[:-2] + ".d")
903 self.datafile = datafile or (indexfile[:-2] + ".d")
874 self.opener = opener
904 self.opener = opener
875 # When True, indexfile is opened with checkambig=True at writing, to
905 # When True, indexfile is opened with checkambig=True at writing, to
876 # avoid file stat ambiguity.
906 # avoid file stat ambiguity.
877 self._checkambig = checkambig
907 self._checkambig = checkambig
878 self._censorable = censorable
908 self._censorable = censorable
879 # 3-tuple of (node, rev, text) for a raw revision.
909 # 3-tuple of (node, rev, text) for a raw revision.
880 self._cache = None
910 self._cache = None
881 # Maps rev to chain base rev.
911 # Maps rev to chain base rev.
882 self._chainbasecache = util.lrucachedict(100)
912 self._chainbasecache = util.lrucachedict(100)
883 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
913 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
884 self._chunkcache = (0, '')
914 self._chunkcache = (0, '')
885 # How much data to read and cache into the raw revlog data cache.
915 # How much data to read and cache into the raw revlog data cache.
886 self._chunkcachesize = 65536
916 self._chunkcachesize = 65536
887 self._maxchainlen = None
917 self._maxchainlen = None
888 self._aggressivemergedeltas = True
918 self._aggressivemergedeltas = True
889 self.index = []
919 self.index = []
890 # Mapping of partial identifiers to full nodes.
920 # Mapping of partial identifiers to full nodes.
891 self._pcache = {}
921 self._pcache = {}
892 # Mapping of revision integer to full node.
922 # Mapping of revision integer to full node.
893 self._nodecache = {nullid: nullrev}
923 self._nodecache = {nullid: nullrev}
894 self._nodepos = None
924 self._nodepos = None
895 self._compengine = 'zlib'
925 self._compengine = 'zlib'
896 self._maxdeltachainspan = -1
926 self._maxdeltachainspan = -1
897 self._withsparseread = False
927 self._withsparseread = False
898 self._sparserevlog = False
928 self._sparserevlog = False
899 self._srdensitythreshold = 0.50
929 self._srdensitythreshold = 0.50
900 self._srmingapsize = 262144
930 self._srmingapsize = 262144
901
931
902 mmapindexthreshold = None
932 mmapindexthreshold = None
903 v = REVLOG_DEFAULT_VERSION
933 v = REVLOG_DEFAULT_VERSION
904 opts = getattr(opener, 'options', None)
934 opts = getattr(opener, 'options', None)
905 if opts is not None:
935 if opts is not None:
906 if 'revlogv2' in opts:
936 if 'revlogv2' in opts:
907 # version 2 revlogs always use generaldelta.
937 # version 2 revlogs always use generaldelta.
908 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
938 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
909 elif 'revlogv1' in opts:
939 elif 'revlogv1' in opts:
910 if 'generaldelta' in opts:
940 if 'generaldelta' in opts:
911 v |= FLAG_GENERALDELTA
941 v |= FLAG_GENERALDELTA
912 else:
942 else:
913 v = 0
943 v = 0
914 if 'chunkcachesize' in opts:
944 if 'chunkcachesize' in opts:
915 self._chunkcachesize = opts['chunkcachesize']
945 self._chunkcachesize = opts['chunkcachesize']
916 if 'maxchainlen' in opts:
946 if 'maxchainlen' in opts:
917 self._maxchainlen = opts['maxchainlen']
947 self._maxchainlen = opts['maxchainlen']
918 if 'aggressivemergedeltas' in opts:
948 if 'aggressivemergedeltas' in opts:
919 self._aggressivemergedeltas = opts['aggressivemergedeltas']
949 self._aggressivemergedeltas = opts['aggressivemergedeltas']
920 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
950 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
921 if 'compengine' in opts:
951 if 'compengine' in opts:
922 self._compengine = opts['compengine']
952 self._compengine = opts['compengine']
923 if 'maxdeltachainspan' in opts:
953 if 'maxdeltachainspan' in opts:
924 self._maxdeltachainspan = opts['maxdeltachainspan']
954 self._maxdeltachainspan = opts['maxdeltachainspan']
925 if mmaplargeindex and 'mmapindexthreshold' in opts:
955 if mmaplargeindex and 'mmapindexthreshold' in opts:
926 mmapindexthreshold = opts['mmapindexthreshold']
956 mmapindexthreshold = opts['mmapindexthreshold']
927 self._sparserevlog = bool(opts.get('sparse-revlog', False))
957 self._sparserevlog = bool(opts.get('sparse-revlog', False))
928 withsparseread = bool(opts.get('with-sparse-read', False))
958 withsparseread = bool(opts.get('with-sparse-read', False))
929 # sparse-revlog forces sparse-read
959 # sparse-revlog forces sparse-read
930 self._withsparseread = self._sparserevlog or withsparseread
960 self._withsparseread = self._sparserevlog or withsparseread
931 if 'sparse-read-density-threshold' in opts:
961 if 'sparse-read-density-threshold' in opts:
932 self._srdensitythreshold = opts['sparse-read-density-threshold']
962 self._srdensitythreshold = opts['sparse-read-density-threshold']
933 if 'sparse-read-min-gap-size' in opts:
963 if 'sparse-read-min-gap-size' in opts:
934 self._srmingapsize = opts['sparse-read-min-gap-size']
964 self._srmingapsize = opts['sparse-read-min-gap-size']
935
965
936 if self._chunkcachesize <= 0:
966 if self._chunkcachesize <= 0:
937 raise RevlogError(_('revlog chunk cache size %r is not greater '
967 raise RevlogError(_('revlog chunk cache size %r is not greater '
938 'than 0') % self._chunkcachesize)
968 'than 0') % self._chunkcachesize)
939 elif self._chunkcachesize & (self._chunkcachesize - 1):
969 elif self._chunkcachesize & (self._chunkcachesize - 1):
940 raise RevlogError(_('revlog chunk cache size %r is not a power '
970 raise RevlogError(_('revlog chunk cache size %r is not a power '
941 'of 2') % self._chunkcachesize)
971 'of 2') % self._chunkcachesize)
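        # Sketch of the check above: for any n > 0, n & (n - 1) clears the
        # lowest set bit, so the result is zero exactly when n is a power of
        # two, e.g. 65536 & 65535 == 0 (accepted) but 65535 & 65534 == 65534
        # (rejected).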
942
972
943 indexdata = ''
973 indexdata = ''
944 self._initempty = True
974 self._initempty = True
945 try:
975 try:
946 with self._indexfp() as f:
976 with self._indexfp() as f:
947 if (mmapindexthreshold is not None and
977 if (mmapindexthreshold is not None and
948 self.opener.fstat(f).st_size >= mmapindexthreshold):
978 self.opener.fstat(f).st_size >= mmapindexthreshold):
949 indexdata = util.buffer(util.mmapread(f))
979 indexdata = util.buffer(util.mmapread(f))
950 else:
980 else:
951 indexdata = f.read()
981 indexdata = f.read()
952 if len(indexdata) > 0:
982 if len(indexdata) > 0:
953 v = versionformat_unpack(indexdata[:4])[0]
983 v = versionformat_unpack(indexdata[:4])[0]
954 self._initempty = False
984 self._initempty = False
955 except IOError as inst:
985 except IOError as inst:
956 if inst.errno != errno.ENOENT:
986 if inst.errno != errno.ENOENT:
957 raise
987 raise
958
988
959 self.version = v
989 self.version = v
960 self._inline = v & FLAG_INLINE_DATA
990 self._inline = v & FLAG_INLINE_DATA
961 self._generaldelta = v & FLAG_GENERALDELTA
991 self._generaldelta = v & FLAG_GENERALDELTA
962 flags = v & ~0xFFFF
992 flags = v & ~0xFFFF
963 fmt = v & 0xFFFF
993 fmt = v & 0xFFFF
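        # Header layout sketch: the 4-byte header packs the on-disk format in
        # the low 16 bits and feature flags in the high 16 bits. For example,
        # v = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA is 0x00030001:
        #   v & 0xFFFF  -> 0x0001 (format: REVLOGV1)
        #   v & ~0xFFFF -> 0x00030000 (flags: inline data + generaldelta)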
964 if fmt == REVLOGV0:
994 if fmt == REVLOGV0:
965 if flags:
995 if flags:
966 raise RevlogError(_('unknown flags (%#04x) in version %d '
996 raise RevlogError(_('unknown flags (%#04x) in version %d '
967 'revlog %s') %
997 'revlog %s') %
968 (flags >> 16, fmt, self.indexfile))
998 (flags >> 16, fmt, self.indexfile))
969 elif fmt == REVLOGV1:
999 elif fmt == REVLOGV1:
970 if flags & ~REVLOGV1_FLAGS:
1000 if flags & ~REVLOGV1_FLAGS:
971 raise RevlogError(_('unknown flags (%#04x) in version %d '
1001 raise RevlogError(_('unknown flags (%#04x) in version %d '
972 'revlog %s') %
1002 'revlog %s') %
973 (flags >> 16, fmt, self.indexfile))
1003 (flags >> 16, fmt, self.indexfile))
974 elif fmt == REVLOGV2:
1004 elif fmt == REVLOGV2:
975 if flags & ~REVLOGV2_FLAGS:
1005 if flags & ~REVLOGV2_FLAGS:
976 raise RevlogError(_('unknown flags (%#04x) in version %d '
1006 raise RevlogError(_('unknown flags (%#04x) in version %d '
977 'revlog %s') %
1007 'revlog %s') %
978 (flags >> 16, fmt, self.indexfile))
1008 (flags >> 16, fmt, self.indexfile))
979 else:
1009 else:
980 raise RevlogError(_('unknown version (%d) in revlog %s') %
1010 raise RevlogError(_('unknown version (%d) in revlog %s') %
981 (fmt, self.indexfile))
1011 (fmt, self.indexfile))
982
1012
983 self.storedeltachains = True
1013 self.storedeltachains = True
984
1014
985 self._io = revlogio()
1015 self._io = revlogio()
986 if self.version == REVLOGV0:
1016 if self.version == REVLOGV0:
987 self._io = revlogoldio()
1017 self._io = revlogoldio()
988 try:
1018 try:
989 d = self._io.parseindex(indexdata, self._inline)
1019 d = self._io.parseindex(indexdata, self._inline)
990 except (ValueError, IndexError):
1020 except (ValueError, IndexError):
991 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
1021 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
992 self.index, nodemap, self._chunkcache = d
1022 self.index, nodemap, self._chunkcache = d
993 if nodemap is not None:
1023 if nodemap is not None:
994 self.nodemap = self._nodecache = nodemap
1024 self.nodemap = self._nodecache = nodemap
995 if not self._chunkcache:
1025 if not self._chunkcache:
996 self._chunkclear()
1026 self._chunkclear()
997 # revnum -> (chain-length, sum-delta-length)
1027 # revnum -> (chain-length, sum-delta-length)
998 self._chaininfocache = {}
1028 self._chaininfocache = {}
999 # revlog header -> revlog compressor
1029 # revlog header -> revlog compressor
1000 self._decompressors = {}
1030 self._decompressors = {}
1001
1031
1002 @util.propertycache
1032 @util.propertycache
1003 def _compressor(self):
1033 def _compressor(self):
1004 return util.compengines[self._compengine].revlogcompressor()
1034 return util.compengines[self._compengine].revlogcompressor()
1005
1035
1006 def _indexfp(self, mode='r'):
1036 def _indexfp(self, mode='r'):
1007 """file object for the revlog's index file"""
1037 """file object for the revlog's index file"""
1008 args = {r'mode': mode}
1038 args = {r'mode': mode}
1009 if mode != 'r':
1039 if mode != 'r':
1010 args[r'checkambig'] = self._checkambig
1040 args[r'checkambig'] = self._checkambig
1011 if mode == 'w':
1041 if mode == 'w':
1012 args[r'atomictemp'] = True
1042 args[r'atomictemp'] = True
1013 return self.opener(self.indexfile, **args)
1043 return self.opener(self.indexfile, **args)
1014
1044
1015 def _datafp(self, mode='r'):
1045 def _datafp(self, mode='r'):
1016 """file object for the revlog's data file"""
1046 """file object for the revlog's data file"""
1017 return self.opener(self.datafile, mode=mode)
1047 return self.opener(self.datafile, mode=mode)
1018
1048
1019 @contextlib.contextmanager
1049 @contextlib.contextmanager
1020 def _datareadfp(self, existingfp=None):
1050 def _datareadfp(self, existingfp=None):
1021 """file object suitable to read data"""
1051 """file object suitable to read data"""
1022 if existingfp is not None:
1052 if existingfp is not None:
1023 yield existingfp
1053 yield existingfp
1024 else:
1054 else:
1025 if self._inline:
1055 if self._inline:
1026 func = self._indexfp
1056 func = self._indexfp
1027 else:
1057 else:
1028 func = self._datafp
1058 func = self._datafp
1029 with func() as fp:
1059 with func() as fp:
1030 yield fp
1060 yield fp
1031
1061
1032 def tip(self):
1062 def tip(self):
1033 return self.node(len(self.index) - 2)
1063 return self.node(len(self.index) - 2)
1034 def __contains__(self, rev):
1064 def __contains__(self, rev):
1035 return 0 <= rev < len(self)
1065 return 0 <= rev < len(self)
1036 def __len__(self):
1066 def __len__(self):
1037 return len(self.index) - 1
1067 return len(self.index) - 1
1038 def __iter__(self):
1068 def __iter__(self):
1039 return iter(xrange(len(self)))
1069 return iter(xrange(len(self)))
1040 def revs(self, start=0, stop=None):
1070 def revs(self, start=0, stop=None):
1041 """iterate over all rev in this revlog (from start to stop)"""
1071 """iterate over all rev in this revlog (from start to stop)"""
1042 step = 1
1072 step = 1
1043 if stop is not None:
1073 if stop is not None:
1044 if start > stop:
1074 if start > stop:
1045 step = -1
1075 step = -1
1046 stop += step
1076 stop += step
1047 else:
1077 else:
1048 stop = len(self)
1078 stop = len(self)
1049 return xrange(start, stop, step)
1079 return xrange(start, stop, step)
1050
1080
1051 @util.propertycache
1081 @util.propertycache
1052 def nodemap(self):
1082 def nodemap(self):
1053 self.rev(self.node(0))
1083 self.rev(self.node(0))
1054 return self._nodecache
1084 return self._nodecache
1055
1085
1056 def hasnode(self, node):
1086 def hasnode(self, node):
1057 try:
1087 try:
1058 self.rev(node)
1088 self.rev(node)
1059 return True
1089 return True
1060 except KeyError:
1090 except KeyError:
1061 return False
1091 return False
1062
1092
1063 def candelta(self, baserev, rev):
1093 def candelta(self, baserev, rev):
1064 """whether two revisions (baserev, rev) can be delta-ed or not"""
1094 """whether two revisions (baserev, rev) can be delta-ed or not"""
1065 # Disable delta if either rev requires a content-changing flag
1095 # Disable delta if either rev requires a content-changing flag
1066 # processor (ex. LFS). This is because such a flag processor can alter
1096 # processor (ex. LFS). This is because such a flag processor can alter
1067 # the rawtext content that the delta will be based on, and two clients
1097 # the rawtext content that the delta will be based on, and two clients
1068 # could have the same revlog node with different flags (i.e. different
1098 # could have the same revlog node with different flags (i.e. different
1069 # rawtext contents) and the delta could be incompatible.
1099 # rawtext contents) and the delta could be incompatible.
1070 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
1100 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
1071 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
1101 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
1072 return False
1102 return False
1073 return True
1103 return True
1074
1104
1075 def clearcaches(self):
1105 def clearcaches(self):
1076 self._cache = None
1106 self._cache = None
1077 self._chainbasecache.clear()
1107 self._chainbasecache.clear()
1078 self._chunkcache = (0, '')
1108 self._chunkcache = (0, '')
1079 self._pcache = {}
1109 self._pcache = {}
1080
1110
1081 try:
1111 try:
1082 self._nodecache.clearcaches()
1112 self._nodecache.clearcaches()
1083 except AttributeError:
1113 except AttributeError:
1084 self._nodecache = {nullid: nullrev}
1114 self._nodecache = {nullid: nullrev}
1085 self._nodepos = None
1115 self._nodepos = None
1086
1116
1087 def rev(self, node):
1117 def rev(self, node):
1088 try:
1118 try:
1089 return self._nodecache[node]
1119 return self._nodecache[node]
1090 except TypeError:
1120 except TypeError:
1091 raise
1121 raise
1092 except RevlogError:
1122 except RevlogError:
1093 # parsers.c radix tree lookup failed
1123 # parsers.c radix tree lookup failed
1094 if node == wdirid or node in wdirfilenodeids:
1124 if node == wdirid or node in wdirfilenodeids:
1095 raise error.WdirUnsupported
1125 raise error.WdirUnsupported
1096 raise LookupError(node, self.indexfile, _('no node'))
1126 raise LookupError(node, self.indexfile, _('no node'))
1097 except KeyError:
1127 except KeyError:
1098 # pure python cache lookup failed
1128 # pure python cache lookup failed
1099 n = self._nodecache
1129 n = self._nodecache
1100 i = self.index
1130 i = self.index
1101 p = self._nodepos
1131 p = self._nodepos
1102 if p is None:
1132 if p is None:
1103 p = len(i) - 2
1133 p = len(i) - 2
1104 else:
1134 else:
1105 assert p < len(i)
1135 assert p < len(i)
1106 for r in xrange(p, -1, -1):
1136 for r in xrange(p, -1, -1):
1107 v = i[r][7]
1137 v = i[r][7]
1108 n[v] = r
1138 n[v] = r
1109 if v == node:
1139 if v == node:
1110 self._nodepos = r - 1
1140 self._nodepos = r - 1
1111 return r
1141 return r
1112 if node == wdirid or node in wdirfilenodeids:
1142 if node == wdirid or node in wdirfilenodeids:
1113 raise error.WdirUnsupported
1143 raise error.WdirUnsupported
1114 raise LookupError(node, self.indexfile, _('no node'))
1144 raise LookupError(node, self.indexfile, _('no node'))
1115
1145
1116 # Accessors for index entries.
1146 # Accessors for index entries.
1117
1147
1118 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1148 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1119 # are flags.
1149 # are flags.
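    # Packing sketch: index[rev][0] == (byte_offset << 16) | flags, so an
    # entry stored at byte offset 1024 with no flags has index[rev][0] ==
    # 67108864; start() recovers 67108864 >> 16 == 1024 and flags() recovers
    # 67108864 & 0xFFFF == 0.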
1120 def start(self, rev):
1150 def start(self, rev):
1121 return int(self.index[rev][0] >> 16)
1151 return int(self.index[rev][0] >> 16)
1122
1152
1123 def flags(self, rev):
1153 def flags(self, rev):
1124 return self.index[rev][0] & 0xFFFF
1154 return self.index[rev][0] & 0xFFFF
1125
1155
1126 def length(self, rev):
1156 def length(self, rev):
1127 return self.index[rev][1]
1157 return self.index[rev][1]
1128
1158
1129 def rawsize(self, rev):
1159 def rawsize(self, rev):
1130 """return the length of the uncompressed text for a given revision"""
1160 """return the length of the uncompressed text for a given revision"""
1131 l = self.index[rev][2]
1161 l = self.index[rev][2]
1132 if l >= 0:
1162 if l >= 0:
1133 return l
1163 return l
1134
1164
1135 t = self.revision(rev, raw=True)
1165 t = self.revision(rev, raw=True)
1136 return len(t)
1166 return len(t)
1137
1167
1138 def size(self, rev):
1168 def size(self, rev):
1139 """length of non-raw text (processed by a "read" flag processor)"""
1169 """length of non-raw text (processed by a "read" flag processor)"""
1140 # fast path: if no "read" flag processor could change the content,
1170 # fast path: if no "read" flag processor could change the content,
1141 # size is rawsize. note: ELLIPSIS is known to not change the content.
1171 # size is rawsize. note: ELLIPSIS is known to not change the content.
1142 flags = self.flags(rev)
1172 flags = self.flags(rev)
1143 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1173 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1144 return self.rawsize(rev)
1174 return self.rawsize(rev)
1145
1175
1146 return len(self.revision(rev, raw=False))
1176 return len(self.revision(rev, raw=False))
1147
1177
1148 def chainbase(self, rev):
1178 def chainbase(self, rev):
1149 base = self._chainbasecache.get(rev)
1179 base = self._chainbasecache.get(rev)
1150 if base is not None:
1180 if base is not None:
1151 return base
1181 return base
1152
1182
1153 index = self.index
1183 index = self.index
1154 iterrev = rev
1184 iterrev = rev
1155 base = index[iterrev][3]
1185 base = index[iterrev][3]
1156 while base != iterrev:
1186 while base != iterrev:
1157 iterrev = base
1187 iterrev = base
1158 base = index[iterrev][3]
1188 base = index[iterrev][3]
1159
1189
1160 self._chainbasecache[rev] = base
1190 self._chainbasecache[rev] = base
1161 return base
1191 return base
1162
1192
1163 def linkrev(self, rev):
1193 def linkrev(self, rev):
1164 return self.index[rev][4]
1194 return self.index[rev][4]
1165
1195
1166 def parentrevs(self, rev):
1196 def parentrevs(self, rev):
1167 try:
1197 try:
1168 entry = self.index[rev]
1198 entry = self.index[rev]
1169 except IndexError:
1199 except IndexError:
1170 if rev == wdirrev:
1200 if rev == wdirrev:
1171 raise error.WdirUnsupported
1201 raise error.WdirUnsupported
1172 raise
1202 raise
1173
1203
1174 return entry[5], entry[6]
1204 return entry[5], entry[6]
1175
1205
1176 def node(self, rev):
1206 def node(self, rev):
1177 try:
1207 try:
1178 return self.index[rev][7]
1208 return self.index[rev][7]
1179 except IndexError:
1209 except IndexError:
1180 if rev == wdirrev:
1210 if rev == wdirrev:
1181 raise error.WdirUnsupported
1211 raise error.WdirUnsupported
1182 raise
1212 raise
1183
1213
1184 # Derived from index values.
1214 # Derived from index values.
1185
1215
1186 def end(self, rev):
1216 def end(self, rev):
1187 return self.start(rev) + self.length(rev)
1217 return self.start(rev) + self.length(rev)
1188
1218
1189 def parents(self, node):
1219 def parents(self, node):
1190 i = self.index
1220 i = self.index
1191 d = i[self.rev(node)]
1221 d = i[self.rev(node)]
1192 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1222 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1193
1223
1194 def chainlen(self, rev):
1224 def chainlen(self, rev):
1195 return self._chaininfo(rev)[0]
1225 return self._chaininfo(rev)[0]
1196
1226
1197 def _chaininfo(self, rev):
1227 def _chaininfo(self, rev):
1198 chaininfocache = self._chaininfocache
1228 chaininfocache = self._chaininfocache
1199 if rev in chaininfocache:
1229 if rev in chaininfocache:
1200 return chaininfocache[rev]
1230 return chaininfocache[rev]
1201 index = self.index
1231 index = self.index
1202 generaldelta = self._generaldelta
1232 generaldelta = self._generaldelta
1203 iterrev = rev
1233 iterrev = rev
1204 e = index[iterrev]
1234 e = index[iterrev]
1205 clen = 0
1235 clen = 0
1206 compresseddeltalen = 0
1236 compresseddeltalen = 0
1207 while iterrev != e[3]:
1237 while iterrev != e[3]:
1208 clen += 1
1238 clen += 1
1209 compresseddeltalen += e[1]
1239 compresseddeltalen += e[1]
1210 if generaldelta:
1240 if generaldelta:
1211 iterrev = e[3]
1241 iterrev = e[3]
1212 else:
1242 else:
1213 iterrev -= 1
1243 iterrev -= 1
1214 if iterrev in chaininfocache:
1244 if iterrev in chaininfocache:
1215 t = chaininfocache[iterrev]
1245 t = chaininfocache[iterrev]
1216 clen += t[0]
1246 clen += t[0]
1217 compresseddeltalen += t[1]
1247 compresseddeltalen += t[1]
1218 break
1248 break
1219 e = index[iterrev]
1249 e = index[iterrev]
1220 else:
1250 else:
1221 # Add text length of base since decompressing that also takes
1251 # Add text length of base since decompressing that also takes
1222 # work. For cache hits the length is already included.
1252 # work. For cache hits the length is already included.
1223 compresseddeltalen += e[1]
1253 compresseddeltalen += e[1]
1224 r = (clen, compresseddeltalen)
1254 r = (clen, compresseddeltalen)
1225 chaininfocache[rev] = r
1255 chaininfocache[rev] = r
1226 return r
1256 return r
1227
1257
1228 def _deltachain(self, rev, stoprev=None):
1258 def _deltachain(self, rev, stoprev=None):
1229 """Obtain the delta chain for a revision.
1259 """Obtain the delta chain for a revision.
1230
1260
1231 ``stoprev`` specifies a revision to stop at. If not specified, we
1261 ``stoprev`` specifies a revision to stop at. If not specified, we
1232 stop at the base of the chain.
1262 stop at the base of the chain.
1233
1263
1234 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1264 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1235 revs in ascending order and ``stopped`` is a bool indicating whether
1265 revs in ascending order and ``stopped`` is a bool indicating whether
1236 ``stoprev`` was hit.
1266 ``stoprev`` was hit.
1237 """
1267 """
1238 # Try C implementation.
1268 # Try C implementation.
1239 try:
1269 try:
1240 return self.index.deltachain(rev, stoprev, self._generaldelta)
1270 return self.index.deltachain(rev, stoprev, self._generaldelta)
1241 except AttributeError:
1271 except AttributeError:
1242 pass
1272 pass
1243
1273
1244 chain = []
1274 chain = []
1245
1275
1246 # Alias to prevent attribute lookup in tight loop.
1276 # Alias to prevent attribute lookup in tight loop.
1247 index = self.index
1277 index = self.index
1248 generaldelta = self._generaldelta
1278 generaldelta = self._generaldelta
1249
1279
1250 iterrev = rev
1280 iterrev = rev
1251 e = index[iterrev]
1281 e = index[iterrev]
1252 while iterrev != e[3] and iterrev != stoprev:
1282 while iterrev != e[3] and iterrev != stoprev:
1253 chain.append(iterrev)
1283 chain.append(iterrev)
1254 if generaldelta:
1284 if generaldelta:
1255 iterrev = e[3]
1285 iterrev = e[3]
1256 else:
1286 else:
1257 iterrev -= 1
1287 iterrev -= 1
1258 e = index[iterrev]
1288 e = index[iterrev]
1259
1289
1260 if iterrev == stoprev:
1290 if iterrev == stoprev:
1261 stopped = True
1291 stopped = True
1262 else:
1292 else:
1263 chain.append(iterrev)
1293 chain.append(iterrev)
1264 stopped = False
1294 stopped = False
1265
1295
1266 chain.reverse()
1296 chain.reverse()
1267 return chain, stopped
1297 return chain, stopped
1268
1298
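A tiny standalone illustration of the chain walk above, using hypothetical index data that keeps only the delta-base field (a revision whose base is itself is a full snapshot, which is exactly the loop's stop condition):

index = [(0,), (0,), (1,), (2,)]   # rev 0 is a snapshot; revs 1..3 delta onto it

def toychain(index, rev):
    chain = []
    while index[rev][0] != rev:    # walk until we hit a snapshot
        chain.append(rev)
        rev = index[rev][0]
    chain.append(rev)
    chain.reverse()                # base first, like _deltachain's result
    return chain

assert toychain(index, 3) == [0, 1, 2, 3]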
1269 def ancestors(self, revs, stoprev=0, inclusive=False):
1299 def ancestors(self, revs, stoprev=0, inclusive=False):
1270 """Generate the ancestors of 'revs' in reverse topological order.
1300 """Generate the ancestors of 'revs' in reverse topological order.
1271 Does not generate revs lower than stoprev.
1301 Does not generate revs lower than stoprev.
1272
1302
1273 See the documentation for ancestor.lazyancestors for more details."""
1303 See the documentation for ancestor.lazyancestors for more details."""
1274
1304
1275 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1305 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1276 inclusive=inclusive)
1306 inclusive=inclusive)
1277
1307
1278 def descendants(self, revs):
1308 def descendants(self, revs):
1279 """Generate the descendants of 'revs' in revision order.
1309 """Generate the descendants of 'revs' in revision order.
1280
1310
1281 Yield a sequence of revision numbers starting with a child of
1311 Yield a sequence of revision numbers starting with a child of
1282 some rev in revs, i.e., each revision is *not* considered a
1312 some rev in revs, i.e., each revision is *not* considered a
1283 descendant of itself. Results are ordered by revision number (a
1313 descendant of itself. Results are ordered by revision number (a
1284 topological sort)."""
1314 topological sort)."""
1285 first = min(revs)
1315 first = min(revs)
1286 if first == nullrev:
1316 if first == nullrev:
1287 for i in self:
1317 for i in self:
1288 yield i
1318 yield i
1289 return
1319 return
1290
1320
1291 seen = set(revs)
1321 seen = set(revs)
1292 for i in self.revs(start=first + 1):
1322 for i in self.revs(start=first + 1):
1293 for x in self.parentrevs(i):
1323 for x in self.parentrevs(i):
1294 if x != nullrev and x in seen:
1324 if x != nullrev and x in seen:
1295 seen.add(i)
1325 seen.add(i)
1296 yield i
1326 yield i
1297 break
1327 break
1298
1328
1299 def findcommonmissing(self, common=None, heads=None):
1329 def findcommonmissing(self, common=None, heads=None):
1300 """Return a tuple of the ancestors of common and the ancestors of heads
1330 """Return a tuple of the ancestors of common and the ancestors of heads
1301 that are not ancestors of common. In revset terminology, we return the
1331 that are not ancestors of common. In revset terminology, we return the
1302 tuple:
1332 tuple:
1303
1333
1304 ::common, (::heads) - (::common)
1334 ::common, (::heads) - (::common)
1305
1335
1306 The list is sorted by revision number, meaning it is
1336 The list is sorted by revision number, meaning it is
1307 topologically sorted.
1337 topologically sorted.
1308
1338
1309 'heads' and 'common' are both lists of node IDs. If heads is
1339 'heads' and 'common' are both lists of node IDs. If heads is
1310 not supplied, uses all of the revlog's heads. If common is not
1340 not supplied, uses all of the revlog's heads. If common is not
1311 supplied, uses nullid."""
1341 supplied, uses nullid."""
1312 if common is None:
1342 if common is None:
1313 common = [nullid]
1343 common = [nullid]
1314 if heads is None:
1344 if heads is None:
1315 heads = self.heads()
1345 heads = self.heads()
1316
1346
1317 common = [self.rev(n) for n in common]
1347 common = [self.rev(n) for n in common]
1318 heads = [self.rev(n) for n in heads]
1348 heads = [self.rev(n) for n in heads]
1319
1349
1320 # we want the ancestors, but inclusive
1350 # we want the ancestors, but inclusive
1321 class lazyset(object):
1351 class lazyset(object):
1322 def __init__(self, lazyvalues):
1352 def __init__(self, lazyvalues):
1323 self.addedvalues = set()
1353 self.addedvalues = set()
1324 self.lazyvalues = lazyvalues
1354 self.lazyvalues = lazyvalues
1325
1355
1326 def __contains__(self, value):
1356 def __contains__(self, value):
1327 return value in self.addedvalues or value in self.lazyvalues
1357 return value in self.addedvalues or value in self.lazyvalues
1328
1358
1329 def __iter__(self):
1359 def __iter__(self):
1330 added = self.addedvalues
1360 added = self.addedvalues
1331 for r in added:
1361 for r in added:
1332 yield r
1362 yield r
1333 for r in self.lazyvalues:
1363 for r in self.lazyvalues:
1334 if r not in added:
1364 if r not in added:
1335 yield r
1365 yield r
1336
1366
1337 def add(self, value):
1367 def add(self, value):
1338 self.addedvalues.add(value)
1368 self.addedvalues.add(value)
1339
1369
1340 def update(self, values):
1370 def update(self, values):
1341 self.addedvalues.update(values)
1371 self.addedvalues.update(values)
1342
1372
1343 has = lazyset(self.ancestors(common))
1373 has = lazyset(self.ancestors(common))
1344 has.add(nullrev)
1374 has.add(nullrev)
1345 has.update(common)
1375 has.update(common)
1346
1376
1347 # take all ancestors from heads that aren't in has
1377 # take all ancestors from heads that aren't in has
1348 missing = set()
1378 missing = set()
1349 visit = collections.deque(r for r in heads if r not in has)
1379 visit = collections.deque(r for r in heads if r not in has)
1350 while visit:
1380 while visit:
1351 r = visit.popleft()
1381 r = visit.popleft()
1352 if r in missing:
1382 if r in missing:
1353 continue
1383 continue
1354 else:
1384 else:
1355 missing.add(r)
1385 missing.add(r)
1356 for p in self.parentrevs(r):
1386 for p in self.parentrevs(r):
1357 if p not in has:
1387 if p not in has:
1358 visit.append(p)
1388 visit.append(p)
1359 missing = list(missing)
1389 missing = list(missing)
1360 missing.sort()
1390 missing.sort()
1361 return has, [self.node(miss) for miss in missing]
1391 return has, [self.node(miss) for miss in missing]
1362
1392
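A small worked example on a hypothetical DAG (not taken from any repository):

    0 -- 1 -- 2 -- 4        common = [node 1], heads = [node 3, node 4]
           \
            3

Here ::common is {0, 1} and ::heads is {0, 1, 2, 3, 4}, so the second element of the returned tuple corresponds to revisions {2, 3, 4}, sorted by revision number, while the first element is the lazy, inclusive set of ancestors of common (plus nullrev).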
1363 def incrementalmissingrevs(self, common=None):
1393 def incrementalmissingrevs(self, common=None):
1364 """Return an object that can be used to incrementally compute the
1394 """Return an object that can be used to incrementally compute the
1365 revision numbers of the ancestors of arbitrary sets that are not
1395 revision numbers of the ancestors of arbitrary sets that are not
1366 ancestors of common. This is an ancestor.incrementalmissingancestors
1396 ancestors of common. This is an ancestor.incrementalmissingancestors
1367 object.
1397 object.
1368
1398
1369 'common' is a list of revision numbers. If common is not supplied, uses
1399 'common' is a list of revision numbers. If common is not supplied, uses
1370 nullrev.
1400 nullrev.
1371 """
1401 """
1372 if common is None:
1402 if common is None:
1373 common = [nullrev]
1403 common = [nullrev]
1374
1404
1375 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1405 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1376
1406
1377 def findmissingrevs(self, common=None, heads=None):
1407 def findmissingrevs(self, common=None, heads=None):
1378 """Return the revision numbers of the ancestors of heads that
1408 """Return the revision numbers of the ancestors of heads that
1379 are not ancestors of common.
1409 are not ancestors of common.
1380
1410
1381 More specifically, return a list of revision numbers corresponding to
1411 More specifically, return a list of revision numbers corresponding to
1382 nodes N such that every N satisfies the following constraints:
1412 nodes N such that every N satisfies the following constraints:
1383
1413
1384 1. N is an ancestor of some node in 'heads'
1414 1. N is an ancestor of some node in 'heads'
1385 2. N is not an ancestor of any node in 'common'
1415 2. N is not an ancestor of any node in 'common'
1386
1416
1387 The list is sorted by revision number, meaning it is
1417 The list is sorted by revision number, meaning it is
1388 topologically sorted.
1418 topologically sorted.
1389
1419
1390 'heads' and 'common' are both lists of revision numbers. If heads is
1420 'heads' and 'common' are both lists of revision numbers. If heads is
1391 not supplied, uses all of the revlog's heads. If common is not
1421 not supplied, uses all of the revlog's heads. If common is not
1392 supplied, uses nullid."""
1422 supplied, uses nullid."""
1393 if common is None:
1423 if common is None:
1394 common = [nullrev]
1424 common = [nullrev]
1395 if heads is None:
1425 if heads is None:
1396 heads = self.headrevs()
1426 heads = self.headrevs()
1397
1427
1398 inc = self.incrementalmissingrevs(common=common)
1428 inc = self.incrementalmissingrevs(common=common)
1399 return inc.missingancestors(heads)
1429 return inc.missingancestors(heads)
1400
1430
1401 def findmissing(self, common=None, heads=None):
1431 def findmissing(self, common=None, heads=None):
1402 """Return the ancestors of heads that are not ancestors of common.
1432 """Return the ancestors of heads that are not ancestors of common.
1403
1433
1404 More specifically, return a list of nodes N such that every N
1434 More specifically, return a list of nodes N such that every N
1405 satisfies the following constraints:
1435 satisfies the following constraints:
1406
1436
1407 1. N is an ancestor of some node in 'heads'
1437 1. N is an ancestor of some node in 'heads'
1408 2. N is not an ancestor of any node in 'common'
1438 2. N is not an ancestor of any node in 'common'
1409
1439
1410 The list is sorted by revision number, meaning it is
1440 The list is sorted by revision number, meaning it is
1411 topologically sorted.
1441 topologically sorted.
1412
1442
1413 'heads' and 'common' are both lists of node IDs. If heads is
1443 'heads' and 'common' are both lists of node IDs. If heads is
1414 not supplied, uses all of the revlog's heads. If common is not
1444 not supplied, uses all of the revlog's heads. If common is not
1415 supplied, uses nullid."""
1445 supplied, uses nullid."""
1416 if common is None:
1446 if common is None:
1417 common = [nullid]
1447 common = [nullid]
1418 if heads is None:
1448 if heads is None:
1419 heads = self.heads()
1449 heads = self.heads()
1420
1450
1421 common = [self.rev(n) for n in common]
1451 common = [self.rev(n) for n in common]
1422 heads = [self.rev(n) for n in heads]
1452 heads = [self.rev(n) for n in heads]
1423
1453
1424 inc = self.incrementalmissingrevs(common=common)
1454 inc = self.incrementalmissingrevs(common=common)
1425 return [self.node(r) for r in inc.missingancestors(heads)]
1455 return [self.node(r) for r in inc.missingancestors(heads)]
1426
1456
1427 def nodesbetween(self, roots=None, heads=None):
1457 def nodesbetween(self, roots=None, heads=None):
1428 """Return a topological path from 'roots' to 'heads'.
1458 """Return a topological path from 'roots' to 'heads'.
1429
1459
1430 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1460 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1431 topologically sorted list of all nodes N that satisfy both of
1461 topologically sorted list of all nodes N that satisfy both of
1432 these constraints:
1462 these constraints:
1433
1463
1434 1. N is a descendant of some node in 'roots'
1464 1. N is a descendant of some node in 'roots'
1435 2. N is an ancestor of some node in 'heads'
1465 2. N is an ancestor of some node in 'heads'
1436
1466
1437 Every node is considered to be both a descendant and an ancestor
1467 Every node is considered to be both a descendant and an ancestor
1438 of itself, so every reachable node in 'roots' and 'heads' will be
1468 of itself, so every reachable node in 'roots' and 'heads' will be
1439 included in 'nodes'.
1469 included in 'nodes'.
1440
1470
1441 'outroots' is the list of reachable nodes in 'roots', i.e., the
1471 'outroots' is the list of reachable nodes in 'roots', i.e., the
1442 subset of 'roots' that is returned in 'nodes'. Likewise,
1472 subset of 'roots' that is returned in 'nodes'. Likewise,
1443 'outheads' is the subset of 'heads' that is also in 'nodes'.
1473 'outheads' is the subset of 'heads' that is also in 'nodes'.
1444
1474
1445 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1475 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1446 unspecified, uses nullid as the only root. If 'heads' is
1476 unspecified, uses nullid as the only root. If 'heads' is
1447 unspecified, uses list of all of the revlog's heads."""
1477 unspecified, uses list of all of the revlog's heads."""
1448 nonodes = ([], [], [])
1478 nonodes = ([], [], [])
1449 if roots is not None:
1479 if roots is not None:
1450 roots = list(roots)
1480 roots = list(roots)
1451 if not roots:
1481 if not roots:
1452 return nonodes
1482 return nonodes
1453 lowestrev = min([self.rev(n) for n in roots])
1483 lowestrev = min([self.rev(n) for n in roots])
1454 else:
1484 else:
1455 roots = [nullid] # Everybody's a descendant of nullid
1485 roots = [nullid] # Everybody's a descendant of nullid
1456 lowestrev = nullrev
1486 lowestrev = nullrev
1457 if (lowestrev == nullrev) and (heads is None):
1487 if (lowestrev == nullrev) and (heads is None):
1458 # We want _all_ the nodes!
1488 # We want _all_ the nodes!
1459 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1489 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1460 if heads is None:
1490 if heads is None:
1461 # All nodes are ancestors, so the latest ancestor is the last
1491 # All nodes are ancestors, so the latest ancestor is the last
1462 # node.
1492 # node.
1463 highestrev = len(self) - 1
1493 highestrev = len(self) - 1
1464 # Set ancestors to None to signal that every node is an ancestor.
1494 # Set ancestors to None to signal that every node is an ancestor.
1465 ancestors = None
1495 ancestors = None
1466 # Set heads to an empty dictionary for later discovery of heads
1496 # Set heads to an empty dictionary for later discovery of heads
1467 heads = {}
1497 heads = {}
1468 else:
1498 else:
1469 heads = list(heads)
1499 heads = list(heads)
1470 if not heads:
1500 if not heads:
1471 return nonodes
1501 return nonodes
1472 ancestors = set()
1502 ancestors = set()
1473 # Turn heads into a dictionary so we can remove 'fake' heads.
1503 # Turn heads into a dictionary so we can remove 'fake' heads.
1474 # Also, later we will be using it to filter out the heads we can't
1504 # Also, later we will be using it to filter out the heads we can't
1475 # find from roots.
1505 # find from roots.
1476 heads = dict.fromkeys(heads, False)
1506 heads = dict.fromkeys(heads, False)
1477 # Start at the top and keep marking parents until we're done.
1507 # Start at the top and keep marking parents until we're done.
1478 nodestotag = set(heads)
1508 nodestotag = set(heads)
1479 # Remember where the top was so we can use it as a limit later.
1509 # Remember where the top was so we can use it as a limit later.
1480 highestrev = max([self.rev(n) for n in nodestotag])
1510 highestrev = max([self.rev(n) for n in nodestotag])
1481 while nodestotag:
1511 while nodestotag:
1482 # grab a node to tag
1512 # grab a node to tag
1483 n = nodestotag.pop()
1513 n = nodestotag.pop()
1484 # Never tag nullid
1514 # Never tag nullid
1485 if n == nullid:
1515 if n == nullid:
1486 continue
1516 continue
1487 # A node's revision number represents its place in a
1517 # A node's revision number represents its place in a
1488 # topologically sorted list of nodes.
1518 # topologically sorted list of nodes.
1489 r = self.rev(n)
1519 r = self.rev(n)
1490 if r >= lowestrev:
1520 if r >= lowestrev:
1491 if n not in ancestors:
1521 if n not in ancestors:
1492 # If we are possibly a descendant of one of the roots
1522 # If we are possibly a descendant of one of the roots
1493 # and we haven't already been marked as an ancestor
1523 # and we haven't already been marked as an ancestor
1494 ancestors.add(n) # Mark as ancestor
1524 ancestors.add(n) # Mark as ancestor
1495 # Add non-nullid parents to list of nodes to tag.
1525 # Add non-nullid parents to list of nodes to tag.
1496 nodestotag.update([p for p in self.parents(n) if
1526 nodestotag.update([p for p in self.parents(n) if
1497 p != nullid])
1527 p != nullid])
1498 elif n in heads: # We've seen it before, is it a fake head?
1528 elif n in heads: # We've seen it before, is it a fake head?
1499 # So it is, real heads should not be the ancestors of
1529 # So it is, real heads should not be the ancestors of
1500 # any other heads.
1530 # any other heads.
1501 heads.pop(n)
1531 heads.pop(n)
1502 if not ancestors:
1532 if not ancestors:
1503 return nonodes
1533 return nonodes
1504 # Now that we have our set of ancestors, we want to remove any
1534 # Now that we have our set of ancestors, we want to remove any
1505 # roots that are not ancestors.
1535 # roots that are not ancestors.
1506
1536
1507 # If one of the roots was nullid, everything is included anyway.
1537 # If one of the roots was nullid, everything is included anyway.
1508 if lowestrev > nullrev:
1538 if lowestrev > nullrev:
1509 # But, since we weren't, let's recompute the lowest rev to not
1539 # But, since we weren't, let's recompute the lowest rev to not
1510 # include roots that aren't ancestors.
1540 # include roots that aren't ancestors.
1511
1541
1512 # Filter out roots that aren't ancestors of heads
1542 # Filter out roots that aren't ancestors of heads
1513 roots = [root for root in roots if root in ancestors]
1543 roots = [root for root in roots if root in ancestors]
1514 # Recompute the lowest revision
1544 # Recompute the lowest revision
1515 if roots:
1545 if roots:
1516 lowestrev = min([self.rev(root) for root in roots])
1546 lowestrev = min([self.rev(root) for root in roots])
1517 else:
1547 else:
1518 # No more roots? Return empty list
1548 # No more roots? Return empty list
1519 return nonodes
1549 return nonodes
1520 else:
1550 else:
1521 # We are descending from nullid, and don't need to care about
1551 # We are descending from nullid, and don't need to care about
1522 # any other roots.
1552 # any other roots.
1523 lowestrev = nullrev
1553 lowestrev = nullrev
1524 roots = [nullid]
1554 roots = [nullid]
1525 # Transform our roots list into a set.
1555 # Transform our roots list into a set.
1526 descendants = set(roots)
1556 descendants = set(roots)
1527 # Also, keep the original roots so we can filter out roots that aren't
1557 # Also, keep the original roots so we can filter out roots that aren't
1528 # 'real' roots (i.e. are descended from other roots).
1558 # 'real' roots (i.e. are descended from other roots).
1529 roots = descendants.copy()
1559 roots = descendants.copy()
1530 # Our topologically sorted list of output nodes.
1560 # Our topologically sorted list of output nodes.
1531 orderedout = []
1561 orderedout = []
1532 # Don't start at nullid since we don't want nullid in our output list,
1562 # Don't start at nullid since we don't want nullid in our output list,
1533 # and if nullid shows up in descendants, empty parents will look like
1563 # and if nullid shows up in descendants, empty parents will look like
1534 # they're descendants.
1564 # they're descendants.
1535 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1565 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1536 n = self.node(r)
1566 n = self.node(r)
1537 isdescendant = False
1567 isdescendant = False
1538 if lowestrev == nullrev: # Everybody is a descendant of nullid
1568 if lowestrev == nullrev: # Everybody is a descendant of nullid
1539 isdescendant = True
1569 isdescendant = True
1540 elif n in descendants:
1570 elif n in descendants:
1541 # n is already a descendant
1571 # n is already a descendant
1542 isdescendant = True
1572 isdescendant = True
1543 # This check only needs to be done here because all the roots
1573 # This check only needs to be done here because all the roots
1544 # will start being marked as descendants before the loop.
1574 # will start being marked as descendants before the loop.
1545 if n in roots:
1575 if n in roots:
1546 # If n was a root, check if it's a 'real' root.
1576 # If n was a root, check if it's a 'real' root.
1547 p = tuple(self.parents(n))
1577 p = tuple(self.parents(n))
1548 # If any of its parents are descendants, it's not a root.
1578 # If any of its parents are descendants, it's not a root.
1549 if (p[0] in descendants) or (p[1] in descendants):
1579 if (p[0] in descendants) or (p[1] in descendants):
1550 roots.remove(n)
1580 roots.remove(n)
1551 else:
1581 else:
1552 p = tuple(self.parents(n))
1582 p = tuple(self.parents(n))
1553 # A node is a descendant if either of its parents is a
1583 # A node is a descendant if either of its parents is a
1554 # descendant. (We seeded the descendants set with the roots
1584 # descendant. (We seeded the descendants set with the roots
1555 # up there, remember?)
1585 # up there, remember?)
1556 if (p[0] in descendants) or (p[1] in descendants):
1586 if (p[0] in descendants) or (p[1] in descendants):
1557 descendants.add(n)
1587 descendants.add(n)
1558 isdescendant = True
1588 isdescendant = True
1559 if isdescendant and ((ancestors is None) or (n in ancestors)):
1589 if isdescendant and ((ancestors is None) or (n in ancestors)):
1560 # Only include nodes that are both descendants and ancestors.
1590 # Only include nodes that are both descendants and ancestors.
1561 orderedout.append(n)
1591 orderedout.append(n)
1562 if (ancestors is not None) and (n in heads):
1592 if (ancestors is not None) and (n in heads):
1563 # We're trying to figure out which heads are reachable
1593 # We're trying to figure out which heads are reachable
1564 # from roots.
1594 # from roots.
1565 # Mark this head as having been reached
1595 # Mark this head as having been reached
1566 heads[n] = True
1596 heads[n] = True
1567 elif ancestors is None:
1597 elif ancestors is None:
1568 # Otherwise, we're trying to discover the heads.
1598 # Otherwise, we're trying to discover the heads.
1569 # Assume this is a head because if it isn't, the next step
1599 # Assume this is a head because if it isn't, the next step
1570 # will eventually remove it.
1600 # will eventually remove it.
1571 heads[n] = True
1601 heads[n] = True
1572 # But, obviously its parents aren't.
1602 # But, obviously its parents aren't.
1573 for p in self.parents(n):
1603 for p in self.parents(n):
1574 heads.pop(p, None)
1604 heads.pop(p, None)
1575 heads = [head for head, flag in heads.iteritems() if flag]
1605 heads = [head for head, flag in heads.iteritems() if flag]
1576 roots = list(roots)
1606 roots = list(roots)
1577 assert orderedout
1607 assert orderedout
1578 assert roots
1608 assert roots
1579 assert heads
1609 assert heads
1580 return (orderedout, roots, heads)
1610 return (orderedout, roots, heads)
1581
1611
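Continuing the hypothetical DAG above (0 -- 1 -- 2 -- 4, with 3 branching off 1): nodesbetween(roots=[node 1], heads=[node 4]) keeps exactly the nodes that are both descendants of 1 and ancestors of 4, i.e. revisions 1, 2 and 4. 'outroots' is [node 1] and 'outheads' is [node 4]; revision 3 is excluded because it is not an ancestor of any requested head, and revision 0 because it is not a descendant of any requested root.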
1582 def headrevs(self):
1612 def headrevs(self):
1583 try:
1613 try:
1584 return self.index.headrevs()
1614 return self.index.headrevs()
1585 except AttributeError:
1615 except AttributeError:
1586 return self._headrevs()
1616 return self._headrevs()
1587
1617
1588 def computephases(self, roots):
1618 def computephases(self, roots):
1589 return self.index.computephasesmapsets(roots)
1619 return self.index.computephasesmapsets(roots)
1590
1620
1591 def _headrevs(self):
1621 def _headrevs(self):
1592 count = len(self)
1622 count = len(self)
1593 if not count:
1623 if not count:
1594 return [nullrev]
1624 return [nullrev]
1595 # we won't iterate over filtered revs so nobody is a head at start
1625 # we won't iterate over filtered revs so nobody is a head at start
1596 ishead = [0] * (count + 1)
1626 ishead = [0] * (count + 1)
1597 index = self.index
1627 index = self.index
1598 for r in self:
1628 for r in self:
1599 ishead[r] = 1 # I may be a head
1629 ishead[r] = 1 # I may be a head
1600 e = index[r]
1630 e = index[r]
1601 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1631 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1602 return [r for r, val in enumerate(ishead) if val]
1632 return [r for r, val in enumerate(ishead) if val]
1603
1633
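The same head-marking idea as _headrevs above, on a toy parent table (hypothetical data, with -1 standing in for the null revision):

parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1), 3: (1, 2)}

ishead = [1] * len(parents)          # every revision may be a head...
for r, (p1, p2) in sorted(parents.items()):
    for p in (p1, p2):
        if p != -1:
            ishead[p] = 0            # ...but a revision with a child is not

assert [r for r, v in enumerate(ishead) if v] == [3]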
1604 def heads(self, start=None, stop=None):
1634 def heads(self, start=None, stop=None):
1605 """return the list of all nodes that have no children
1635 """return the list of all nodes that have no children
1606
1636
1607 if start is specified, only heads that are descendants of
1637 if start is specified, only heads that are descendants of
1608 start will be returned
1638 start will be returned
1609 if stop is specified, it will consider all the revs from stop
1639 if stop is specified, it will consider all the revs from stop
1610 as if they had no children
1640 as if they had no children
1611 """
1641 """
1612 if start is None and stop is None:
1642 if start is None and stop is None:
1613 if not len(self):
1643 if not len(self):
1614 return [nullid]
1644 return [nullid]
1615 return [self.node(r) for r in self.headrevs()]
1645 return [self.node(r) for r in self.headrevs()]
1616
1646
1617 if start is None:
1647 if start is None:
1618 start = nullid
1648 start = nullid
1619 if stop is None:
1649 if stop is None:
1620 stop = []
1650 stop = []
1621 stoprevs = set([self.rev(n) for n in stop])
1651 stoprevs = set([self.rev(n) for n in stop])
1622 startrev = self.rev(start)
1652 startrev = self.rev(start)
1623 reachable = {startrev}
1653 reachable = {startrev}
1624 heads = {startrev}
1654 heads = {startrev}
1625
1655
1626 parentrevs = self.parentrevs
1656 parentrevs = self.parentrevs
1627 for r in self.revs(start=startrev + 1):
1657 for r in self.revs(start=startrev + 1):
1628 for p in parentrevs(r):
1658 for p in parentrevs(r):
1629 if p in reachable:
1659 if p in reachable:
1630 if r not in stoprevs:
1660 if r not in stoprevs:
1631 reachable.add(r)
1661 reachable.add(r)
1632 heads.add(r)
1662 heads.add(r)
1633 if p in heads and p not in stoprevs:
1663 if p in heads and p not in stoprevs:
1634 heads.remove(p)
1664 heads.remove(p)
1635
1665
1636 return [self.node(r) for r in heads]
1666 return [self.node(r) for r in heads]
1637
1667
1638 def children(self, node):
1668 def children(self, node):
1639 """find the children of a given node"""
1669 """find the children of a given node"""
1640 c = []
1670 c = []
1641 p = self.rev(node)
1671 p = self.rev(node)
1642 for r in self.revs(start=p + 1):
1672 for r in self.revs(start=p + 1):
1643 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1673 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1644 if prevs:
1674 if prevs:
1645 for pr in prevs:
1675 for pr in prevs:
1646 if pr == p:
1676 if pr == p:
1647 c.append(self.node(r))
1677 c.append(self.node(r))
1648 elif p == nullrev:
1678 elif p == nullrev:
1649 c.append(self.node(r))
1679 c.append(self.node(r))
1650 return c
1680 return c
1651
1681
1652 def commonancestorsheads(self, a, b):
1682 def commonancestorsheads(self, a, b):
1653 """calculate all the heads of the common ancestors of nodes a and b"""
1683 """calculate all the heads of the common ancestors of nodes a and b"""
1654 a, b = self.rev(a), self.rev(b)
1684 a, b = self.rev(a), self.rev(b)
1655 ancs = self._commonancestorsheads(a, b)
1685 ancs = self._commonancestorsheads(a, b)
1656 return pycompat.maplist(self.node, ancs)
1686 return pycompat.maplist(self.node, ancs)
1657
1687
1658 def _commonancestorsheads(self, *revs):
1688 def _commonancestorsheads(self, *revs):
1659 """calculate all the heads of the common ancestors of revs"""
1689 """calculate all the heads of the common ancestors of revs"""
1660 try:
1690 try:
1661 ancs = self.index.commonancestorsheads(*revs)
1691 ancs = self.index.commonancestorsheads(*revs)
1662 except (AttributeError, OverflowError): # C implementation failed
1692 except (AttributeError, OverflowError): # C implementation failed
1663 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1693 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1664 return ancs
1694 return ancs
1665
1695
1666 def isancestor(self, a, b):
1696 def isancestor(self, a, b):
1667 """return True if node a is an ancestor of node b
1697 """return True if node a is an ancestor of node b
1668
1698
1669 A revision is considered an ancestor of itself."""
1699 A revision is considered an ancestor of itself."""
1670 a, b = self.rev(a), self.rev(b)
1700 a, b = self.rev(a), self.rev(b)
1671 return self.isancestorrev(a, b)
1701 return self.isancestorrev(a, b)
1672
1702
1673 def descendant(self, a, b):
1703 def descendant(self, a, b):
1674 msg = (b'revlog.descendant is deprecated, use revlog.isancestorrev')
1704 msg = (b'revlog.descendant is deprecated, use revlog.isancestorrev')
1675 util.nouideprecwarn(msg, b'4.7')
1705 util.nouideprecwarn(msg, b'4.7')
1676 return self.isancestorrev(a, b)
1706 return self.isancestorrev(a, b)
1677
1707
1678 def isancestorrev(self, a, b):
1708 def isancestorrev(self, a, b):
1679 """return True if revision a is an ancestor of revision b
1709 """return True if revision a is an ancestor of revision b
1680
1710
1681 A revision is considered an ancestor of itself.
1711 A revision is considered an ancestor of itself.
1682
1712
1683 The implementation of this is trivial but the use of
1713 The implementation of this is trivial but the use of
1684 commonancestorsheads is not."""
1714 commonancestorsheads is not."""
1685 if a == nullrev:
1715 if a == nullrev:
1686 return True
1716 return True
1687 elif a == b:
1717 elif a == b:
1688 return True
1718 return True
1689 elif a > b:
1719 elif a > b:
1690 return False
1720 return False
1691 return a in self._commonancestorsheads(a, b)
1721 return a in self._commonancestorsheads(a, b)
1692
1722
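On the same hypothetical DAG (0 -- 1 -- 2 -- 4, with 3 branching off 1): isancestorrev(1, 4) computes commonancestorsheads(1, 4) == {1}, and since 1 is in that set the result is True; isancestorrev(3, 4) computes commonancestorsheads(3, 4) == {1}, 3 is not in it, so the result is False.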
1693 def ancestor(self, a, b):
1723 def ancestor(self, a, b):
1694 """calculate the "best" common ancestor of nodes a and b"""
1724 """calculate the "best" common ancestor of nodes a and b"""
1695
1725
1696 a, b = self.rev(a), self.rev(b)
1726 a, b = self.rev(a), self.rev(b)
1697 try:
1727 try:
1698 ancs = self.index.ancestors(a, b)
1728 ancs = self.index.ancestors(a, b)
1699 except (AttributeError, OverflowError):
1729 except (AttributeError, OverflowError):
1700 ancs = ancestor.ancestors(self.parentrevs, a, b)
1730 ancs = ancestor.ancestors(self.parentrevs, a, b)
1701 if ancs:
1731 if ancs:
1702 # choose a consistent winner when there's a tie
1732 # choose a consistent winner when there's a tie
1703 return min(map(self.node, ancs))
1733 return min(map(self.node, ancs))
1704 return nullid
1734 return nullid
1705
1735
1706 def _match(self, id):
1736 def _match(self, id):
1707 if isinstance(id, int):
1737 if isinstance(id, int):
1708 # rev
1738 # rev
1709 return self.node(id)
1739 return self.node(id)
1710 if len(id) == 20:
1740 if len(id) == 20:
1711 # possibly a binary node
1741 # possibly a binary node
1712 # odds of a binary node being all hex in ASCII are 1 in 10**25
1742 # odds of a binary node being all hex in ASCII are 1 in 10**25
1713 try:
1743 try:
1714 node = id
1744 node = id
1715 self.rev(node) # quick search the index
1745 self.rev(node) # quick search the index
1716 return node
1746 return node
1717 except LookupError:
1747 except LookupError:
1718 pass # may be partial hex id
1748 pass # may be partial hex id
1719 try:
1749 try:
1720 # str(rev)
1750 # str(rev)
1721 rev = int(id)
1751 rev = int(id)
1722 if "%d" % rev != id:
1752 if "%d" % rev != id:
1723 raise ValueError
1753 raise ValueError
1724 if rev < 0:
1754 if rev < 0:
1725 rev = len(self) + rev
1755 rev = len(self) + rev
1726 if rev < 0 or rev >= len(self):
1756 if rev < 0 or rev >= len(self):
1727 raise ValueError
1757 raise ValueError
1728 return self.node(rev)
1758 return self.node(rev)
1729 except (ValueError, OverflowError):
1759 except (ValueError, OverflowError):
1730 pass
1760 pass
1731 if len(id) == 40:
1761 if len(id) == 40:
1732 try:
1762 try:
1733 # a full hex nodeid?
1763 # a full hex nodeid?
1734 node = bin(id)
1764 node = bin(id)
1735 self.rev(node)
1765 self.rev(node)
1736 return node
1766 return node
1737 except (TypeError, LookupError):
1767 except (TypeError, LookupError):
1738 pass
1768 pass
1739
1769
1740 def _partialmatch(self, id):
1770 def _partialmatch(self, id):
1741 # we don't care about wdirfilenodeids as they should always be full hashes
1771 # we don't care about wdirfilenodeids as they should always be full hashes
1742 maybewdir = wdirhex.startswith(id)
1772 maybewdir = wdirhex.startswith(id)
1743 try:
1773 try:
1744 partial = self.index.partialmatch(id)
1774 partial = self.index.partialmatch(id)
1745 if partial and self.hasnode(partial):
1775 if partial and self.hasnode(partial):
1746 if maybewdir:
1776 if maybewdir:
1747 # single 'ff...' match in radix tree, ambiguous with wdir
1777 # single 'ff...' match in radix tree, ambiguous with wdir
1748 raise RevlogError
1778 raise RevlogError
1749 return partial
1779 return partial
1750 if maybewdir:
1780 if maybewdir:
1751 # no 'ff...' match in radix tree, wdir identified
1781 # no 'ff...' match in radix tree, wdir identified
1752 raise error.WdirUnsupported
1782 raise error.WdirUnsupported
1753 return None
1783 return None
1754 except RevlogError:
1784 except RevlogError:
1755 # parsers.c radix tree lookup gave multiple matches
1785 # parsers.c radix tree lookup gave multiple matches
1756 # fast path: for unfiltered changelog, radix tree is accurate
1786 # fast path: for unfiltered changelog, radix tree is accurate
1757 if not getattr(self, 'filteredrevs', None):
1787 if not getattr(self, 'filteredrevs', None):
1758 raise LookupError(id, self.indexfile,
1788 raise LookupError(id, self.indexfile,
1759 _('ambiguous identifier'))
1789 _('ambiguous identifier'))
1760 # fall through to slow path that filters hidden revisions
1790 # fall through to slow path that filters hidden revisions
1761 except (AttributeError, ValueError):
1791 except (AttributeError, ValueError):
1762 # we are pure python, or key was too short to search radix tree
1792 # we are pure python, or key was too short to search radix tree
1763 pass
1793 pass
1764
1794
1765 if id in self._pcache:
1795 if id in self._pcache:
1766 return self._pcache[id]
1796 return self._pcache[id]
1767
1797
1768 if len(id) <= 40:
1798 if len(id) <= 40:
1769 try:
1799 try:
1770 # hex(node)[:...]
1800 # hex(node)[:...]
1771 l = len(id) // 2 # grab an even number of digits
1801 l = len(id) // 2 # grab an even number of digits
1772 prefix = bin(id[:l * 2])
1802 prefix = bin(id[:l * 2])
1773 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1803 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1774 nl = [n for n in nl if hex(n).startswith(id) and
1804 nl = [n for n in nl if hex(n).startswith(id) and
1775 self.hasnode(n)]
1805 self.hasnode(n)]
1776 if len(nl) > 0:
1806 if len(nl) > 0:
1777 if len(nl) == 1 and not maybewdir:
1807 if len(nl) == 1 and not maybewdir:
1778 self._pcache[id] = nl[0]
1808 self._pcache[id] = nl[0]
1779 return nl[0]
1809 return nl[0]
1780 raise LookupError(id, self.indexfile,
1810 raise LookupError(id, self.indexfile,
1781 _('ambiguous identifier'))
1811 _('ambiguous identifier'))
1782 if maybewdir:
1812 if maybewdir:
1783 raise error.WdirUnsupported
1813 raise error.WdirUnsupported
1784 return None
1814 return None
1785 except TypeError:
1815 except TypeError:
1786 pass
1816 pass
1787
1817
1788 def lookup(self, id):
1818 def lookup(self, id):
1789 """locate a node based on:
1819 """locate a node based on:
1790 - revision number or str(revision number)
1820 - revision number or str(revision number)
1791 - nodeid or subset of hex nodeid
1821 - nodeid or subset of hex nodeid
1792 """
1822 """
1793 n = self._match(id)
1823 n = self._match(id)
1794 if n is not None:
1824 if n is not None:
1795 return n
1825 return n
1796 n = self._partialmatch(id)
1826 n = self._partialmatch(id)
1797 if n:
1827 if n:
1798 return n
1828 return n
1799
1829
1800 raise LookupError(id, self.indexfile, _('no match found'))
1830 raise LookupError(id, self.indexfile, _('no match found'))
1801
1831
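# Editor's sketch (not part of revlog.py): a standalone model of the
# resolution order that _match()/lookup() above describe -- integer revision
# (possibly negative, counted from the end), then unambiguous hex-nodeid
# prefix. 'nodes' is a hypothetical list of 40-character hex nodeids indexed
# by revision number; the real code works on binary nodeids and a radix tree.

def toy_lookup(nodes, ident):
    try:
        rev = int(ident)                      # str(revision number)
        if "%d" % rev == ident:               # reject things like ' 5' or '0x10'
            if rev < 0:
                rev += len(nodes)
            if 0 <= rev < len(nodes):
                return nodes[rev]
    except ValueError:
        pass                                  # not a revision number
    matches = [n for n in nodes if n.startswith(ident)]
    if len(matches) == 1:                     # unambiguous (possibly full) prefix
        return matches[0]
    raise LookupError('ambiguous identifier' if matches else 'no match found')

# e.g. toy_lookup(hexnodes, '-1') returns the tip node, while an ambiguous or
# unknown prefix raises LookupError, mirroring lookup() above.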
1802 def shortest(self, node, minlength=1):
1832 def shortest(self, node, minlength=1):
1803 """Find the shortest unambiguous prefix that matches node."""
1833 """Find the shortest unambiguous prefix that matches node."""
1804 def isvalid(prefix):
1834 def isvalid(prefix):
1805 try:
1835 try:
1806 node = self._partialmatch(prefix)
1836 node = self._partialmatch(prefix)
1807 except error.RevlogError:
1837 except error.RevlogError:
1808 return False
1838 return False
1809 except error.WdirUnsupported:
1839 except error.WdirUnsupported:
1810 # single 'ff...' match
1840 # single 'ff...' match
1811 return True
1841 return True
1812 if node is None:
1842 if node is None:
1813 raise LookupError(node, self.indexfile, _('no node'))
1843 raise LookupError(node, self.indexfile, _('no node'))
1814 return True
1844 return True
1815
1845
1816 def maybewdir(prefix):
1846 def maybewdir(prefix):
1817 return all(c == 'f' for c in prefix)
1847 return all(c == 'f' for c in prefix)
1818
1848
1819 hexnode = hex(node)
1849 hexnode = hex(node)
1820
1850
1821 def disambiguate(hexnode, minlength):
1851 def disambiguate(hexnode, minlength):
1822 """Disambiguate against wdirid."""
1852 """Disambiguate against wdirid."""
1823 for length in range(minlength, 41):
1853 for length in range(minlength, 41):
1824 prefix = hexnode[:length]
1854 prefix = hexnode[:length]
1825 if not maybewdir(prefix):
1855 if not maybewdir(prefix):
1826 return prefix
1856 return prefix
1827
1857
1828 if not getattr(self, 'filteredrevs', None):
1858 if not getattr(self, 'filteredrevs', None):
1829 try:
1859 try:
1830 length = max(self.index.shortest(node), minlength)
1860 length = max(self.index.shortest(node), minlength)
1831 return disambiguate(hexnode, length)
1861 return disambiguate(hexnode, length)
1832 except RevlogError:
1862 except RevlogError:
1833 if node != wdirid:
1863 if node != wdirid:
1834 raise LookupError(node, self.indexfile, _('no node'))
1864 raise LookupError(node, self.indexfile, _('no node'))
1835 except AttributeError:
1865 except AttributeError:
1836 # Fall through to pure code
1866 # Fall through to pure code
1837 pass
1867 pass
1838
1868
1839 if node == wdirid:
1869 if node == wdirid:
1840 for length in range(minlength, 41):
1870 for length in range(minlength, 41):
1841 prefix = hexnode[:length]
1871 prefix = hexnode[:length]
1842 if isvalid(prefix):
1872 if isvalid(prefix):
1843 return prefix
1873 return prefix
1844
1874
1845 for length in range(minlength, 41):
1875 for length in range(minlength, 41):
1846 prefix = hexnode[:length]
1876 prefix = hexnode[:length]
1847 if isvalid(prefix):
1877 if isvalid(prefix):
1848 return disambiguate(hexnode, length)
1878 return disambiguate(hexnode, length)
1849
1879
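# Editor's sketch (not part of revlog.py): the core idea of shortest() above.
# Grow a prefix of the target hex nodeid until no other node shares it, and
# skip all-'f' prefixes, which would be ambiguous with the virtual working
# directory id ('ffff...'). 'allhex' is a hypothetical set of 40-character
# hex nodeids standing in for the index; the real code uses the C radix tree
# when available and also honours filtered (hidden) revisions.

def toy_shortest(allhex, hexnode, minlength=1):
    for length in range(minlength, 41):
        prefix = hexnode[:length]
        ambiguous = any(other != hexnode and other.startswith(prefix)
                        for other in allhex)
        if not ambiguous and not all(c == 'f' for c in prefix):
            return prefix
    return hexnode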
1850 def cmp(self, node, text):
1880 def cmp(self, node, text):
1851 """compare text with a given file revision
1881 """compare text with a given file revision
1852
1882
1853 returns True if text is different from what is stored.
1883 returns True if text is different from what is stored.
1854 """
1884 """
1855 p1, p2 = self.parents(node)
1885 p1, p2 = self.parents(node)
1856 return hash(text, p1, p2) != node
1886 return hash(text, p1, p2) != node
1857
1887
1858 def _cachesegment(self, offset, data):
1888 def _cachesegment(self, offset, data):
1859 """Add a segment to the revlog cache.
1889 """Add a segment to the revlog cache.
1860
1890
1861 Accepts an absolute offset and the data that is at that location.
1891 Accepts an absolute offset and the data that is at that location.
1862 """
1892 """
1863 o, d = self._chunkcache
1893 o, d = self._chunkcache
1864 # try to add to existing cache
1894 # try to add to existing cache
1865 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1895 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1866 self._chunkcache = o, d + data
1896 self._chunkcache = o, d + data
1867 else:
1897 else:
1868 self._chunkcache = offset, data
1898 self._chunkcache = offset, data
1869
1899
1870 def _readsegment(self, offset, length, df=None):
1900 def _readsegment(self, offset, length, df=None):
1871 """Load a segment of raw data from the revlog.
1901 """Load a segment of raw data from the revlog.
1872
1902
1873 Accepts an absolute offset, length to read, and an optional existing
1903 Accepts an absolute offset, length to read, and an optional existing
1874 file handle to read from.
1904 file handle to read from.
1875
1905
1876 If an existing file handle is passed, it will be seeked and the
1906 If an existing file handle is passed, it will be seeked and the
1877 original seek position will NOT be restored.
1907 original seek position will NOT be restored.
1878
1908
1879 Returns a str or buffer of raw byte data.
1909 Returns a str or buffer of raw byte data.
1880 """
1910 """
1881 # Cache data both forward and backward around the requested
1911 # Cache data both forward and backward around the requested
1882 # data, in a fixed size window. This helps speed up operations
1912 # data, in a fixed size window. This helps speed up operations
1883 # involving reading the revlog backwards.
1913 # involving reading the revlog backwards.
1884 cachesize = self._chunkcachesize
1914 cachesize = self._chunkcachesize
1885 realoffset = offset & ~(cachesize - 1)
1915 realoffset = offset & ~(cachesize - 1)
1886 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1916 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1887 - realoffset)
1917 - realoffset)
1888 with self._datareadfp(df) as df:
1918 with self._datareadfp(df) as df:
1889 df.seek(realoffset)
1919 df.seek(realoffset)
1890 d = df.read(reallength)
1920 d = df.read(reallength)
1891 self._cachesegment(realoffset, d)
1921 self._cachesegment(realoffset, d)
1892 if offset != realoffset or reallength != length:
1922 if offset != realoffset or reallength != length:
1893 return util.buffer(d, offset - realoffset, length)
1923 return util.buffer(d, offset - realoffset, length)
1894 return d
1924 return d
1895
1925
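# Editor's sketch (not part of revlog.py): the window arithmetic used by
# _readsegment() above. The requested [offset, offset + length) range is
# widened to chunk-cache boundaries so that neighbouring reads, forward or
# backward, are served from the cache. Assumes cachesize is a power of two,
# which __init__ enforces for _chunkcachesize.

def cache_window(offset, length, cachesize=65536):
    realoffset = offset & ~(cachesize - 1)
    reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
    return realoffset, reallength

# cache_window(70000, 100) -> (65536, 65536): one aligned 64KiB read that
# covers the 100 bytes requested at offset 70000 plus their neighbourhood.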
1896 def _getsegment(self, offset, length, df=None):
1926 def _getsegment(self, offset, length, df=None):
1897 """Obtain a segment of raw data from the revlog.
1927 """Obtain a segment of raw data from the revlog.
1898
1928
1899 Accepts an absolute offset, length of bytes to obtain, and an
1929 Accepts an absolute offset, length of bytes to obtain, and an
1900 optional file handle to the already-opened revlog. If the file
1930 optional file handle to the already-opened revlog. If the file
1901 handle is used, its original seek position will not be preserved.
1931 handle is used, its original seek position will not be preserved.
1902
1932
1903 Requests for data may be returned from a cache.
1933 Requests for data may be returned from a cache.
1904
1934
1905 Returns a str or a buffer instance of raw byte data.
1935 Returns a str or a buffer instance of raw byte data.
1906 """
1936 """
1907 o, d = self._chunkcache
1937 o, d = self._chunkcache
1908 l = len(d)
1938 l = len(d)
1909
1939
1910 # is it in the cache?
1940 # is it in the cache?
1911 cachestart = offset - o
1941 cachestart = offset - o
1912 cacheend = cachestart + length
1942 cacheend = cachestart + length
1913 if cachestart >= 0 and cacheend <= l:
1943 if cachestart >= 0 and cacheend <= l:
1914 if cachestart == 0 and cacheend == l:
1944 if cachestart == 0 and cacheend == l:
1915 return d # avoid a copy
1945 return d # avoid a copy
1916 return util.buffer(d, cachestart, cacheend - cachestart)
1946 return util.buffer(d, cachestart, cacheend - cachestart)
1917
1947
1918 return self._readsegment(offset, length, df=df)
1948 return self._readsegment(offset, length, df=df)
1919
1949
1920 def _getsegmentforrevs(self, startrev, endrev, df=None):
1950 def _getsegmentforrevs(self, startrev, endrev, df=None):
1921 """Obtain a segment of raw data corresponding to a range of revisions.
1951 """Obtain a segment of raw data corresponding to a range of revisions.
1922
1952
1923 Accepts the start and end revisions and an optional already-open
1953 Accepts the start and end revisions and an optional already-open
1924 file handle to be used for reading. If the file handle is read, its
1954 file handle to be used for reading. If the file handle is read, its
1925 seek position will not be preserved.
1955 seek position will not be preserved.
1926
1956
1927 Requests for data may be satisfied by a cache.
1957 Requests for data may be satisfied by a cache.
1928
1958
1929 Returns a 2-tuple of (offset, data) for the requested range of
1959 Returns a 2-tuple of (offset, data) for the requested range of
1930 revisions. Offset is the integer offset from the beginning of the
1960 revisions. Offset is the integer offset from the beginning of the
1931 revlog and data is a str or buffer of the raw byte data.
1961 revlog and data is a str or buffer of the raw byte data.
1932
1962
1933 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1963 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1934 to determine where each revision's data begins and ends.
1964 to determine where each revision's data begins and ends.
1935 """
1965 """
1936 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1966 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1937 # (functions are expensive).
1967 # (functions are expensive).
1938 index = self.index
1968 index = self.index
1939 istart = index[startrev]
1969 istart = index[startrev]
1940 start = int(istart[0] >> 16)
1970 start = int(istart[0] >> 16)
1941 if startrev == endrev:
1971 if startrev == endrev:
1942 end = start + istart[1]
1972 end = start + istart[1]
1943 else:
1973 else:
1944 iend = index[endrev]
1974 iend = index[endrev]
1945 end = int(iend[0] >> 16) + iend[1]
1975 end = int(iend[0] >> 16) + iend[1]
1946
1976
1947 if self._inline:
1977 if self._inline:
1948 start += (startrev + 1) * self._io.size
1978 start += (startrev + 1) * self._io.size
1949 end += (endrev + 1) * self._io.size
1979 end += (endrev + 1) * self._io.size
1950 length = end - start
1980 length = end - start
1951
1981
1952 return start, self._getsegment(start, length, df=df)
1982 return start, self._getsegment(start, length, df=df)
1953
1983
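# Editor's sketch (not part of revlog.py): why _getsegmentforrevs() above adds
# (rev + 1) * self._io.size for inline revlogs. In an inline revlog each
# 64-byte v1 index entry is immediately followed by that revision's data, so
# the physical offset of revision r's data is its logical data offset plus the
# r + 1 index entries that precede it in the combined ".i" file.

def inline_data_offset(logical_start, rev, iosize=64):
    return logical_start + (rev + 1) * iosize

# inline_data_offset(0, 0) == 64: revision 0's data sits right after its entry.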
1954 def _chunk(self, rev, df=None):
1984 def _chunk(self, rev, df=None):
1955 """Obtain a single decompressed chunk for a revision.
1985 """Obtain a single decompressed chunk for a revision.
1956
1986
1957 Accepts an integer revision and an optional already-open file handle
1987 Accepts an integer revision and an optional already-open file handle
1958 to be used for reading. If used, the seek position of the file will not
1988 to be used for reading. If used, the seek position of the file will not
1959 be preserved.
1989 be preserved.
1960
1990
1961 Returns a str holding uncompressed data for the requested revision.
1991 Returns a str holding uncompressed data for the requested revision.
1962 """
1992 """
1963 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1993 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1964
1994
1965 def _chunks(self, revs, df=None, targetsize=None):
1995 def _chunks(self, revs, df=None, targetsize=None):
1966 """Obtain decompressed chunks for the specified revisions.
1996 """Obtain decompressed chunks for the specified revisions.
1967
1997
1968 Accepts an iterable of numeric revisions that are assumed to be in
1998 Accepts an iterable of numeric revisions that are assumed to be in
1969 ascending order. Also accepts an optional already-open file handle
1999 ascending order. Also accepts an optional already-open file handle
1970 to be used for reading. If used, the seek position of the file will
2000 to be used for reading. If used, the seek position of the file will
1971 not be preserved.
2001 not be preserved.
1972
2002
1973 This function is similar to calling ``self._chunk()`` multiple times,
2003 This function is similar to calling ``self._chunk()`` multiple times,
1974 but is faster.
2004 but is faster.
1975
2005
1976 Returns a list with decompressed data for each requested revision.
2006 Returns a list with decompressed data for each requested revision.
1977 """
2007 """
1978 if not revs:
2008 if not revs:
1979 return []
2009 return []
1980 start = self.start
2010 start = self.start
1981 length = self.length
2011 length = self.length
1982 inline = self._inline
2012 inline = self._inline
1983 iosize = self._io.size
2013 iosize = self._io.size
1984 buffer = util.buffer
2014 buffer = util.buffer
1985
2015
1986 l = []
2016 l = []
1987 ladd = l.append
2017 ladd = l.append
1988
2018
1989 if not self._withsparseread:
2019 if not self._withsparseread:
1990 slicedchunks = (revs,)
2020 slicedchunks = (revs,)
1991 else:
2021 else:
1992 slicedchunks = _slicechunk(self, revs, targetsize)
2022 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
1993
2023
1994 for revschunk in slicedchunks:
2024 for revschunk in slicedchunks:
1995 firstrev = revschunk[0]
2025 firstrev = revschunk[0]
1996 # Skip trailing revisions with empty diff
2026 # Skip trailing revisions with empty diff
1997 for lastrev in revschunk[::-1]:
2027 for lastrev in revschunk[::-1]:
1998 if length(lastrev) != 0:
2028 if length(lastrev) != 0:
1999 break
2029 break
2000
2030
2001 try:
2031 try:
2002 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2032 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2003 except OverflowError:
2033 except OverflowError:
2004 # issue4215 - we can't cache a run of chunks greater than
2034 # issue4215 - we can't cache a run of chunks greater than
2005 # 2G on Windows
2035 # 2G on Windows
2006 return [self._chunk(rev, df=df) for rev in revschunk]
2036 return [self._chunk(rev, df=df) for rev in revschunk]
2007
2037
2008 decomp = self.decompress
2038 decomp = self.decompress
2009 for rev in revschunk:
2039 for rev in revschunk:
2010 chunkstart = start(rev)
2040 chunkstart = start(rev)
2011 if inline:
2041 if inline:
2012 chunkstart += (rev + 1) * iosize
2042 chunkstart += (rev + 1) * iosize
2013 chunklength = length(rev)
2043 chunklength = length(rev)
2014 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2044 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2015
2045
2016 return l
2046 return l
2017
2047
2018 def _chunkclear(self):
2048 def _chunkclear(self):
2019 """Clear the raw chunk cache."""
2049 """Clear the raw chunk cache."""
2020 self._chunkcache = (0, '')
2050 self._chunkcache = (0, '')
2021
2051
2022 def deltaparent(self, rev):
2052 def deltaparent(self, rev):
2023 """return deltaparent of the given revision"""
2053 """return deltaparent of the given revision"""
2024 base = self.index[rev][3]
2054 base = self.index[rev][3]
2025 if base == rev:
2055 if base == rev:
2026 return nullrev
2056 return nullrev
2027 elif self._generaldelta:
2057 elif self._generaldelta:
2028 return base
2058 return base
2029 else:
2059 else:
2030 return rev - 1
2060 return rev - 1
2031
2061
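# Editor's sketch (not part of revlog.py): the rule deltaparent() above
# implements. The index stores a base revision per entry; a base equal to the
# revision itself marks a full snapshot (no delta parent). With generaldelta
# the base is the delta parent directly; without it, deltas always chain
# against the immediately preceding revision.

def toy_deltaparent(rev, base, generaldelta, nullrev=-1):
    if base == rev:
        return nullrev            # full text stored for this revision
    return base if generaldelta else rev - 1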
2032 def revdiff(self, rev1, rev2):
2062 def revdiff(self, rev1, rev2):
2033 """return or calculate a delta between two revisions
2063 """return or calculate a delta between two revisions
2034
2064
2035 The delta calculated is in binary form and is intended to be written to
2065 The delta calculated is in binary form and is intended to be written to
2036 revlog data directly. So this function needs raw revision data.
2066 revlog data directly. So this function needs raw revision data.
2037 """
2067 """
2038 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2068 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2039 return bytes(self._chunk(rev2))
2069 return bytes(self._chunk(rev2))
2040
2070
2041 return mdiff.textdiff(self.revision(rev1, raw=True),
2071 return mdiff.textdiff(self.revision(rev1, raw=True),
2042 self.revision(rev2, raw=True))
2072 self.revision(rev2, raw=True))
2043
2073
2044 def revision(self, nodeorrev, _df=None, raw=False):
2074 def revision(self, nodeorrev, _df=None, raw=False):
2045 """return an uncompressed revision of a given node or revision
2075 """return an uncompressed revision of a given node or revision
2046 number.
2076 number.
2047
2077
2048 _df - an existing file handle to read from. (internal-only)
2078 _df - an existing file handle to read from. (internal-only)
2049 raw - an optional argument specifying if the revision data is to be
2079 raw - an optional argument specifying if the revision data is to be
2050 treated as raw data when applying flag transforms. 'raw' should be set
2080 treated as raw data when applying flag transforms. 'raw' should be set
2051 to True when generating changegroups or in debug commands.
2081 to True when generating changegroups or in debug commands.
2052 """
2082 """
2053 if isinstance(nodeorrev, int):
2083 if isinstance(nodeorrev, int):
2054 rev = nodeorrev
2084 rev = nodeorrev
2055 node = self.node(rev)
2085 node = self.node(rev)
2056 else:
2086 else:
2057 node = nodeorrev
2087 node = nodeorrev
2058 rev = None
2088 rev = None
2059
2089
2060 cachedrev = None
2090 cachedrev = None
2061 flags = None
2091 flags = None
2062 rawtext = None
2092 rawtext = None
2063 if node == nullid:
2093 if node == nullid:
2064 return ""
2094 return ""
2065 if self._cache:
2095 if self._cache:
2066 if self._cache[0] == node:
2096 if self._cache[0] == node:
2067 # _cache only stores rawtext
2097 # _cache only stores rawtext
2068 if raw:
2098 if raw:
2069 return self._cache[2]
2099 return self._cache[2]
2070 # duplicated, but good for perf
2100 # duplicated, but good for perf
2071 if rev is None:
2101 if rev is None:
2072 rev = self.rev(node)
2102 rev = self.rev(node)
2073 if flags is None:
2103 if flags is None:
2074 flags = self.flags(rev)
2104 flags = self.flags(rev)
2075 # no extra flags set, no flag processor runs, text = rawtext
2105 # no extra flags set, no flag processor runs, text = rawtext
2076 if flags == REVIDX_DEFAULT_FLAGS:
2106 if flags == REVIDX_DEFAULT_FLAGS:
2077 return self._cache[2]
2107 return self._cache[2]
2078 # rawtext is reusable. need to run flag processor
2108 # rawtext is reusable. need to run flag processor
2079 rawtext = self._cache[2]
2109 rawtext = self._cache[2]
2080
2110
2081 cachedrev = self._cache[1]
2111 cachedrev = self._cache[1]
2082
2112
2083 # look up what we need to read
2113 # look up what we need to read
2084 if rawtext is None:
2114 if rawtext is None:
2085 if rev is None:
2115 if rev is None:
2086 rev = self.rev(node)
2116 rev = self.rev(node)
2087
2117
2088 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2118 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2089 if stopped:
2119 if stopped:
2090 rawtext = self._cache[2]
2120 rawtext = self._cache[2]
2091
2121
2092 # drop cache to save memory
2122 # drop cache to save memory
2093 self._cache = None
2123 self._cache = None
2094
2124
2095 targetsize = None
2125 targetsize = None
2096 rawsize = self.index[rev][2]
2126 rawsize = self.index[rev][2]
2097 if 0 <= rawsize:
2127 if 0 <= rawsize:
2098 targetsize = 4 * rawsize
2128 targetsize = 4 * rawsize
2099
2129
2100 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2130 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2101 if rawtext is None:
2131 if rawtext is None:
2102 rawtext = bytes(bins[0])
2132 rawtext = bytes(bins[0])
2103 bins = bins[1:]
2133 bins = bins[1:]
2104
2134
2105 rawtext = mdiff.patches(rawtext, bins)
2135 rawtext = mdiff.patches(rawtext, bins)
2106 self._cache = (node, rev, rawtext)
2136 self._cache = (node, rev, rawtext)
2107
2137
2108 if flags is None:
2138 if flags is None:
2109 if rev is None:
2139 if rev is None:
2110 rev = self.rev(node)
2140 rev = self.rev(node)
2111 flags = self.flags(rev)
2141 flags = self.flags(rev)
2112
2142
2113 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2143 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2114 if validatehash:
2144 if validatehash:
2115 self.checkhash(text, node, rev=rev)
2145 self.checkhash(text, node, rev=rev)
2116
2146
2117 return text
2147 return text
2118
2148
2119 def hash(self, text, p1, p2):
2149 def hash(self, text, p1, p2):
2120 """Compute a node hash.
2150 """Compute a node hash.
2121
2151
2122 Available as a function so that subclasses can replace the hash
2152 Available as a function so that subclasses can replace the hash
2123 as needed.
2153 as needed.
2124 """
2154 """
2125 return hash(text, p1, p2)
2155 return hash(text, p1, p2)
2126
2156
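# Editor's sketch (not part of revlog.py): the conventional node hash computed
# by the module-level hash() that the method above delegates to -- SHA-1 over
# the two parent nodeids (lexicographically smaller one first) followed by the
# revision text. Exposing it as a method lets subclasses swap the algorithm.

import hashlib

def toy_nodehash(text, p1, p2):
    a, b = (p1, p2) if p1 <= p2 else (p2, p1)
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()             # 20-byte binary nodeid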
2127 def _processflags(self, text, flags, operation, raw=False):
2157 def _processflags(self, text, flags, operation, raw=False):
2128 """Inspect revision data flags and applies transforms defined by
2158 """Inspect revision data flags and applies transforms defined by
2129 registered flag processors.
2159 registered flag processors.
2130
2160
2131 ``text`` - the revision data to process
2161 ``text`` - the revision data to process
2132 ``flags`` - the revision flags
2162 ``flags`` - the revision flags
2133 ``operation`` - the operation being performed (read or write)
2163 ``operation`` - the operation being performed (read or write)
2134 ``raw`` - an optional argument describing if the raw transform should be
2164 ``raw`` - an optional argument describing if the raw transform should be
2135 applied.
2165 applied.
2136
2166
2137 This method processes the flags in the order (or reverse order if
2167 This method processes the flags in the order (or reverse order if
2138 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2168 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2139 flag processors registered for present flags. The order of flags defined
2169 flag processors registered for present flags. The order of flags defined
2140 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
2170 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
2141
2171
2142 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2172 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2143 processed text and ``validatehash`` is a bool indicating whether the
2173 processed text and ``validatehash`` is a bool indicating whether the
2144 returned text should be checked for hash integrity.
2174 returned text should be checked for hash integrity.
2145
2175
2146 Note: If the ``raw`` argument is set, it has precedence over the
2176 Note: If the ``raw`` argument is set, it has precedence over the
2147 operation and will only update the value of ``validatehash``.
2177 operation and will only update the value of ``validatehash``.
2148 """
2178 """
2149 # fast path: no flag processors will run
2179 # fast path: no flag processors will run
2150 if flags == 0:
2180 if flags == 0:
2151 return text, True
2181 return text, True
2152 if operation not in ('read', 'write'):
2182 if operation not in ('read', 'write'):
2153 raise ProgrammingError(_("invalid '%s' operation") % (operation))
2183 raise ProgrammingError(_("invalid '%s' operation") % (operation))
2154 # Check all flags are known.
2184 # Check all flags are known.
2155 if flags & ~REVIDX_KNOWN_FLAGS:
2185 if flags & ~REVIDX_KNOWN_FLAGS:
2156 raise RevlogError(_("incompatible revision flag '%#x'") %
2186 raise RevlogError(_("incompatible revision flag '%#x'") %
2157 (flags & ~REVIDX_KNOWN_FLAGS))
2187 (flags & ~REVIDX_KNOWN_FLAGS))
2158 validatehash = True
2188 validatehash = True
2159 # Depending on the operation (read or write), the order might be
2189 # Depending on the operation (read or write), the order might be
2160 # reversed due to non-commutative transforms.
2190 # reversed due to non-commutative transforms.
2161 orderedflags = REVIDX_FLAGS_ORDER
2191 orderedflags = REVIDX_FLAGS_ORDER
2162 if operation == 'write':
2192 if operation == 'write':
2163 orderedflags = reversed(orderedflags)
2193 orderedflags = reversed(orderedflags)
2164
2194
2165 for flag in orderedflags:
2195 for flag in orderedflags:
2166 # If a flagprocessor has been registered for a known flag, apply the
2196 # If a flagprocessor has been registered for a known flag, apply the
2167 # related operation transform and update result tuple.
2197 # related operation transform and update result tuple.
2168 if flag & flags:
2198 if flag & flags:
2169 vhash = True
2199 vhash = True
2170
2200
2171 if flag not in _flagprocessors:
2201 if flag not in _flagprocessors:
2172 message = _("missing processor for flag '%#x'") % (flag)
2202 message = _("missing processor for flag '%#x'") % (flag)
2173 raise RevlogError(message)
2203 raise RevlogError(message)
2174
2204
2175 processor = _flagprocessors[flag]
2205 processor = _flagprocessors[flag]
2176 if processor is not None:
2206 if processor is not None:
2177 readtransform, writetransform, rawtransform = processor
2207 readtransform, writetransform, rawtransform = processor
2178
2208
2179 if raw:
2209 if raw:
2180 vhash = rawtransform(self, text)
2210 vhash = rawtransform(self, text)
2181 elif operation == 'read':
2211 elif operation == 'read':
2182 text, vhash = readtransform(self, text)
2212 text, vhash = readtransform(self, text)
2183 else: # write operation
2213 else: # write operation
2184 text, vhash = writetransform(self, text)
2214 text, vhash = writetransform(self, text)
2185 validatehash = validatehash and vhash
2215 validatehash = validatehash and vhash
2186
2216
2187 return text, validatehash
2217 return text, validatehash
2188
2218
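# Editor's sketch (not part of revlog.py): the shape of a registered flag
# processor and the ordering rule _processflags() above applies. A processor
# is a (readtransform, writetransform, rawtransform) triple; read transforms
# run in REVIDX_FLAGS_ORDER and write transforms in the reverse order so that
# non-commutative transforms round-trip. All names below are illustrative,
# not real Mercurial flags or processors.

HYPOTHETICAL_FLAG = 1 << 1

def _toy_read(rl, rawtext):
    return rawtext.replace(b'stored', b'logical'), True    # (text, validatehash)

def _toy_write(rl, text):
    return text.replace(b'logical', b'stored'), True

def _toy_raw(rl, rawtext):
    return False      # raw text cannot be validated against the node hash here

toy_flagprocessors = {HYPOTHETICAL_FLAG: (_toy_read, _toy_write, _toy_raw)}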
2189 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2219 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2190 """Check node hash integrity.
2220 """Check node hash integrity.
2191
2221
2192 Available as a function so that subclasses can extend hash mismatch
2222 Available as a function so that subclasses can extend hash mismatch
2193 behaviors as needed.
2223 behaviors as needed.
2194 """
2224 """
2195 try:
2225 try:
2196 if p1 is None and p2 is None:
2226 if p1 is None and p2 is None:
2197 p1, p2 = self.parents(node)
2227 p1, p2 = self.parents(node)
2198 if node != self.hash(text, p1, p2):
2228 if node != self.hash(text, p1, p2):
2199 revornode = rev
2229 revornode = rev
2200 if revornode is None:
2230 if revornode is None:
2201 revornode = templatefilters.short(hex(node))
2231 revornode = templatefilters.short(hex(node))
2202 raise RevlogError(_("integrity check failed on %s:%s")
2232 raise RevlogError(_("integrity check failed on %s:%s")
2203 % (self.indexfile, pycompat.bytestr(revornode)))
2233 % (self.indexfile, pycompat.bytestr(revornode)))
2204 except RevlogError:
2234 except RevlogError:
2205 if self._censorable and _censoredtext(text):
2235 if self._censorable and _censoredtext(text):
2206 raise error.CensoredNodeError(self.indexfile, node, text)
2236 raise error.CensoredNodeError(self.indexfile, node, text)
2207 raise
2237 raise
2208
2238
2209 def _enforceinlinesize(self, tr, fp=None):
2239 def _enforceinlinesize(self, tr, fp=None):
2210 """Check if the revlog is too big for inline and convert if so.
2240 """Check if the revlog is too big for inline and convert if so.
2211
2241
2212 This should be called after revisions are added to the revlog. If the
2242 This should be called after revisions are added to the revlog. If the
2213 revlog has grown too large to be an inline revlog, it will convert it
2243 revlog has grown too large to be an inline revlog, it will convert it
2214 to use multiple index and data files.
2244 to use multiple index and data files.
2215 """
2245 """
2216 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
2246 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
2217 return
2247 return
2218
2248
2219 trinfo = tr.find(self.indexfile)
2249 trinfo = tr.find(self.indexfile)
2220 if trinfo is None:
2250 if trinfo is None:
2221 raise RevlogError(_("%s not found in the transaction")
2251 raise RevlogError(_("%s not found in the transaction")
2222 % self.indexfile)
2252 % self.indexfile)
2223
2253
2224 trindex = trinfo[2]
2254 trindex = trinfo[2]
2225 if trindex is not None:
2255 if trindex is not None:
2226 dataoff = self.start(trindex)
2256 dataoff = self.start(trindex)
2227 else:
2257 else:
2228 # revlog was stripped at start of transaction, use all leftover data
2258 # revlog was stripped at start of transaction, use all leftover data
2229 trindex = len(self) - 1
2259 trindex = len(self) - 1
2230 dataoff = self.end(-2)
2260 dataoff = self.end(-2)
2231
2261
2232 tr.add(self.datafile, dataoff)
2262 tr.add(self.datafile, dataoff)
2233
2263
2234 if fp:
2264 if fp:
2235 fp.flush()
2265 fp.flush()
2236 fp.close()
2266 fp.close()
2237
2267
2238 with self._datafp('w') as df:
2268 with self._datafp('w') as df:
2239 for r in self:
2269 for r in self:
2240 df.write(self._getsegmentforrevs(r, r)[1])
2270 df.write(self._getsegmentforrevs(r, r)[1])
2241
2271
2242 with self._indexfp('w') as fp:
2272 with self._indexfp('w') as fp:
2243 self.version &= ~FLAG_INLINE_DATA
2273 self.version &= ~FLAG_INLINE_DATA
2244 self._inline = False
2274 self._inline = False
2245 io = self._io
2275 io = self._io
2246 for i in self:
2276 for i in self:
2247 e = io.packentry(self.index[i], self.node, self.version, i)
2277 e = io.packentry(self.index[i], self.node, self.version, i)
2248 fp.write(e)
2278 fp.write(e)
2249
2279
2250 # the temp file replaces the real index when we exit the context
2280 # the temp file replaces the real index when we exit the context
2251 # manager
2281 # manager
2252
2282
2253 tr.replace(self.indexfile, trindex * self._io.size)
2283 tr.replace(self.indexfile, trindex * self._io.size)
2254 self._chunkclear()
2284 self._chunkclear()
2255
2285
2256 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2286 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2257 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2287 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2258 """add a revision to the log
2288 """add a revision to the log
2259
2289
2260 text - the revision data to add
2290 text - the revision data to add
2261 transaction - the transaction object used for rollback
2291 transaction - the transaction object used for rollback
2262 link - the linkrev data to add
2292 link - the linkrev data to add
2263 p1, p2 - the parent nodeids of the revision
2293 p1, p2 - the parent nodeids of the revision
2264 cachedelta - an optional precomputed delta
2294 cachedelta - an optional precomputed delta
2265 node - nodeid of revision; typically node is not specified, and it is
2295 node - nodeid of revision; typically node is not specified, and it is
2266 computed by default as hash(text, p1, p2), however subclasses might
2296 computed by default as hash(text, p1, p2), however subclasses might
2267 use different hashing method (and override checkhash() in such case)
2297 use different hashing method (and override checkhash() in such case)
2268 flags - the known flags to set on the revision
2298 flags - the known flags to set on the revision
2269 deltacomputer - an optional _deltacomputer instance shared between
2299 deltacomputer - an optional _deltacomputer instance shared between
2270 multiple calls
2300 multiple calls
2271 """
2301 """
2272 if link == nullrev:
2302 if link == nullrev:
2273 raise RevlogError(_("attempted to add linkrev -1 to %s")
2303 raise RevlogError(_("attempted to add linkrev -1 to %s")
2274 % self.indexfile)
2304 % self.indexfile)
2275
2305
2276 if flags:
2306 if flags:
2277 node = node or self.hash(text, p1, p2)
2307 node = node or self.hash(text, p1, p2)
2278
2308
2279 rawtext, validatehash = self._processflags(text, flags, 'write')
2309 rawtext, validatehash = self._processflags(text, flags, 'write')
2280
2310
2281 # If the flag processor modifies the revision data, ignore any provided
2311 # If the flag processor modifies the revision data, ignore any provided
2282 # cachedelta.
2312 # cachedelta.
2283 if rawtext != text:
2313 if rawtext != text:
2284 cachedelta = None
2314 cachedelta = None
2285
2315
2286 if len(rawtext) > _maxentrysize:
2316 if len(rawtext) > _maxentrysize:
2287 raise RevlogError(
2317 raise RevlogError(
2288 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2318 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2289 % (self.indexfile, len(rawtext)))
2319 % (self.indexfile, len(rawtext)))
2290
2320
2291 node = node or self.hash(rawtext, p1, p2)
2321 node = node or self.hash(rawtext, p1, p2)
2292 if node in self.nodemap:
2322 if node in self.nodemap:
2293 return node
2323 return node
2294
2324
2295 if validatehash:
2325 if validatehash:
2296 self.checkhash(rawtext, node, p1=p1, p2=p2)
2326 self.checkhash(rawtext, node, p1=p1, p2=p2)
2297
2327
2298 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2328 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2299 flags, cachedelta=cachedelta,
2329 flags, cachedelta=cachedelta,
2300 deltacomputer=deltacomputer)
2330 deltacomputer=deltacomputer)
2301
2331
2302 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2332 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2303 cachedelta=None, deltacomputer=None):
2333 cachedelta=None, deltacomputer=None):
2304 """add a raw revision with known flags, node and parents
2334 """add a raw revision with known flags, node and parents
2305 useful when reusing a revision not stored in this revlog (ex: received
2335 useful when reusing a revision not stored in this revlog (ex: received
2306 over wire, or read from an external bundle).
2336 over wire, or read from an external bundle).
2307 """
2337 """
2308 dfh = None
2338 dfh = None
2309 if not self._inline:
2339 if not self._inline:
2310 dfh = self._datafp("a+")
2340 dfh = self._datafp("a+")
2311 ifh = self._indexfp("a+")
2341 ifh = self._indexfp("a+")
2312 try:
2342 try:
2313 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2343 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2314 flags, cachedelta, ifh, dfh,
2344 flags, cachedelta, ifh, dfh,
2315 deltacomputer=deltacomputer)
2345 deltacomputer=deltacomputer)
2316 finally:
2346 finally:
2317 if dfh:
2347 if dfh:
2318 dfh.close()
2348 dfh.close()
2319 ifh.close()
2349 ifh.close()
2320
2350
2321 def compress(self, data):
2351 def compress(self, data):
2322 """Generate a possibly-compressed representation of data."""
2352 """Generate a possibly-compressed representation of data."""
2323 if not data:
2353 if not data:
2324 return '', data
2354 return '', data
2325
2355
2326 compressed = self._compressor.compress(data)
2356 compressed = self._compressor.compress(data)
2327
2357
2328 if compressed:
2358 if compressed:
2329 # The revlog compressor added the header in the returned data.
2359 # The revlog compressor added the header in the returned data.
2330 return '', compressed
2360 return '', compressed
2331
2361
2332 if data[0:1] == '\0':
2362 if data[0:1] == '\0':
2333 return '', data
2363 return '', data
2334 return 'u', data
2364 return 'u', data
2335
2365
2336 def decompress(self, data):
2366 def decompress(self, data):
2337 """Decompress a revlog chunk.
2367 """Decompress a revlog chunk.
2338
2368
2339 The chunk is expected to begin with a header identifying the
2369 The chunk is expected to begin with a header identifying the
2340 format type so it can be routed to an appropriate decompressor.
2370 format type so it can be routed to an appropriate decompressor.
2341 """
2371 """
2342 if not data:
2372 if not data:
2343 return data
2373 return data
2344
2374
2345 # Revlogs are read much more frequently than they are written and many
2375 # Revlogs are read much more frequently than they are written and many
2346 # chunks only take microseconds to decompress, so performance is
2376 # chunks only take microseconds to decompress, so performance is
2347 # important here.
2377 # important here.
2348 #
2378 #
2349 # We can make a few assumptions about revlogs:
2379 # We can make a few assumptions about revlogs:
2350 #
2380 #
2351 # 1) the majority of chunks will be compressed (as opposed to inline
2381 # 1) the majority of chunks will be compressed (as opposed to inline
2352 # raw data).
2382 # raw data).
2353 # 2) decompressing *any* data will likely be at least 10x slower than
2383 # 2) decompressing *any* data will likely be at least 10x slower than
2354 # returning raw inline data.
2384 # returning raw inline data.
2355 # 3) we want to prioritize common and officially supported compression
2385 # 3) we want to prioritize common and officially supported compression
2356 # engines
2386 # engines
2357 #
2387 #
2358 # It follows that we want to optimize for "decompress compressed data
2388 # It follows that we want to optimize for "decompress compressed data
2359 # when encoded with common and officially supported compression engines"
2389 # when encoded with common and officially supported compression engines"
2360 # case over "raw data" and "data encoded by less common or non-official
2390 # case over "raw data" and "data encoded by less common or non-official
2361 # compression engines." That is why we have the inline lookup first
2391 # compression engines." That is why we have the inline lookup first
2362 # followed by the compengines lookup.
2392 # followed by the compengines lookup.
2363 #
2393 #
2364 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2394 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2365 # compressed chunks. And this matters for changelog and manifest reads.
2395 # compressed chunks. And this matters for changelog and manifest reads.
2366 t = data[0:1]
2396 t = data[0:1]
2367
2397
2368 if t == 'x':
2398 if t == 'x':
2369 try:
2399 try:
2370 return _zlibdecompress(data)
2400 return _zlibdecompress(data)
2371 except zlib.error as e:
2401 except zlib.error as e:
2372 raise RevlogError(_('revlog decompress error: %s') %
2402 raise RevlogError(_('revlog decompress error: %s') %
2373 stringutil.forcebytestr(e))
2403 stringutil.forcebytestr(e))
2374 # '\0' is more common than 'u' so it goes first.
2404 # '\0' is more common than 'u' so it goes first.
2375 elif t == '\0':
2405 elif t == '\0':
2376 return data
2406 return data
2377 elif t == 'u':
2407 elif t == 'u':
2378 return util.buffer(data, 1)
2408 return util.buffer(data, 1)
2379
2409
2380 try:
2410 try:
2381 compressor = self._decompressors[t]
2411 compressor = self._decompressors[t]
2382 except KeyError:
2412 except KeyError:
2383 try:
2413 try:
2384 engine = util.compengines.forrevlogheader(t)
2414 engine = util.compengines.forrevlogheader(t)
2385 compressor = engine.revlogcompressor()
2415 compressor = engine.revlogcompressor()
2386 self._decompressors[t] = compressor
2416 self._decompressors[t] = compressor
2387 except KeyError:
2417 except KeyError:
2388 raise RevlogError(_('unknown compression type %r') % t)
2418 raise RevlogError(_('unknown compression type %r') % t)
2389
2419
2390 return compressor.decompress(data)
2420 return compressor.decompress(data)
2391
2421
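# Editor's sketch (not part of revlog.py): the one-byte header dispatch that
# compress()/decompress() above rely on. 'u' prefixes data stored
# uncompressed, a chunk beginning with '\0' is returned as-is, 'x' is the
# first byte of a zlib stream, and any other byte is resolved through the
# compression-engine registry (e.g. zstd). Minimal zlib-only model:

import zlib

def toy_decompress(data):
    if not data:
        return data
    t = data[0:1]
    if t == b'\0':
        return data                    # raw chunk that happens to start with NUL
    if t == b'u':
        return data[1:]                # explicitly marked uncompressed
    if t == b'x':
        return zlib.decompress(data)   # 'x' is the usual zlib header byte
    raise ValueError('unknown compression header %r' % t)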
2392 def _isgooddeltainfo(self, deltainfo, revinfo):
2422 def _isgooddeltainfo(self, deltainfo, revinfo):
2393 """Returns True if the given delta is good. Good means that it is within
2423 """Returns True if the given delta is good. Good means that it is within
2394 the disk span, disk size, and chain length bounds that we know to be
2424 the disk span, disk size, and chain length bounds that we know to be
2395 performant."""
2425 performant."""
2396 if deltainfo is None:
2426 if deltainfo is None:
2397 return False
2427 return False
2398
2428
2399 # - 'deltainfo.distance' is the distance from the base revision --
2429 # - 'deltainfo.distance' is the distance from the base revision --
2400 # bounding it limits the amount of I/O we need to do.
2430 # bounding it limits the amount of I/O we need to do.
2401 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2431 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2402 # deltas we need to apply -- bounding it limits the amount of CPU
2432 # deltas we need to apply -- bounding it limits the amount of CPU
2403 # we consume.
2433 # we consume.
2404
2434
2405 distance = deltainfo.distance
2435 if self._sparserevlog:
2436 # As sparse reads will be used, the relevant distance is not the span
2437 # of the whole delta chain but the span of the largest chunk actually
2438 # read (see the sketch after this method)
2439 base = deltainfo.base
2440
2441 if base != nullrev:
2442 deltachain = self._deltachain(base)[0]
2443 else:
2444 deltachain = []
2445
2446 chunks = _slicechunk(self, deltachain, deltainfo)
2447 distance = max(map(lambda revs: _segmentspan(self, revs), chunks))
2448 else:
2449 distance = deltainfo.distance
2450
2406 textlen = revinfo.textlen
2451 textlen = revinfo.textlen
2407 defaultmax = textlen * 4
2452 defaultmax = textlen * 4
2408 maxdist = self._maxdeltachainspan
2453 maxdist = self._maxdeltachainspan
2409 if not maxdist:
2454 if not maxdist:
2410 maxdist = distance # ensure the conditional pass
2455 maxdist = distance # ensure the conditional pass
2411 maxdist = max(maxdist, defaultmax)
2456 maxdist = max(maxdist, defaultmax)
2457 if self._sparserevlog and maxdist < self._srmingapsize:
2458 # In multiple places we ignore irrelevant data ranges below a certain
2459 # size. Apply the same tradeoff here and relax the span constraint for
2460 # small enough content.
2461 maxdist = self._srmingapsize
2412 if (distance > maxdist or deltainfo.deltalen > textlen or
2462 if (distance > maxdist or deltainfo.deltalen > textlen or
2413 deltainfo.compresseddeltalen > textlen * 2 or
2463 deltainfo.compresseddeltalen > textlen * 2 or
2414 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2464 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2415 return False
2465 return False
2416
2466
2417 return True
2467 return True
2418
2468
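# Editor's sketch (not part of revlog.py): the sparse-revlog distance measure
# used by _isgooddeltainfo() above. The candidate's delta chain is sliced into
# densely packed chunks (what _slicechunk does) and only the widest chunk --
# the largest contiguous region a reader would actually fetch -- is held
# against the delta-chain span limit. 'start' and 'length' stand for the
# revlog's per-revision offset and size accessors.

def toy_sparse_distance(chunks, start, length):
    def segmentspan(revs):
        if not revs:
            return 0
        return start(revs[-1]) + length(revs[-1]) - start(revs[0])
    return max(segmentspan(revs) for revs in chunks)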
2419 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2469 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2420 cachedelta, ifh, dfh, alwayscache=False,
2470 cachedelta, ifh, dfh, alwayscache=False,
2421 deltacomputer=None):
2471 deltacomputer=None):
2422 """internal function to add revisions to the log
2472 """internal function to add revisions to the log
2423
2473
2424 see addrevision for argument descriptions.
2474 see addrevision for argument descriptions.
2425
2475
2426 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2476 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2427
2477
2428 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2478 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2429 be used.
2479 be used.
2430
2480
2431 invariants:
2481 invariants:
2432 - rawtext is optional (can be None); if not set, cachedelta must be set.
2482 - rawtext is optional (can be None); if not set, cachedelta must be set.
2433 if both are set, they must correspond to each other.
2483 if both are set, they must correspond to each other.
2434 """
2484 """
2435 if node == nullid:
2485 if node == nullid:
2436 raise RevlogError(_("%s: attempt to add null revision") %
2486 raise RevlogError(_("%s: attempt to add null revision") %
2437 (self.indexfile))
2487 (self.indexfile))
2438 if node == wdirid or node in wdirfilenodeids:
2488 if node == wdirid or node in wdirfilenodeids:
2439 raise RevlogError(_("%s: attempt to add wdir revision") %
2489 raise RevlogError(_("%s: attempt to add wdir revision") %
2440 (self.indexfile))
2490 (self.indexfile))
2441
2491
2442 if self._inline:
2492 if self._inline:
2443 fh = ifh
2493 fh = ifh
2444 else:
2494 else:
2445 fh = dfh
2495 fh = dfh
2446
2496
2447 btext = [rawtext]
2497 btext = [rawtext]
2448
2498
2449 curr = len(self)
2499 curr = len(self)
2450 prev = curr - 1
2500 prev = curr - 1
2451 offset = self.end(prev)
2501 offset = self.end(prev)
2452 p1r, p2r = self.rev(p1), self.rev(p2)
2502 p1r, p2r = self.rev(p1), self.rev(p2)
2453
2503
2454 # full versions are inserted when the needed deltas
2504 # full versions are inserted when the needed deltas
2455 # become comparable to the uncompressed text
2505 # become comparable to the uncompressed text
2456 if rawtext is None:
2506 if rawtext is None:
2457 # need the rawtext size before it is changed by flag processors, which is
2507 # need the rawtext size before it is changed by flag processors, which is
2458 # the non-raw size. use revlog explicitly to avoid filelog's extra
2508 # the non-raw size. use revlog explicitly to avoid filelog's extra
2459 # logic that might remove metadata size.
2509 # logic that might remove metadata size.
2460 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2510 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2461 cachedelta[1])
2511 cachedelta[1])
2462 else:
2512 else:
2463 textlen = len(rawtext)
2513 textlen = len(rawtext)
2464
2514
2465 if deltacomputer is None:
2515 if deltacomputer is None:
2466 deltacomputer = _deltacomputer(self)
2516 deltacomputer = _deltacomputer(self)
2467
2517
2468 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2518 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2469
2519
2470 # no delta for flag processor revision (see "candelta" for why)
2520 # no delta for flag processor revision (see "candelta" for why)
2471 # not calling candelta since only one revision needs test, also to
2521 # not calling candelta since only one revision needs test, also to
2472 # avoid overhead fetching flags again.
2522 # avoid overhead fetching flags again.
2473 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2523 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2474 deltainfo = None
2524 deltainfo = None
2475 else:
2525 else:
2476 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2526 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2477
2527
2478 if deltainfo is not None:
2528 if deltainfo is not None:
2479 base = deltainfo.base
2529 base = deltainfo.base
2480 chainbase = deltainfo.chainbase
2530 chainbase = deltainfo.chainbase
2481 data = deltainfo.data
2531 data = deltainfo.data
2482 l = deltainfo.deltalen
2532 l = deltainfo.deltalen
2483 else:
2533 else:
2484 rawtext = deltacomputer.buildtext(revinfo, fh)
2534 rawtext = deltacomputer.buildtext(revinfo, fh)
2485 data = self.compress(rawtext)
2535 data = self.compress(rawtext)
2486 l = len(data[1]) + len(data[0])
2536 l = len(data[1]) + len(data[0])
2487 base = chainbase = curr
2537 base = chainbase = curr
2488
2538
2489 e = (offset_type(offset, flags), l, textlen,
2539 e = (offset_type(offset, flags), l, textlen,
2490 base, link, p1r, p2r, node)
2540 base, link, p1r, p2r, node)
2491 self.index.insert(-1, e)
2541 self.index.insert(-1, e)
2492 self.nodemap[node] = curr
2542 self.nodemap[node] = curr
2493
2543
2494 entry = self._io.packentry(e, self.node, self.version, curr)
2544 entry = self._io.packentry(e, self.node, self.version, curr)
2495 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2545 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2496
2546
2497 if alwayscache and rawtext is None:
2547 if alwayscache and rawtext is None:
2498 rawtext = deltacomputer._buildtext(revinfo, fh)
2548 rawtext = deltacomputer._buildtext(revinfo, fh)
2499
2549
2500 if type(rawtext) == bytes: # only accept immutable objects
2550 if type(rawtext) == bytes: # only accept immutable objects
2501 self._cache = (node, curr, rawtext)
2551 self._cache = (node, curr, rawtext)
2502 self._chainbasecache[curr] = chainbase
2552 self._chainbasecache[curr] = chainbase
2503 return node
2553 return node
2504
2554
2505 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2555 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2506 # Files opened in a+ mode have inconsistent behavior on various
2556 # Files opened in a+ mode have inconsistent behavior on various
2507 # platforms. Windows requires that a file positioning call be made
2557 # platforms. Windows requires that a file positioning call be made
2508 # when the file handle transitions between reads and writes. See
2558 # when the file handle transitions between reads and writes. See
2509 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2559 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2510 # platforms, Python or the platform itself can be buggy. Some versions
2560 # platforms, Python or the platform itself can be buggy. Some versions
2511 # of Solaris have been observed to not append at the end of the file
2561 # of Solaris have been observed to not append at the end of the file
2512 # if the file was seeked to before the end. See issue4943 for more.
2562 # if the file was seeked to before the end. See issue4943 for more.
2513 #
2563 #
2514 # We work around this issue by inserting a seek() before writing.
2564 # We work around this issue by inserting a seek() before writing.
2515 # Note: This is likely not necessary on Python 3.
2565 # Note: This is likely not necessary on Python 3.
2516 ifh.seek(0, os.SEEK_END)
2566 ifh.seek(0, os.SEEK_END)
2517 if dfh:
2567 if dfh:
2518 dfh.seek(0, os.SEEK_END)
2568 dfh.seek(0, os.SEEK_END)
2519
2569
2520 curr = len(self) - 1
2570 curr = len(self) - 1
2521 if not self._inline:
2571 if not self._inline:
2522 transaction.add(self.datafile, offset)
2572 transaction.add(self.datafile, offset)
2523 transaction.add(self.indexfile, curr * len(entry))
2573 transaction.add(self.indexfile, curr * len(entry))
2524 if data[0]:
2574 if data[0]:
2525 dfh.write(data[0])
2575 dfh.write(data[0])
2526 dfh.write(data[1])
2576 dfh.write(data[1])
2527 ifh.write(entry)
2577 ifh.write(entry)
2528 else:
2578 else:
2529 offset += curr * self._io.size
2579 offset += curr * self._io.size
2530 transaction.add(self.indexfile, offset, curr)
2580 transaction.add(self.indexfile, offset, curr)
2531 ifh.write(entry)
2581 ifh.write(entry)
2532 ifh.write(data[0])
2582 ifh.write(data[0])
2533 ifh.write(data[1])
2583 ifh.write(data[1])
2534 self._enforceinlinesize(transaction, ifh)
2584 self._enforceinlinesize(transaction, ifh)
2535
2585
2536 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2586 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2537 """
2587 """
2538 add a delta group
2588 add a delta group
2539
2589
2540 given a set of deltas, add them to the revision log. the
2590 given a set of deltas, add them to the revision log. the
2541 first delta is against its parent, which should be in our
2591 first delta is against its parent, which should be in our
2542 log, the rest are against the previous delta.
2592 log, the rest are against the previous delta.
2543
2593
2544 If ``addrevisioncb`` is defined, it will be called with arguments of
2594 If ``addrevisioncb`` is defined, it will be called with arguments of
2545 this revlog and the node that was added.
2595 this revlog and the node that was added.
2546 """
2596 """
2547
2597
2548 nodes = []
2598 nodes = []
2549
2599
2550 r = len(self)
2600 r = len(self)
2551 end = 0
2601 end = 0
2552 if r:
2602 if r:
2553 end = self.end(r - 1)
2603 end = self.end(r - 1)
2554 ifh = self._indexfp("a+")
2604 ifh = self._indexfp("a+")
2555 isize = r * self._io.size
2605 isize = r * self._io.size
2556 if self._inline:
2606 if self._inline:
2557 transaction.add(self.indexfile, end + isize, r)
2607 transaction.add(self.indexfile, end + isize, r)
2558 dfh = None
2608 dfh = None
2559 else:
2609 else:
2560 transaction.add(self.indexfile, isize, r)
2610 transaction.add(self.indexfile, isize, r)
2561 transaction.add(self.datafile, end)
2611 transaction.add(self.datafile, end)
2562 dfh = self._datafp("a+")
2612 dfh = self._datafp("a+")
2563 def flush():
2613 def flush():
2564 if dfh:
2614 if dfh:
2565 dfh.flush()
2615 dfh.flush()
2566 ifh.flush()
2616 ifh.flush()
2567 try:
2617 try:
2568 deltacomputer = _deltacomputer(self)
2618 deltacomputer = _deltacomputer(self)
2569 # loop through our set of deltas
2619 # loop through our set of deltas
2570 for data in deltas:
2620 for data in deltas:
2571 node, p1, p2, linknode, deltabase, delta, flags = data
2621 node, p1, p2, linknode, deltabase, delta, flags = data
2572 link = linkmapper(linknode)
2622 link = linkmapper(linknode)
2573 flags = flags or REVIDX_DEFAULT_FLAGS
2623 flags = flags or REVIDX_DEFAULT_FLAGS
2574
2624
2575 nodes.append(node)
2625 nodes.append(node)
2576
2626
2577 if node in self.nodemap:
2627 if node in self.nodemap:
2578 # this can happen if two branches make the same change
2628 # this can happen if two branches make the same change
2579 continue
2629 continue
2580
2630
2581 for p in (p1, p2):
2631 for p in (p1, p2):
2582 if p not in self.nodemap:
2632 if p not in self.nodemap:
2583 raise LookupError(p, self.indexfile,
2633 raise LookupError(p, self.indexfile,
2584 _('unknown parent'))
2634 _('unknown parent'))
2585
2635
2586 if deltabase not in self.nodemap:
2636 if deltabase not in self.nodemap:
2587 raise LookupError(deltabase, self.indexfile,
2637 raise LookupError(deltabase, self.indexfile,
2588 _('unknown delta base'))
2638 _('unknown delta base'))
2589
2639
2590 baserev = self.rev(deltabase)
2640 baserev = self.rev(deltabase)
2591
2641
2592 if baserev != nullrev and self.iscensored(baserev):
2642 if baserev != nullrev and self.iscensored(baserev):
2593 # if base is censored, delta must be full replacement in a
2643 # if base is censored, delta must be full replacement in a
2594 # single patch operation
2644 # single patch operation
2595 hlen = struct.calcsize(">lll")
2645 hlen = struct.calcsize(">lll")
2596 oldlen = self.rawsize(baserev)
2646 oldlen = self.rawsize(baserev)
2597 newlen = len(delta) - hlen
2647 newlen = len(delta) - hlen
2598 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2648 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2599 raise error.CensoredBaseError(self.indexfile,
2649 raise error.CensoredBaseError(self.indexfile,
2600 self.node(baserev))
2650 self.node(baserev))
2601
2651
2602 if not flags and self._peek_iscensored(baserev, delta, flush):
2652 if not flags and self._peek_iscensored(baserev, delta, flush):
2603 flags |= REVIDX_ISCENSORED
2653 flags |= REVIDX_ISCENSORED
2604
2654
2605 # We assume consumers of addrevisioncb will want to retrieve
2655 # We assume consumers of addrevisioncb will want to retrieve
2606 # the added revision, which will require a call to
2656 # the added revision, which will require a call to
2607 # revision(). revision() will fast path if there is a cache
2657 # revision(). revision() will fast path if there is a cache
2608 # hit. So, we tell _addrevision() to always cache in this case.
2658 # hit. So, we tell _addrevision() to always cache in this case.
2609 # We're only using addgroup() in the context of changegroup
2659 # We're only using addgroup() in the context of changegroup
2610 # generation so the revision data can always be handled as raw
2660 # generation so the revision data can always be handled as raw
2611 # by the flagprocessor.
2661 # by the flagprocessor.
2612 self._addrevision(node, None, transaction, link,
2662 self._addrevision(node, None, transaction, link,
2613 p1, p2, flags, (baserev, delta),
2663 p1, p2, flags, (baserev, delta),
2614 ifh, dfh,
2664 ifh, dfh,
2615 alwayscache=bool(addrevisioncb),
2665 alwayscache=bool(addrevisioncb),
2616 deltacomputer=deltacomputer)
2666 deltacomputer=deltacomputer)
2617
2667
2618 if addrevisioncb:
2668 if addrevisioncb:
2619 addrevisioncb(self, node)
2669 addrevisioncb(self, node)
2620
2670
2621 if not dfh and not self._inline:
2671 if not dfh and not self._inline:
2622 # addrevision switched from inline to conventional
2672 # addrevision switched from inline to conventional
2623 # reopen the index
2673 # reopen the index
2624 ifh.close()
2674 ifh.close()
2625 dfh = self._datafp("a+")
2675 dfh = self._datafp("a+")
2626 ifh = self._indexfp("a+")
2676 ifh = self._indexfp("a+")
2627 finally:
2677 finally:
2628 if dfh:
2678 if dfh:
2629 dfh.close()
2679 dfh.close()
2630 ifh.close()
2680 ifh.close()
2631
2681
2632 return nodes
2682 return nodes
2633
2683
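For reference, each entry consumed by the addgroup() loop above is a seven-item tuple of (node, p1, p2, linknode, deltabase, delta, flags). A minimal standalone sketch of the same validation order, with ``knownnodes`` standing in for self.nodemap (illustrative only, not revlog API):

    def validate_delta_entry(data, knownnodes):
        # Mirrors the checks in addgroup(): skip duplicates, then require that
        # both parents and the delta base are already known.
        node, p1, p2, linknode, deltabase, delta, flags = data
        if node in knownnodes:
            return False  # duplicate: two branches made the same change
        for p in (p1, p2):
            if p not in knownnodes:
                raise LookupError('unknown parent: %r' % (p,))
        if deltabase not in knownnodes:
            raise LookupError('unknown delta base: %r' % (deltabase,))
        return True  # safe to hand to _addrevision()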
2634 def iscensored(self, rev):
2684 def iscensored(self, rev):
2635 """Check if a file revision is censored."""
2685 """Check if a file revision is censored."""
2636 if not self._censorable:
2686 if not self._censorable:
2637 return False
2687 return False
2638
2688
2639 return self.flags(rev) & REVIDX_ISCENSORED
2689 return self.flags(rev) & REVIDX_ISCENSORED
2640
2690
2641 def _peek_iscensored(self, baserev, delta, flush):
2691 def _peek_iscensored(self, baserev, delta, flush):
2642 """Quickly check if a delta produces a censored revision."""
2692 """Quickly check if a delta produces a censored revision."""
2643 if not self._censorable:
2693 if not self._censorable:
2644 return False
2694 return False
2645
2695
2646 # Fragile heuristic: unless new file meta keys are added alphabetically
2696 # Fragile heuristic: unless new file meta keys are added alphabetically
2647 # preceding "censored", all censored revisions are prefixed by
2697 # preceding "censored", all censored revisions are prefixed by
2648 # "\1\ncensored:". A delta producing such a censored revision must be a
2698 # "\1\ncensored:". A delta producing such a censored revision must be a
2649 # full-replacement delta, so we inspect the first and only patch in the
2699 # full-replacement delta, so we inspect the first and only patch in the
2650 # delta for this prefix.
2700 # delta for this prefix.
2651 hlen = struct.calcsize(">lll")
2701 hlen = struct.calcsize(">lll")
2652 if len(delta) <= hlen:
2702 if len(delta) <= hlen:
2653 return False
2703 return False
2654
2704
2655 oldlen = self.rawsize(baserev)
2705 oldlen = self.rawsize(baserev)
2656 newlen = len(delta) - hlen
2706 newlen = len(delta) - hlen
2657 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2707 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2658 return False
2708 return False
2659
2709
2660 add = "\1\ncensored:"
2710 add = "\1\ncensored:"
2661 addlen = len(add)
2711 addlen = len(add)
2662 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2712 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2663
2713
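A self-contained rendition of the heuristic above. It assumes, as mdiff.replacediffheader() does, that a full replacement is a single hunk whose ``>lll`` header encodes (start=0, end=oldlen, newlen); all names below are local to the sketch:

    import struct

    def _replacediffheader(oldlen, newlen):
        # single hunk replacing bytes [0, oldlen) with `newlen` new bytes
        return struct.pack(">lll", 0, oldlen, newlen)

    def peek_iscensored(oldlen, delta):
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False
        newlen = len(delta) - hlen
        if delta[:hlen] != _replacediffheader(oldlen, newlen):
            return False  # not a full-replacement delta
        marker = b"\1\ncensored:"
        return newlen >= len(marker) and delta[hlen:hlen + len(marker)] == marker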
2664 def getstrippoint(self, minlink):
2714 def getstrippoint(self, minlink):
2665 """find the minimum rev that must be stripped to strip the linkrev
2715 """find the minimum rev that must be stripped to strip the linkrev
2666
2716
2667 Returns a tuple containing the minimum rev and a set of all revs that
2717 Returns a tuple containing the minimum rev and a set of all revs that
2668 have linkrevs that will be broken by this strip.
2718 have linkrevs that will be broken by this strip.
2669 """
2719 """
2670 brokenrevs = set()
2720 brokenrevs = set()
2671 strippoint = len(self)
2721 strippoint = len(self)
2672
2722
2673 heads = {}
2723 heads = {}
2674 futurelargelinkrevs = set()
2724 futurelargelinkrevs = set()
2675 for head in self.headrevs():
2725 for head in self.headrevs():
2676 headlinkrev = self.linkrev(head)
2726 headlinkrev = self.linkrev(head)
2677 heads[head] = headlinkrev
2727 heads[head] = headlinkrev
2678 if headlinkrev >= minlink:
2728 if headlinkrev >= minlink:
2679 futurelargelinkrevs.add(headlinkrev)
2729 futurelargelinkrevs.add(headlinkrev)
2680
2730
2681 # This algorithm involves walking down the rev graph, starting at the
2731 # This algorithm involves walking down the rev graph, starting at the
2682 # heads. Since the revs are topologically sorted according to linkrev,
2732 # heads. Since the revs are topologically sorted according to linkrev,
2683 # once all head linkrevs are below the minlink, we know there are
2733 # once all head linkrevs are below the minlink, we know there are
2684 # no more revs that could have a linkrev greater than minlink.
2734 # no more revs that could have a linkrev greater than minlink.
2685 # So we can stop walking.
2735 # So we can stop walking.
2686 while futurelargelinkrevs:
2736 while futurelargelinkrevs:
2687 strippoint -= 1
2737 strippoint -= 1
2688 linkrev = heads.pop(strippoint)
2738 linkrev = heads.pop(strippoint)
2689
2739
2690 if linkrev < minlink:
2740 if linkrev < minlink:
2691 brokenrevs.add(strippoint)
2741 brokenrevs.add(strippoint)
2692 else:
2742 else:
2693 futurelargelinkrevs.remove(linkrev)
2743 futurelargelinkrevs.remove(linkrev)
2694
2744
2695 for p in self.parentrevs(strippoint):
2745 for p in self.parentrevs(strippoint):
2696 if p != nullrev:
2746 if p != nullrev:
2697 plinkrev = self.linkrev(p)
2747 plinkrev = self.linkrev(p)
2698 heads[p] = plinkrev
2748 heads[p] = plinkrev
2699 if plinkrev >= minlink:
2749 if plinkrev >= minlink:
2700 futurelargelinkrevs.add(plinkrev)
2750 futurelargelinkrevs.add(plinkrev)
2701
2751
2702 return strippoint, brokenrevs
2752 return strippoint, brokenrevs
2703
2753
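To make the walk concrete, a toy version of getstrippoint() on a five-revision linear history (all names are local to the sketch; nullrev is -1 as in revlog):

    linkrevs   = [0, 1, 2, 3, 4]                       # linkrev of each rev
    parentrevs = [(-1, -1), (0, -1), (1, -1), (2, -1), (3, -1)]

    def toystrippoint(minlink):
        heads = {4: linkrevs[4]}                        # the only head is rev 4
        futurelarge = set(l for l in heads.values() if l >= minlink)
        strippoint, broken = len(linkrevs), set()
        while futurelarge:
            strippoint -= 1
            linkrev = heads.pop(strippoint)
            if linkrev < minlink:
                broken.add(strippoint)
            else:
                futurelarge.discard(linkrev)
            for p in parentrevs[strippoint]:
                if p != -1:
                    heads[p] = linkrevs[p]
                    if linkrevs[p] >= minlink:
                        futurelarge.add(linkrevs[p])
        return strippoint, broken

    # stripping everything linked at or above changelog rev 3 truncates at rev 3
    assert toystrippoint(3) == (3, set())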
2704 def strip(self, minlink, transaction):
2754 def strip(self, minlink, transaction):
2705 """truncate the revlog on the first revision with a linkrev >= minlink
2755 """truncate the revlog on the first revision with a linkrev >= minlink
2706
2756
2707 This function is called when we're stripping revision minlink and
2757 This function is called when we're stripping revision minlink and
2708 its descendants from the repository.
2758 its descendants from the repository.
2709
2759
2710 We have to remove all revisions with linkrev >= minlink, because
2760 We have to remove all revisions with linkrev >= minlink, because
2711 the equivalent changelog revisions will be renumbered after the
2761 the equivalent changelog revisions will be renumbered after the
2712 strip.
2762 strip.
2713
2763
2714 So we truncate the revlog on the first of these revisions, and
2764 So we truncate the revlog on the first of these revisions, and
2715 trust that the caller has saved the revisions that shouldn't be
2765 trust that the caller has saved the revisions that shouldn't be
2716 removed and that it'll re-add them after this truncation.
2766 removed and that it'll re-add them after this truncation.
2717 """
2767 """
2718 if len(self) == 0:
2768 if len(self) == 0:
2719 return
2769 return
2720
2770
2721 rev, _ = self.getstrippoint(minlink)
2771 rev, _ = self.getstrippoint(minlink)
2722 if rev == len(self):
2772 if rev == len(self):
2723 return
2773 return
2724
2774
2725 # first truncate the files on disk
2775 # first truncate the files on disk
2726 end = self.start(rev)
2776 end = self.start(rev)
2727 if not self._inline:
2777 if not self._inline:
2728 transaction.add(self.datafile, end)
2778 transaction.add(self.datafile, end)
2729 end = rev * self._io.size
2779 end = rev * self._io.size
2730 else:
2780 else:
2731 end += rev * self._io.size
2781 end += rev * self._io.size
2732
2782
2733 transaction.add(self.indexfile, end)
2783 transaction.add(self.indexfile, end)
2734
2784
2735 # then reset internal state in memory to forget those revisions
2785 # then reset internal state in memory to forget those revisions
2736 self._cache = None
2786 self._cache = None
2737 self._chaininfocache = {}
2787 self._chaininfocache = {}
2738 self._chunkclear()
2788 self._chunkclear()
2739 for x in xrange(rev, len(self)):
2789 for x in xrange(rev, len(self)):
2740 del self.nodemap[self.node(x)]
2790 del self.nodemap[self.node(x)]
2741
2791
2742 del self.index[rev:-1]
2792 del self.index[rev:-1]
2743 self._nodepos = None
2793 self._nodepos = None
2744
2794
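The truncation offsets computed above, restated as a standalone helper. It assumes the 64-byte v1 index entry size; ``datastart`` plays the role of self.start(rev) (illustrative only):

    INDEX_ENTRY_SIZE = 64  # size of one v1 index entry

    def truncation_offsets(rev, datastart, inline):
        if not inline:
            # separate .d file: cut the data at the first byte of `rev`,
            # and the index after a whole number of fixed-size entries
            return {'datafile': datastart,
                    'indexfile': rev * INDEX_ENTRY_SIZE}
        # inline revlog: entries and chunk data share the .i file, so the two
        # contributions add up to a single offset
        return {'indexfile': datastart + rev * INDEX_ENTRY_SIZE}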
2745 def checksize(self):
2795 def checksize(self):
2746 expected = 0
2796 expected = 0
2747 if len(self):
2797 if len(self):
2748 expected = max(0, self.end(len(self) - 1))
2798 expected = max(0, self.end(len(self) - 1))
2749
2799
2750 try:
2800 try:
2751 with self._datafp() as f:
2801 with self._datafp() as f:
2752 f.seek(0, 2)
2802 f.seek(0, 2)
2753 actual = f.tell()
2803 actual = f.tell()
2754 dd = actual - expected
2804 dd = actual - expected
2755 except IOError as inst:
2805 except IOError as inst:
2756 if inst.errno != errno.ENOENT:
2806 if inst.errno != errno.ENOENT:
2757 raise
2807 raise
2758 dd = 0
2808 dd = 0
2759
2809
2760 try:
2810 try:
2761 f = self.opener(self.indexfile)
2811 f = self.opener(self.indexfile)
2762 f.seek(0, 2)
2812 f.seek(0, 2)
2763 actual = f.tell()
2813 actual = f.tell()
2764 f.close()
2814 f.close()
2765 s = self._io.size
2815 s = self._io.size
2766 i = max(0, actual // s)
2816 i = max(0, actual // s)
2767 di = actual - (i * s)
2817 di = actual - (i * s)
2768 if self._inline:
2818 if self._inline:
2769 databytes = 0
2819 databytes = 0
2770 for r in self:
2820 for r in self:
2771 databytes += max(0, self.length(r))
2821 databytes += max(0, self.length(r))
2772 dd = 0
2822 dd = 0
2773 di = actual - len(self) * s - databytes
2823 di = actual - len(self) * s - databytes
2774 except IOError as inst:
2824 except IOError as inst:
2775 if inst.errno != errno.ENOENT:
2825 if inst.errno != errno.ENOENT:
2776 raise
2826 raise
2777 di = 0
2827 di = 0
2778
2828
2779 return (dd, di)
2829 return (dd, di)
2780
2830
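What the (dd, di) pair means in practice, shown as a toy non-inline check with the same 64-byte entry assumption (names local to the sketch):

    INDEX_ENTRY_SIZE = 64

    def toychecksize(expected_data_end, actual_data_size, actual_index_size):
        # dd: bytes by which the data file deviates from the end of the last rev
        dd = actual_data_size - expected_data_end
        # di: bytes left over after a whole number of index entries
        entries = max(0, actual_index_size // INDEX_ENTRY_SIZE)
        di = actual_index_size - entries * INDEX_ENTRY_SIZE
        return dd, di

    # intact files: last chunk ends at byte 300, three 64-byte index entries
    assert toychecksize(300, 300, 3 * INDEX_ENTRY_SIZE) == (0, 0)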
2781 def files(self):
2831 def files(self):
2782 res = [self.indexfile]
2832 res = [self.indexfile]
2783 if not self._inline:
2833 if not self._inline:
2784 res.append(self.datafile)
2834 res.append(self.datafile)
2785 return res
2835 return res
2786
2836
2787 DELTAREUSEALWAYS = 'always'
2837 DELTAREUSEALWAYS = 'always'
2788 DELTAREUSESAMEREVS = 'samerevs'
2838 DELTAREUSESAMEREVS = 'samerevs'
2789 DELTAREUSENEVER = 'never'
2839 DELTAREUSENEVER = 'never'
2790
2840
2791 DELTAREUSEFULLADD = 'fulladd'
2841 DELTAREUSEFULLADD = 'fulladd'
2792
2842
2793 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2843 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2794
2844
2795 def clone(self, tr, destrevlog, addrevisioncb=None,
2845 def clone(self, tr, destrevlog, addrevisioncb=None,
2796 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2846 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2797 """Copy this revlog to another, possibly with format changes.
2847 """Copy this revlog to another, possibly with format changes.
2798
2848
2799 The destination revlog will contain the same revisions and nodes.
2849 The destination revlog will contain the same revisions and nodes.
2800 However, it may not be bit-for-bit identical due to e.g. delta encoding
2850 However, it may not be bit-for-bit identical due to e.g. delta encoding
2801 differences.
2851 differences.
2802
2852
2803 The ``deltareuse`` argument controls how deltas from the existing revlog
2853 The ``deltareuse`` argument controls how deltas from the existing revlog
2804 are preserved in the destination revlog. The argument can have the
2854 are preserved in the destination revlog. The argument can have the
2805 following values:
2855 following values:
2806
2856
2807 DELTAREUSEALWAYS
2857 DELTAREUSEALWAYS
2808 Deltas will always be reused (if possible), even if the destination
2858 Deltas will always be reused (if possible), even if the destination
2809 revlog would not select the same revisions for the delta. This is the
2859 revlog would not select the same revisions for the delta. This is the
2810 fastest mode of operation.
2860 fastest mode of operation.
2811 DELTAREUSESAMEREVS
2861 DELTAREUSESAMEREVS
2812 Deltas will be reused if the destination revlog would pick the same
2862 Deltas will be reused if the destination revlog would pick the same
2813 revisions for the delta. This mode strikes a balance between speed
2863 revisions for the delta. This mode strikes a balance between speed
2814 and optimization.
2864 and optimization.
2815 DELTAREUSENEVER
2865 DELTAREUSENEVER
2816 Deltas will never be reused. This is the slowest mode of execution.
2866 Deltas will never be reused. This is the slowest mode of execution.
2817 This mode can be used to recompute deltas (e.g. if the diff/delta
2867 This mode can be used to recompute deltas (e.g. if the diff/delta
2818 algorithm changes).
2868 algorithm changes).
2819
2869
2820 Delta computation can be slow, so the choice of delta reuse policy can
2870 Delta computation can be slow, so the choice of delta reuse policy can
2821 significantly affect run time.
2871 significantly affect run time.
2822
2872
2823 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2873 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2824 two extremes. Deltas will be reused if they are appropriate. But if the
2874 two extremes. Deltas will be reused if they are appropriate. But if the
2825 destination would choose a better delta base, it will do so. This means if you
2875 destination would choose a better delta base, it will do so. This means if you
2826 are converting a non-generaldelta revlog to a generaldelta revlog,
2876 are converting a non-generaldelta revlog to a generaldelta revlog,
2827 deltas will be recomputed if the delta's parent isn't a parent of the
2877 deltas will be recomputed if the delta's parent isn't a parent of the
2828 revision.
2878 revision.
2829
2879
2830 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2880 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2831 controls whether to compute deltas against both parents for merges.
2881 controls whether to compute deltas against both parents for merges.
2832 If unspecified, the destination revlog's current setting is kept.
2882 If unspecified, the destination revlog's current setting is kept.
2833 """
2883 """
2834 if deltareuse not in self.DELTAREUSEALL:
2884 if deltareuse not in self.DELTAREUSEALL:
2835 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2885 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2836
2886
2837 if len(destrevlog):
2887 if len(destrevlog):
2838 raise ValueError(_('destination revlog is not empty'))
2888 raise ValueError(_('destination revlog is not empty'))
2839
2889
2840 if getattr(self, 'filteredrevs', None):
2890 if getattr(self, 'filteredrevs', None):
2841 raise ValueError(_('source revlog has filtered revisions'))
2891 raise ValueError(_('source revlog has filtered revisions'))
2842 if getattr(destrevlog, 'filteredrevs', None):
2892 if getattr(destrevlog, 'filteredrevs', None):
2843 raise ValueError(_('destination revlog has filtered revisions'))
2893 raise ValueError(_('destination revlog has filtered revisions'))
2844
2894
2845 # lazydeltabase controls whether to reuse a cached delta, if possible.
2895 # lazydeltabase controls whether to reuse a cached delta, if possible.
2846 oldlazydeltabase = destrevlog._lazydeltabase
2896 oldlazydeltabase = destrevlog._lazydeltabase
2847 oldamd = destrevlog._aggressivemergedeltas
2897 oldamd = destrevlog._aggressivemergedeltas
2848
2898
2849 try:
2899 try:
2850 if deltareuse == self.DELTAREUSEALWAYS:
2900 if deltareuse == self.DELTAREUSEALWAYS:
2851 destrevlog._lazydeltabase = True
2901 destrevlog._lazydeltabase = True
2852 elif deltareuse == self.DELTAREUSESAMEREVS:
2902 elif deltareuse == self.DELTAREUSESAMEREVS:
2853 destrevlog._lazydeltabase = False
2903 destrevlog._lazydeltabase = False
2854
2904
2855 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2905 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2856
2906
2857 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2907 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2858 self.DELTAREUSESAMEREVS)
2908 self.DELTAREUSESAMEREVS)
2859
2909
2860 deltacomputer = _deltacomputer(destrevlog)
2910 deltacomputer = _deltacomputer(destrevlog)
2861 index = self.index
2911 index = self.index
2862 for rev in self:
2912 for rev in self:
2863 entry = index[rev]
2913 entry = index[rev]
2864
2914
2865 # Some classes override linkrev to take filtered revs into
2915 # Some classes override linkrev to take filtered revs into
2866 # account. Use raw entry from index.
2916 # account. Use raw entry from index.
2867 flags = entry[0] & 0xffff
2917 flags = entry[0] & 0xffff
2868 linkrev = entry[4]
2918 linkrev = entry[4]
2869 p1 = index[entry[5]][7]
2919 p1 = index[entry[5]][7]
2870 p2 = index[entry[6]][7]
2920 p2 = index[entry[6]][7]
2871 node = entry[7]
2921 node = entry[7]
2872
2922
2873 # (Possibly) reuse the delta from the revlog if allowed and
2923 # (Possibly) reuse the delta from the revlog if allowed and
2874 # the revlog chunk is a delta.
2924 # the revlog chunk is a delta.
2875 cachedelta = None
2925 cachedelta = None
2876 rawtext = None
2926 rawtext = None
2877 if populatecachedelta:
2927 if populatecachedelta:
2878 dp = self.deltaparent(rev)
2928 dp = self.deltaparent(rev)
2879 if dp != nullrev:
2929 if dp != nullrev:
2880 cachedelta = (dp, bytes(self._chunk(rev)))
2930 cachedelta = (dp, bytes(self._chunk(rev)))
2881
2931
2882 if not cachedelta:
2932 if not cachedelta:
2883 rawtext = self.revision(rev, raw=True)
2933 rawtext = self.revision(rev, raw=True)
2884
2934
2885
2935
2886 if deltareuse == self.DELTAREUSEFULLADD:
2936 if deltareuse == self.DELTAREUSEFULLADD:
2887 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2937 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2888 cachedelta=cachedelta,
2938 cachedelta=cachedelta,
2889 node=node, flags=flags,
2939 node=node, flags=flags,
2890 deltacomputer=deltacomputer)
2940 deltacomputer=deltacomputer)
2891 else:
2941 else:
2892 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2942 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2893 checkambig=False)
2943 checkambig=False)
2894 dfh = None
2944 dfh = None
2895 if not destrevlog._inline:
2945 if not destrevlog._inline:
2896 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2946 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2897 try:
2947 try:
2898 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2948 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2899 p2, flags, cachedelta, ifh, dfh,
2949 p2, flags, cachedelta, ifh, dfh,
2900 deltacomputer=deltacomputer)
2950 deltacomputer=deltacomputer)
2901 finally:
2951 finally:
2902 if dfh:
2952 if dfh:
2903 dfh.close()
2953 dfh.close()
2904 ifh.close()
2954 ifh.close()
2905
2955
2906 if addrevisioncb:
2956 if addrevisioncb:
2907 addrevisioncb(self, rev, node)
2957 addrevisioncb(self, rev, node)
2908 finally:
2958 finally:
2909 destrevlog._lazydeltabase = oldlazydeltabase
2959 destrevlog._lazydeltabase = oldlazydeltabase
2910 destrevlog._aggressivemergedeltas = oldamd
2960 destrevlog._aggressivemergedeltas = oldamd
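A sketch of how a caller might drive clone() with an explicit reuse policy. ``srcrevlog``, ``destrevlog``, ``tr`` and ``ui`` are assumed to exist already (for example, built by repository upgrade code); only the call shape below comes from the method above:

    def copyrevlog(srcrevlog, destrevlog, tr, ui=None):
        def progress(rl, rev, node):
            # clone() calls addrevisioncb with (source revlog, rev, node)
            if ui is not None:
                ui.debug('copied revision %d\n' % rev)

        srcrevlog.clone(tr, destrevlog,
                        addrevisioncb=progress,
                        # recompute a delta only when the destination would not
                        # pick the same base revision as the source did
                        deltareuse=srcrevlog.DELTAREUSESAMEREVS,
                        aggressivemergedeltas=None)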