revlog: reintroduce `revlog.descendant` as deprecated...
Boris Feld
r38732:4ac3c207 default
@@ -1,2900 +1,2905 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import heapq
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .utils import (
    stringutil,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
# bitmask for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)
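
# Illustration (sketch, not part of the module): the round-trip implemented
# by packmeta()/parsemeta() above. Metadata is framed by a pair of "\1\n"
# markers, with "key: value" lines in between:
#
#   >>> blob = packmeta({'copy': 'a', 'copyrev': '0' * 40}, 'file content')
#   >>> meta, size = parsemeta(blob)
#   >>> meta['copy'], blob[size:]
#   ('a', 'file content')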

def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read) f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw) f(self, rawtext) -> bool
    "text" is presented to the user. "rawtext" is stored in revlog data, not
    directly visible to the user.
    The boolean returned by these transforms is used to determine whether
    the returned text can be used for hash integrity checking. For example,
    if "write" returns False, then "text" is used to generate the hash. If
    "write" returns True, the "rawtext" returned by "write" should be used to
    generate the hash. Usually, "write" and "read" return different booleans,
    and "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor
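
# Illustration (sketch): a flag processor is a (read, write, raw) 3-tuple
# matching the signatures documented above. The helpers resolveexternal()
# and storeexternal() below are hypothetical:
#
#   def _readext(self, rawtext):
#       # map the stored pointer back to user-visible text; the returned
#       # text is what the hash covers, so True
#       return resolveexternal(rawtext), True
#
#   def _writeext(self, text):
#       # store a pointer; rawtext cannot be used for hashing, so False
#       return storeexternal(text), False
#
#   def _rawext(self, rawtext):
#       # rawtext alone cannot be used for hash verification
#       return False
#
#   addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))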

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
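
# Illustration (sketch): the first index field packs the data offset and the
# flag bits into a single integer; getoffset()/gettype() undo offset_type():
#
#   >>> q = offset_type(1048576, REVIDX_ISCENSORED)
#   >>> getoffset(q)
#   1048576
#   >>> gettype(q) == REVIDX_ISCENSORED
#   True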

_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()
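
# Illustration (sketch): the node hash is sha1(min(p1, p2) + max(p1, p2) +
# text), so it is insensitive to parent order:
#
#   >>> p1, p2 = b'a' * 20, b'b' * 20
#   >>> hash(b'data', p1, p2) == hash(b'data', p2, p1)
#   True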

class _testrevlog(object):
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0):
        """data is a list of revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap

    def start(self, rev):
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # Trim empty revs at the end, but never the very first revision of a chain
    while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
        endidx -= 1

    return revs[startidx:endidx]

def _segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> _segmentspan(revlog, [0, 4])
    17
    >>> _segmentspan(revlog, [3, 4])
    5
    >>> _segmentspan(revlog, [1, 2, 3,])
    7
    >>> _segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    return revlog.end(revs[-1]) - revlog.start(revs[0])

def _slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one go.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If an individual revision chunk is larger than this limit, it will still
    be yielded individually.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], 15))
    [[0], [11], [13], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], 20))
    [[0], [11], [13, 15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    for chunk in _slicechunktodensity(revlog, revs,
                                      revlog._srdensitythreshold,
                                      revlog._srmingapsize):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk

def _slicechunktosize(revlog, revs, targetsize):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected, but
    that are still too large compared to the read guarantee of revlogs. This
    might happen when the "minimal gap size" interrupted the slicing, or when
    chains are built in a way that creates large blocks next to each other.

    >>> revlog = _testrevlog([
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    startrevidx = 0
    startdata = revlog.start(revs[0])
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        if span <= targetsize:
            endrevidx = idx
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx
    yield _trimchunk(revlog, revs, startrevidx)

def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one go.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    readdata = deltachainspan = _segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < targetdensity:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()

class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides candidate revisions worth diffing against, grouped by level
        of easiness.
        """
        revlog = self.revlog
        gdelta = revlog._generaldelta
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and gdelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if gdelta:
                # exclude the already lazily tested base, if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]

                if not revlog._aggressivemergedeltas and len(parents) == 2:
                    parents.sort()
                    # To minimize the chance of having to build a fulltext,
                    # pick first whichever parent is closest to us (max rev)
                    yield (parents[1],)
                    # then the other one (min rev) if the first did not fit
                    yield (parents[0],)
                    tested.update(parents)
                elif len(parents) > 0:
                    # Test all parents (1 or 2), and keep the best candidate
                    yield parents
                    tested.update(parents)

            if prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                yield (prev,)
                tested.add(prev)
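
    # Illustration (sketch): with generaldelta enabled, no usable cached
    # delta, and ``_aggressivemergedeltas`` disabled, a merge revision with
    # parents at revs 5 and 9 makes _getcandidaterevs yield (9,), then (5,),
    # then (prev,) -- closest parent first, previous revision as a last
    # resort before falling back to a fulltext.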

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated with the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
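
# Illustration (sketch): a v0 index entry is fixed-size, 4 * 4 + 3 * 20 = 76
# bytes, so entry n starts at byte offset n * 76 in the index file:
#
#   >>> indexformatv0.size
#   76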

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
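
# Illustration (sketch): a v1 ("ng") index entry is 8 + 6 * 4 + 20 + 12 = 64
# bytes. The revlog version header overlays the first 4 bytes of the first
# entry's offset field (see revlogio.packentry below):
#
#   >>> indexformatng.size
#   64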

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
864 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
864 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
865 mmaplargeindex=False, censorable=False):
865 mmaplargeindex=False, censorable=False):
866 """
866 """
867 create a revlog object
867 create a revlog object
868
868
869 opener is a function that abstracts the file opening operation
869 opener is a function that abstracts the file opening operation
870 and can be used to implement COW semantics or the like.
870 and can be used to implement COW semantics or the like.
871 """
871 """
872 self.indexfile = indexfile
872 self.indexfile = indexfile
873 self.datafile = datafile or (indexfile[:-2] + ".d")
873 self.datafile = datafile or (indexfile[:-2] + ".d")
874 self.opener = opener
874 self.opener = opener
875 # When True, indexfile is opened with checkambig=True at writing, to
875 # When True, indexfile is opened with checkambig=True at writing, to
876 # avoid file stat ambiguity.
876 # avoid file stat ambiguity.
877 self._checkambig = checkambig
877 self._checkambig = checkambig
878 self._censorable = censorable
878 self._censorable = censorable
879 # 3-tuple of (node, rev, text) for a raw revision.
879 # 3-tuple of (node, rev, text) for a raw revision.
880 self._cache = None
880 self._cache = None
881 # Maps rev to chain base rev.
881 # Maps rev to chain base rev.
882 self._chainbasecache = util.lrucachedict(100)
882 self._chainbasecache = util.lrucachedict(100)
883 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
883 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
884 self._chunkcache = (0, '')
884 self._chunkcache = (0, '')
885 # How much data to read and cache into the raw revlog data cache.
885 # How much data to read and cache into the raw revlog data cache.
886 self._chunkcachesize = 65536
886 self._chunkcachesize = 65536
887 self._maxchainlen = None
887 self._maxchainlen = None
888 self._aggressivemergedeltas = True
888 self._aggressivemergedeltas = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._withsparseread = bool(opts.get('with-sparse-read', False))
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

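        # Note on the power-of-two check just above: a power of two has a
        # single bit set, so "n & (n - 1)" is zero exactly for powers of two.
        # For example, 65536 & 65535 == 0 (accepted), while
        # 65537 & 65536 == 65536 (rejected).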
        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                        self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

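        # Worked example, using the flag constants defined at the top of this
        # file: a version 1 inline revlog stores REVLOGV1 | FLAG_INLINE_DATA
        # == 0x10001 in its header, so the masking above yields fmt == 1 and
        # flags == 0x10000.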
        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

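    # The parsed index ends with an extra sentinel entry for the null
    # revision, which is why tip() subtracts 2 and __len__ subtracts 1 below.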
    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
1037 """iterate over all rev in this revlog (from start to stop)"""
1037 """iterate over all rev in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

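    # For example, revs(start=5, stop=2) yields 5, 4, 3, 2: the "stop +=
    # step" above shifts the bound so that the stop revision itself is
    # included in either direction.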
    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            else:
                assert p < len(i)
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

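    # The pure-python fallback above scans the index backwards from
    # _nodepos, memoizing every node it passes in _nodecache, so repeated
    # lookups visit each index entry at most once overall.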
    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
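    # Inferred from the accessors below, a full index entry is an 8-tuple:
    #   [0] offset << 16 | flags    [1] compressed length
    #   [2] uncompressed length     [3] delta base rev
    #   [4] linkrev                 [5] first parent rev
    #   [6] second parent rev       [7] node id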
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
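            # (This "else" belongs to the "while" loop: it only runs when the
            # walk reaches the chain base without hitting "break".)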
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

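    # Hypothetical example: if rev 7 is stored as a delta against rev 5, and
    # rev 5 against rev 2 (a full snapshot), _deltachain(7) returns
    # ([2, 5, 7], False) while _deltachain(7, stoprev=5) returns ([7], True).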
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

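    # Because revision numbers are topologically sorted, one forward sweep
    # suffices: in a linear history 0 <- 1 <- 2, descendants([0]) marks 1
    # (parent 0 is seen), then 2 (parent 1 is seen), yielding 1, 2.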
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

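    # Usage sketch (hypothetical revlog `rl`):
    #   inc = rl.incrementalmissingrevs(common=[2])
    #   inc.missingancestors([5])  # revs reachable from 5 but not from 2
    # The same object can then be fed further heads incrementally.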
    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents is a
                # descendant. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

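    # For instance, in a linear history 0 <- 1 <- 2 every rev is first
    # marked as a candidate head, then cleared when it appears as a parent,
    # leaving only rev 2 set.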
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

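    # Deprecated compatibility wrapper, reintroduced for external callers of
    # the old descendant() API; it warns through util.nouideprecwarn because
    # a revlog has no ui object to issue the deprecation warning itself.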
    def descendant(self, a, b):
        msg = (b'revlog.descendant is deprecated, use revlog.isancestorrev')
        util.nouideprecwarn(msg, b'4.7')
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
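        # An ancestor can never have a larger revision number than its
        # descendant (revision numbers are topologically sorted), hence the
        # a > b shortcut above.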
        return a in self._commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
1733 maybewdir = wdirhex.startswith(id)
1738 maybewdir = wdirhex.startswith(id)
1734 try:
1739 try:
1735 partial = self.index.partialmatch(id)
1740 partial = self.index.partialmatch(id)
1736 if partial and self.hasnode(partial):
1741 if partial and self.hasnode(partial):
1737 if maybewdir:
1742 if maybewdir:
1738 # single 'ff...' match in radix tree, ambiguous with wdir
1743 # single 'ff...' match in radix tree, ambiguous with wdir
1739 raise RevlogError
1744 raise RevlogError
1740 return partial
1745 return partial
1741 if maybewdir:
1746 if maybewdir:
1742 # no 'ff...' match in radix tree, wdir identified
1747 # no 'ff...' match in radix tree, wdir identified
1743 raise error.WdirUnsupported
1748 raise error.WdirUnsupported
1744 return None
1749 return None
1745 except RevlogError:
1750 except RevlogError:
1746 # parsers.c radix tree lookup gave multiple matches
1751 # parsers.c radix tree lookup gave multiple matches
1747 # fast path: for unfiltered changelog, radix tree is accurate
1752 # fast path: for unfiltered changelog, radix tree is accurate
1748 if not getattr(self, 'filteredrevs', None):
1753 if not getattr(self, 'filteredrevs', None):
1749 raise LookupError(id, self.indexfile,
1754 raise LookupError(id, self.indexfile,
1750 _('ambiguous identifier'))
1755 _('ambiguous identifier'))
1751 # fall through to slow path that filters hidden revisions
1756 # fall through to slow path that filters hidden revisions
1752 except (AttributeError, ValueError):
1757 except (AttributeError, ValueError):
1753 # we are pure python, or key was too short to search radix tree
1758 # we are pure python, or key was too short to search radix tree
1754 pass
1759 pass
1755
1760
1756 if id in self._pcache:
1761 if id in self._pcache:
1757 return self._pcache[id]
1762 return self._pcache[id]
1758
1763
1759 if len(id) <= 40:
1764 if len(id) <= 40:
1760 try:
1765 try:
1761 # hex(node)[:...]
1766 # hex(node)[:...]
1762 l = len(id) // 2 # grab an even number of digits
1767 l = len(id) // 2 # grab an even number of digits
1763 prefix = bin(id[:l * 2])
1768 prefix = bin(id[:l * 2])
1764 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1769 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1765 nl = [n for n in nl if hex(n).startswith(id) and
1770 nl = [n for n in nl if hex(n).startswith(id) and
1766 self.hasnode(n)]
1771 self.hasnode(n)]
1767 if len(nl) > 0:
1772 if len(nl) > 0:
1768 if len(nl) == 1 and not maybewdir:
1773 if len(nl) == 1 and not maybewdir:
1769 self._pcache[id] = nl[0]
1774 self._pcache[id] = nl[0]
1770 return nl[0]
1775 return nl[0]
1771 raise LookupError(id, self.indexfile,
1776 raise LookupError(id, self.indexfile,
1772 _('ambiguous identifier'))
1777 _('ambiguous identifier'))
1773 if maybewdir:
1778 if maybewdir:
1774 raise error.WdirUnsupported
1779 raise error.WdirUnsupported
1775 return None
1780 return None
1776 except TypeError:
1781 except TypeError:
1777 pass
1782 pass
1778
1783
1779 def lookup(self, id):
1784 def lookup(self, id):
1780 """locate a node based on:
1785 """locate a node based on:
1781 - revision number or str(revision number)
1786 - revision number or str(revision number)
1782 - nodeid or subset of hex nodeid
1787 - nodeid or subset of hex nodeid
1783 """
1788 """
1784 n = self._match(id)
1789 n = self._match(id)
1785 if n is not None:
1790 if n is not None:
1786 return n
1791 return n
1787 n = self._partialmatch(id)
1792 n = self._partialmatch(id)
1788 if n:
1793 if n:
1789 return n
1794 return n
1790
1795
1791 raise LookupError(id, self.indexfile, _('no match found'))
1796 raise LookupError(id, self.indexfile, _('no match found'))
1792
1797
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

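    # Sketch of the shortest()/lookup() round trip (hypothetical ``rl``
    # instance, an illustration rather than upstream code): the returned
    # prefix is unambiguous, so it resolves back to the same node.
    #
    #   prefix = rl.shortest(node)        # e.g. '1f2e3'
    #   assert rl.lookup(prefix) == node
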
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

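    # Append-vs-replace behavior illustrated with invented values (a
    # sketch, not from the original file): a segment starting exactly
    # where the cached one ends extends the cache; anything else
    # replaces it.
    #
    #   self._chunkcache = (0, 'abcd')
    #   self._cachesegment(4, 'efgh')    # contiguous: (0, 'abcdefgh')
    #   self._cachesegment(100, 'xyz')   # gap: cache becomes (100, 'xyz')
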
    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

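    # The masking above aligns the read window to multiples of the cache
    # size, which is required to be a power of two. A worked example with
    # invented numbers, assuming a 64KiB _chunkcachesize:
    #
    #   offset, length, cachesize = 70000, 1000, 65536
    #   realoffset = 70000 & ~(65536 - 1)                      # 65536
    #   reallength = ((70000 + 1000 + 65536) & ~(65536 - 1)) - 65536
    #   # = 131072 - 65536 = 65536; [65536, 131072) covers the request
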
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

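    # Sketch of carving per-revision chunks out of the returned segment,
    # per the docstring above (hypothetical ``rl``; a non-inline revlog is
    # assumed, inline revlogs need the index-size adjustment shown above):
    #
    #   offset, data = rl._getsegmentforrevs(firstrev, lastrev)
    #   for rev in range(firstrev, lastrev + 1):
    #       begin = rl.start(rev) - offset
    #       chunk = data[begin:begin + rl.length(rev)]
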
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs, targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

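    # Behaviorally this is equivalent to the naive loop below (a sketch
    # for clarity; the method above instead batches I/O and slices one
    # contiguous segment per chunk of revisions, as the OverflowError
    # fallback shows):
    #
    #   [self._chunk(rev, df=df) for rev in revs]
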
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

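    # Resolution sketch with invented values: suppose the index entry for
    # rev 7 stores base == 5.
    #
    #   generaldelta layout: deltaparent(7) -> 5   (base names the parent)
    #   legacy layout:       deltaparent(7) -> 6   (delta against rev - 1)
    # If instead base == rev (a full snapshot), deltaparent(7) -> nullrev.
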
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

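    # Usage sketch (hypothetical ``rl``; not part of the original file):
    # both spellings reconstruct the fulltext from the delta chain and run
    # the registered flag processors.
    #
    #   text = rl.revision(5)            # by revision number
    #   text = rl.revision(rl.node(5))   # by binary nodeid
    #   raw = rl.revision(5, raw=True)   # stored rawtext, e.g. for
    #                                    # changegroup generation
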
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

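    # Shape of one registered processor, as unpacked in the loop above (a
    # hedged sketch; the function names are invented, and registration
    # normally goes through addflagprocessor() rather than direct dict
    # assignment):
    #
    #   def _readxform(rl, text):
    #       return transformedtext, True    # (new text, validatehash)
    #   def _writexform(rl, text):
    #       return encodedtext, True
    #   def _rawxform(rl, text):
    #       return False                    # rawtext hash not checkable
    #
    #   _flagprocessors[SOMEFLAG] = (_readxform, _writexform, _rawxform)
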
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise RevlogError(_("integrity check failed on %s:%s")
                                  % (self.indexfile,
                                     pycompat.bytestr(revornode)))
        except RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

        # the temp file replaces the real index when we exit the context
        # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such a case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

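    # Minimal call sketch (names invented; the transaction, linkrev and
    # parent nodes come from the surrounding repository machinery):
    #
    #   node = rl.addrevision("content", tr, linkrev, p1node, p2node)
    #   assert rl.revision(node) == "content"  # round trip, no flags set
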
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

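    # The (header, data) convention illustrated (a sketch; the two parts
    # are simply concatenated when written to disk):
    #
    #   compress('')           -> ('', '')       # empty data kept as-is
    #   compress(compressible) -> ('', 'x...')   # engine header is inline
    #   compress('\0...')      -> ('', '\0...')  # NUL lead byte, stored raw
    #   compress(other)        -> ('u', other)   # explicit 'u'ncompressed
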
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') %
                                  stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

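    # Dispatch summary for the header byte examined above:
    #
    #   'x'   -> zlib, via the inlined fast path
    #   '\0'  -> uncompressed data stored verbatim, returned whole
    #   'u'   -> uncompressed behind an explicit marker, strip one byte
    #   other -> resolve a compression engine by revlog header, then cache
    #            the decompressor in self._decompressors
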
    def _isgooddeltainfo(self, deltainfo, revinfo):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if deltainfo is None:
            return False

        # - 'deltainfo.distance' is the distance from the base revision --
        #   bounding it limits the amount of I/O we need to do.
        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
        #   deltas we need to apply -- bounding it limits the amount of CPU
        #   we consume.

        textlen = revinfo.textlen
        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = deltainfo.distance # ensure the conditional passes
        maxdist = max(maxdist, defaultmax)
        if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or
            deltainfo.compresseddeltalen > textlen * 2 or
            (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
            return False

        return True

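    # Worked example with invented numbers: for a 1000-byte fulltext and
    # no explicit _maxdeltachainspan,
    #
    #   defaultmax = 4000                    # textlen * 4
    #   reject if distance > 4000            # too much I/O to reconstruct
    #          or deltalen > 1000            # delta larger than fulltext
    #          or compresseddeltalen > 2000  # textlen * 2, too much CPU
    #          or chainlen > self._maxchainlen
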
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid or node in wdirfilenodeids:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = None
        else:
            deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer._buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

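    # Shape of one ``deltas`` entry consumed above (restating the unpacking
    # in the loop; the values normally come from changegroup decoding):
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags)
    #
    # where ``delta`` is a binary patch against ``deltabase`` and
    # ``linkmapper(linknode)`` maps the changelog node to a linkrev.
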
2624 def iscensored(self, rev):
2629 def iscensored(self, rev):
2625 """Check if a file revision is censored."""
2630 """Check if a file revision is censored."""
2626 if not self._censorable:
2631 if not self._censorable:
2627 return False
2632 return False
2628
2633
2629 return self.flags(rev) & REVIDX_ISCENSORED
2634 return self.flags(rev) & REVIDX_ISCENSORED
2630
2635
    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

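    # Illustrative sketch, not part of the module: the full-replacement test
    # above works because mdiff.replacediffheader(oldlen, newlen) packs a
    # single bsdiff-style hunk header (start, end, newlength) as ">lll" with
    # start=0 and end=oldlen, i.e. "replace the whole base text":
    #
    #   hlen = struct.calcsize(">lll")                   # 12 bytes
    #   header = struct.pack(">lll", 0, oldlen, newlen)  # full replacement
    #   iscensoring = (delta[:hlen] == header and
    #                  delta[hlen:].startswith("\1\ncensored:"))
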
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

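    # Worked example, not part of the module: for a linear revlog whose revs
    # 0..3 carry linkrevs [0, 1, 2, 3], getstrippoint(2) walks back from the
    # single head (rev 3). Revs 3 and 2 have linkrevs >= 2, rev 1 does not,
    # so the walk stops and (2, set()) is returned: truncating at rev 2
    # removes exactly the revisions linked to changesets 2 and 3. A rev lands
    # in brokenrevs only when it sits at or above the strip point while its
    # own linkrev is below minlink:
    #
    #   rev, broken = rl.getstrippoint(2)
    #   assert (rev, broken) == (2, set())
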
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

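    # Illustrative sketch, not part of the module: the truncation offsets
    # computed above, assuming the 64-byte REVLOGV1 index entry size
    # (self._io.size). A split revlog keeps ``rev`` whole index entries plus
    # the data bytes preceding ``rev``; an inline revlog interleaves entries
    # and data in the index file, hence the combined offset:
    #
    #   # split revlog:  .i truncated to rev * self._io.size,
    #   #                .d truncated to self.start(rev)
    #   # inline revlog: .i truncated to self.start(rev) + rev * self._io.size
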
    def checksize(self):
        """Check that the index and data file sizes match the index entries.

        Returns a (dd, di) tuple: the number of bytes by which the data file
        and index file respectively differ from their expected sizes.
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

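    # Usage sketch, not part of the module: a clean revlog reports (0, 0);
    # nonzero values mean the files have been truncated or carry trailing
    # garbage relative to what the index entries promise:
    #
    #   dd, di = rl.checksize()
    #   if dd or di:
    #       print("data off by %d bytes, index off by %d bytes" % (dd, di))
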
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new, through the regular
          ``addrevision()`` code path instead of the raw ``_addrevision()``
          one.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        If no value (or a falsy value) is passed, the destination revlog's
        current setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd
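
    # Usage sketch, not part of the module: recompute every delta while
    # copying into a fresh, empty revlog, e.g. to re-encode after a delta
    # algorithm change. ``tr`` is assumed to be an open transaction and
    # ``opener`` a vfs shared by both revlogs; the destination index file
    # name is hypothetical:
    #
    #   dest = revlog(opener, 'data/example.i.new')
    #   src.clone(tr, dest, deltareuse=revlog.DELTAREUSENEVER,
    #             aggressivemergedeltas=True)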