revlog: fix pure nodemap to not access missing index entry...
Yuya Nishihara
r39176:65d5de11 default
@@ -1,3005 +1,3007 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import heapq
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .utils import (
    stringutil,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
# bitmask for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)

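# For example, packmeta({'censored': 'reason'}, text) produces
# '\1\ncensored: reason\n\1\n' + text, and parsemeta() on that result (as
# used by _censoredtext() below) returns ({'censored': 'reason'}, 21).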
def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor
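
# A hypothetical registration, following the 3-tuple contract documented
# above (readfn/writefn/rawfn are illustrative names, not real helpers):
#   addflagprocessor(REVIDX_EXTSTORED, (readfn, writefn, rawfn))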

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
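
# The first field of an index entry packs the data offset and the 16 flag
# bits into a single integer: offset in the high bits, flags in the low 16.
# For example, offset_type(0x10, REVIDX_ISCENSORED) == (0x10 << 16) | (1 << 15).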

_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()
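
# Either branch computes sha1(min(p1, p2) + max(p1, p2) + text), so the
# resulting node is independent of parent order; the null-p2 branch merely
# reuses a precomputed sha1(nullid) state for speed.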

class _testrevlog(object):
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0):
        """data is a list of revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap

    def start(self, rev):
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # Trim empty revs at the end, but never the very first revision of a chain
    while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
        endidx -= 1

    return revs[startidx:endidx]

def _segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> _segmentspan(revlog, [0, 4])
    17
    >>> _segmentspan(revlog, [3, 4])
    5
    >>> _segmentspan(revlog, [1, 2, 3,])
    7
    >>> _segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    return revlog.end(revs[-1]) - revlog.start(revs[0])

def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one go.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If an individual revision chunk is larger than this limit, it will still
    be yielded individually.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # * deltainfo is used to pick a good delta chain when writing.
    if not (deltainfo is None or targetsize is None):
        msg = 'cannot use `targetsize` with a `deltainfo`'
        raise error.ProgrammingError(msg)
    for chunk in _slicechunktodensity(revlog, revs,
                                      deltainfo,
                                      revlog._srdensitythreshold,
                                      revlog._srmingapsize):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk

def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected but
    that are still too large compared to the read guarantee of the revlog.
    This might happen when the "minimal gap size" interrupted the slicing or
    when chains are built in a way that creates large blocks next to each
    other.

    >>> revlog = _testrevlog([
    ...  3, #0 (3)
    ...  5, #1 (2)
    ...  6, #2 (1)
    ...  8, #3 (2)
    ...  8, #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where the chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    Cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with a too large individual chunk (must return a valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    startrevidx = 0
    startdata = revlog.start(revs[0])
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        if span <= targetsize:
            endrevidx = idx
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx
    yield _trimchunk(revlog, revs, startrevidx)

def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one go.
    Assume that revs are sorted.

    ``deltainfo`` is a _deltainfo instance of a revision that we would append
    to the top of the revlog.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    nextrev = len(revlog)
    nextoffset = revlog.end(nextrev - 1)

    if deltainfo is None:
        deltachainspan = _segmentspan(revlog, revs)
        chainpayload = sum(length(r) for r in revs)
    else:
        deltachainspan = deltainfo.distance
        chainpayload = deltainfo.compresseddeltalen

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    if deltainfo is not None:
        revs = list(revs)
        revs.append(nextrev)

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        if rev < nextrev:
            revstart = start(rev)
            revlen = length(rev)
        else:
            revstart = nextoffset
            revlen = deltainfo.deltalen

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < targetdensity:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

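# _deltainfo captures what _builddeltainfo() below computes for a candidate
# delta: distance (bytes spanned on disk by the resulting chain), deltalen
# (compressed size of this delta alone), data (the (header, data) pair from
# revlog.compress), the base and chainbase revisions, chainlen, and
# compresseddeltalen (cumulative compressed payload of the whole chain).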
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()

class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

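    # Candidate groups are yielded from cheapest to most desperate: the
    # cached delta base received over the wire (when usable), then the
    # parent revision(s), and finally the previous revision as a last
    # resort before falling back to a fulltext.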
    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides revisions that are interesting to diff against, grouped by
        level of ease.
        """
        revlog = self.revlog
        gdelta = revlog._generaldelta
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and gdelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if gdelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]

                if not revlog._deltabothparents and len(parents) == 2:
                    parents.sort()
                    # To minimize the chance of having to build a fulltext,
                    # pick first whichever parent is closest to us (max rev)
                    yield (parents[1],)
                    # then the other one (min rev) if the first did not fit
                    yield (parents[0],)
                    tested.update(parents)
                elif len(parents) > 0:
                    # Test all parents (1 or 2), and keep the best candidate
                    yield parents
                    tested.update(parents)

            if prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                yield (prev,)
                tested.add(prev)

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        if not revinfo.textlen:
            return None # empty files do not need deltas

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltalength = self.revlog.length
        deltaparent = self.revlog.deltaparent

        deltainfo = None
        deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            # filter out delta bases that will never produce a good delta
            candidaterevs = [r for r in candidaterevs
                             if self.revlog.length(r) <= deltas_limit]
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # skip over empty deltas (no need to include them in a chain)
                while candidaterev != nullrev and not deltalength(candidaterev):
                    candidaterev = deltaparent(candidaterev)
                # no need to try a delta against nullid, this will be handled
                # by fulltext later.
                if candidaterev == nullrev:
                    continue
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated with the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
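# Each v0 entry is 4 * 4 + 3 * 20 = 76 bytes, so entry n starts at byte
# offset 76 * n.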

class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

# maximum <delta-chain-data>/<revision-text-length> ratio
LIMIT_DELTA2TEXT = 2

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
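            # (v0 stores parents as nodeids; translate them to revs via the
            # nodemap built so far, mapping unknown parents to nullrev)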
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
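# Each entry is 8 + 6 * 4 + 20 + 12 = 64 bytes; the 20-byte sha1 nodeid is
# padded with 12 null bytes ("12x") to fill the 32-byte nodeid field above.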
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
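        # rev 0's offset is always zero, so its leading 4 bytes can carry
        # the revlog version header instead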
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
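    # Reference sketch (editorial assumption based on the accessors below,
    # not upstream documentation): each parsed index entry behaves as an
    # 8-tuple of the form
    #
    #   (offset_and_flags, compressed_len, uncompressed_len,
    #    base_rev, link_rev, p1_rev, p2_rev, nodeid)
    #
    # which is why start(), flags(), length(), rawsize(), chainbase(),
    # linkrev(), parentrevs() and node() index entries 0 through 7.
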
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

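        # Editorial note: the bitwise test above works because
        # n & (n - 1) clears the lowest set bit of n, so the result is
        # zero exactly when n has a single bit set, i.e. when n is a
        # power of two. For the default cache size: 65536 & 65535 == 0.
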
        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        length = len(self)
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
            if stop > length:
                stop = length
        else:
            stop = length
        return pycompat.xrange(start, stop, step)

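    # Example (illustrative, added note): revs(start=5, stop=2) flips step
    # to -1 and widens stop by one, yielding 5, 4, 3, 2 -- both endpoints
    # are inclusive regardless of iteration direction.
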
    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            self.rev(self.node(0))
        return self._nodecache

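    # Note on the guard above (editorial, not upstream text): an empty
    # revlog has no revision 0, so calling self.rev(self.node(0))
    # unconditionally could reach into an index entry that does not
    # exist; with the guard, an empty revlog simply returns the seed
    # cache {nullid: nullrev}.
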
    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and
        # two clients could have the same revlog node with different flags
        # (i.e. different rawtext contents) and the delta could be
        # incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

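    # Illustrative note (added, revision numbers assumed): the pure-Python
    # fallback in rev() scans the index backwards from _nodepos, caching
    # every node it passes, so a later lookup for an older node resumes
    # where the previous scan stopped, e.g.:
    #
    #   rl.rev(node_of_rev_8)   # scans tip..8, caches those nodes
    #   rl.rev(node_of_rev_3)   # resumes at rev 7, scans only 7..3
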
    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

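    # Worked example (illustrative values): if an entry's first field is
    # 0xABCD8000, start() shifts away the low 16 bits and yields offset
    # 0xABCD, while flags() masks with 0xFFFF and yields 0x8000
    # (REVIDX_ISCENSORED).
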
    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

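    # Usage sketch (illustrative, revision numbers assumed): if rev 6 is
    # stored as a delta on 5, which is a delta on full-text rev 4, then
    # _deltachain(6) returns ([4, 5, 6], False), while
    # _deltachain(6, stoprev=5) returns ([6], True), telling the caller
    # that the base text must be obtained elsewhere.
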
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

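    # Why a single forward pass suffices (editorial note): in a revlog,
    # parents always have lower revision numbers than their children, so
    # by the time rev i is visited every lower ancestor has already been
    # classified, and one membership test per parent decides whether i
    # descends from 'revs'.
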
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

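    # Worked example (illustrative): for a history with revs 0 <- 1 <- 2
    # and a branch 1 <- 3, _headrevs() first marks every rev as a
    # candidate head, then clears the bit of each rev's parents, leaving
    # [2, 3].
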
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

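    # Reasoning note (editorial, not upstream text): past the cheap cases,
    # if a is an ancestor of b then the common ancestors of (a, b) are
    # exactly the ancestors of a, whose single head is a itself; and
    # conversely, if a appears among the common-ancestor heads it is in
    # particular an ancestor of b. Hence the membership test above.
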
1745 def ancestor(self, a, b):
1747 def ancestor(self, a, b):
1746 """calculate the "best" common ancestor of nodes a and b"""
1748 """calculate the "best" common ancestor of nodes a and b"""
1747
1749
1748 a, b = self.rev(a), self.rev(b)
1750 a, b = self.rev(a), self.rev(b)
1749 try:
1751 try:
1750 ancs = self.index.ancestors(a, b)
1752 ancs = self.index.ancestors(a, b)
1751 except (AttributeError, OverflowError):
1753 except (AttributeError, OverflowError):
1752 ancs = ancestor.ancestors(self.parentrevs, a, b)
1754 ancs = ancestor.ancestors(self.parentrevs, a, b)
1753 if ancs:
1755 if ancs:
1754 # choose a consistent winner when there's a tie
1756 # choose a consistent winner when there's a tie
1755 return min(map(self.node, ancs))
1757 return min(map(self.node, ancs))
1756 return nullid
1758 return nullid
1757
1759
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

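    # Sketch of the identifier forms _match resolves, using a hypothetical
    # instance ``rl`` (editorial note, not in the original source):
    #
    #   rl._match(5)          # integer rev -> self.node(5)
    #   rl._match("5")        # str(rev) -> self.node(5)
    #   rl._match("-1")       # negative revs count back from the tip
    #   rl._match(node)       # 20-byte binary node -> returned if known
    #   rl._match(hex(node))  # 40-char hex nodeid -> binary node
    #
    # Anything that matches none of these falls through and returns None.
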
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                 _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                     _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

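    # How prefix lookups resolve (editorial sketch, values hypothetical):
    #
    #   rl._partialmatch('1f2e3d')  # unique prefix -> full binary node
    #   rl._partialmatch('ff')      # only wdir could match -> WdirUnsupported
    #   rl._partialmatch('0')       # >1 match -> AmbiguousPrefixLookupError
    #   rl._partialmatch('beef')    # no match at all -> None
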
    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

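    # Contract sketch for shortest() (editorial note, values hypothetical):
    # given a node whose hex form starts with '1f2e3d', and assuming no other
    # node shares the '1f' prefix:
    #
    #   rl.shortest(node)       # -> '1f'     (shortest unambiguous prefix)
    #   rl.shortest(node, 6)    # -> '1f2e3d' (minlength is respected)
    #
    # All-'f' prefixes are lengthened until they stop colliding with the
    # virtual working-directory id (wdirid).
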
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
            self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

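    # Worked example of the window arithmetic above (editorial note,
    # assuming the default 64KiB chunk cache size, which is a power of two
    # so the bit masking yields aligned windows):
    #
    #   cachesize  = 65536
    #   offset, length = 70000, 100
    #   realoffset = 70000 & ~65535                            # -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536  # -> 65536
    #
    # so a 100-byte request reads one aligned 64KiB window around it, and
    # util.buffer() slices the requested 100 bytes back out of the cache.
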
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

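    # Worked example of the inline adjustment above (editorial note,
    # assuming the 64-byte v1 index entry size): in an inline revlog the
    # index entries and data chunks are interleaved in the .i file, with
    # each revision's 64-byte entry preceding its data. For startrev = 2
    # whose logical data offset is 300:
    #
    #   start = 300 + (2 + 1) * 64    # -> 492 physical bytes into the file
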
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs, targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

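    # Summary of the two delta layouts handled above (editorial note):
    #
    #   index[rev][3] == rev    -> full snapshot, deltaparent(rev) == nullrev
    #   generaldelta revlog     -> the stored base *is* the delta parent
    #   pre-generaldelta revlog -> deltas always chain to rev - 1
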
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

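    # Read-path summary (editorial note): revision() resolves the node to a
    # rev, walks the delta chain (stopping early when the cached rawtext can
    # serve as the chain base), applies the deltas with mdiff.patches(), and
    # finally runs any registered flag processors. For a hypothetical
    # instance ``rl``:
    #
    #   text = rl.revision(node)             # fulltext after flag processing
    #   raw  = rl.revision(node, raw=True)   # rawtext as stored on disk
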
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

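    # Ordering sketch (editorial note, flags A and B hypothetical): if
    # REVIDX_FLAGS_ORDER lists A before B, a 'read' runs readtransform(A)
    # then readtransform(B), while a 'write' runs writetransform(B) then
    # writetransform(A). Reversing the order on write keeps write the exact
    # inverse of read even when the transforms do not commute.
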
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise RevlogError(_("integrity check failed on %s:%s")
                                  % (self.indexfile,
                                     pycompat.bytestr(revornode)))
        except RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such
            a case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

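    # The 2-tuple returned above is (header, data); the header byte, if any,
    # is written in front of the chunk. Sketch of the cases, assuming the
    # default zlib engine, which declines to compress when it would not
    # shrink the data (editorial note):
    #
    #   compress(b'')            # -> ('', b'')          empty passthrough
    #   compress(big_text)       # -> ('', b'x\x9c...')  engine emits header
    #   compress(b'\0raw')       # -> ('', b'\0raw')     NUL-led raw is safe
    #   compress(b'tiny')        # -> ('u', b'tiny')     marked uncompressed
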
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') %
                                  stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

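    # Dispatch summary for the header byte checked above (editorial note):
    #
    #   'x'    -> zlib-compressed chunk, inlined fast path
    #   '\0'   -> stored literally (raw data already starting with NUL)
    #   'u'    -> stored uncompressed, strip the one-byte header
    #   other  -> resolved through util.compengines (e.g. zstd)
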
    def _isgooddeltainfo(self, deltainfo, revinfo):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if deltainfo is None:
            return False

        # - 'deltainfo.distance' is the distance from the base revision --
        #   bounding it limits the amount of I/O we need to do.
        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
        #   deltas we need to apply -- bounding it limits the amount of CPU
        #   we consume.

        if self._sparserevlog:
            # As sparse-read will be used, we can consider that the distance,
            # instead of being the span of the whole chunk,
            # is the span of the largest read chunk
            base = deltainfo.base

            if base != nullrev:
                deltachain = self._deltachain(base)[0]
            else:
                deltachain = []

            chunks = _slicechunk(self, deltachain, deltainfo)
            distance = max(map(lambda revs: _segmentspan(self, revs), chunks))
        else:
            distance = deltainfo.distance

        textlen = revinfo.textlen
        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = distance # ensure the conditional passes
        maxdist = max(maxdist, defaultmax)
        if self._sparserevlog and maxdist < self._srmingapsize:
            # In multiple places, we are ignoring irrelevant data ranges below
            # a certain size. We also apply this tradeoff here and relax the
            # span constraint for small enough content.
            maxdist = self._srmingapsize

        # Bad delta from read span:
        #
        #   If the span of data read is larger than the maximum allowed.
        if maxdist < distance:
            return False

        # Bad delta from new delta size:
        #
        #   If the delta size is larger than the target text, storing the
        #   delta will be inefficient.
        if textlen < deltainfo.deltalen:
            return False

        # Bad delta from cumulated payload size:
        #
        #   If the sum of deltas gets larger than K * target text length.
        if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
            return False

        # Bad delta from chain length:
        #
        #   If the number of deltas in the chain gets too high.
        if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
            return False

        return True

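    # Worked example of the bounds above (editorial note, all numbers
    # hypothetical): with textlen = 1000, maxdeltachainspan = 4096 and
    # assuming LIMIT_DELTA2TEXT = 2, maxdist = max(4096, 4 * 1000) = 4096:
    #
    #   distance = 5000                  -> bad: read span exceeds maxdist
    #   deltalen = 1200 > 1000           -> bad: delta bigger than the text
    #   compresseddeltalen = 2500 > 2000 -> bad: cumulated payload too large
    #   chainlen > self._maxchainlen     -> bad: chain too long
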
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid or node in wdirfilenodeids:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = None
        else:
            deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first
        delta is against its parent, which should be in our log; the
        rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

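    # Illustrative sketch (not part of revlog.py): how a caller might drive
    # addgroup(). Each element of ``deltas`` is unpacked above as
    # (node, p1, p2, linknode, deltabase, delta, flags); ``rl``, ``repo``,
    # ``tr`` and ``seen`` are hypothetical names for this example:
    #
    #   deltas = [(node, p1, p2, linknode, deltabase, delta, 0)]
    #   seen = []
    #   added = rl.addgroup(deltas, repo.changelog.rev, tr,
    #                       addrevisioncb=lambda rl, node: seen.append(node))
    #
    # In practice these tuples come from changegroup application, with
    # ``linkmapper`` translating a changelog node to its revision number.
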
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

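    # Illustrative sketch (not part of revlog.py): the shape of a delta the
    # heuristic above matches. A binary delta is a series of
    # (start, end, length) patches whose headers pack as ">lll"; a full
    # replacement is a single patch spanning the whole old text, so a
    # censored revision's delta would look roughly like:
    #
    #   hlen = struct.calcsize(">lll")                  # 12-byte header
    #   newtext = "\1\ncensored:" + metadata            # censor prefix
    #   delta = mdiff.replacediffheader(oldlen, len(newtext)) + newtext
    #   assert delta[hlen:hlen + len("\1\ncensored:")] == "\1\ncensored:"
    #
    # ``metadata`` and ``oldlen`` are placeholders for the example.
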
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip a linkrev

        Returns a tuple containing the minimum rev required to strip every
        revision with a linkrev >= minlink, and a set of all revs that have
        linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

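    # Worked example (illustrative, not part of the module): for a linear
    # revlog with revisions 0..3 and linkrevs [0, 1, 2, 3], getstrippoint(2)
    # walks down from the head: rev 3 (linkrev 3) and rev 2 (linkrev 2) are
    # popped while their linkrevs are >= minlink, the walk stops at rev 1
    # (linkrev 1), and the result is (2, set()). ``brokenrevs`` collects
    # revisions at or above the returned strip point whose own linkrev is
    # < minlink: they get removed even though their changesets survive, so
    # their linkrevs will be broken.
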
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

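    # Illustrative sketch (hypothetical revlog ``rl`` and open transaction
    # ``tr``):
    #
    #   rev, broken = rl.getstrippoint(5)
    #   rl.strip(5, tr)   # truncate the .i/.d files at ``rev`` and forget
    #                     # the stripped nodes from nodemap/index in memory
    #
    # Callers are expected to have saved any revisions above the strip point
    # that should be re-added afterwards, per the docstring above.
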
    def checksize(self):
        """Check the on-disk sizes of the index and data files.

        Returns a (dd, di) tuple, where dd is the number of bytes by which
        the data file deviates from what the index implies and di is the
        analogous figure for the index file; (0, 0) means both sizes are
        consistent.
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

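    # Illustrative usage (hypothetical ``rl`` and ``ui`` objects):
    #
    #   dd, di = rl.checksize()
    #   if dd or di:
    #       ui.warn('%s: %d/%d bytes of unexpected data\n'
    #               % (rl.indexfile, dd, di))
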
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Existing deltas are ignored and every revision is re-added through
          the regular addrevision() path using its full text (see the code
          below).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        If ``None``, the destination revlog's current setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
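
    # Illustrative sketch (not part of revlog.py): recomputing every delta by
    # cloning into an empty destination revlog, e.g. when changing the delta
    # format. ``srcrl``, ``destrl`` and ``tr`` are hypothetical names:
    #
    #   srcrl.clone(tr, destrl, deltareuse=srcrl.DELTAREUSENEVER)
    #
    # DELTAREUSEALWAYS would instead copy existing deltas where possible,
    # trading delta optimality for speed, per the docstring above.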