##// END OF EJS Templates
sparse-revlog: fix delta validity computation...
Boris Feld -
r39179:3730b779 stable
parent child Browse files
Show More
@@ -1,2960 +1,2972
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import hashlib
19 import hashlib
20 import heapq
20 import heapq
21 import os
21 import os
22 import re
22 import re
23 import struct
23 import struct
24 import zlib
24 import zlib
25
25
26 # import stuff from node for others to import from revlog
26 # import stuff from node for others to import from revlog
27 from .node import (
27 from .node import (
28 bin,
28 bin,
29 hex,
29 hex,
30 nullid,
30 nullid,
31 nullrev,
31 nullrev,
32 wdirfilenodeids,
32 wdirfilenodeids,
33 wdirhex,
33 wdirhex,
34 wdirid,
34 wdirid,
35 wdirrev,
35 wdirrev,
36 )
36 )
37 from .i18n import _
37 from .i18n import _
38 from .thirdparty import (
38 from .thirdparty import (
39 attr,
39 attr,
40 )
40 )
41 from . import (
41 from . import (
42 ancestor,
42 ancestor,
43 error,
43 error,
44 mdiff,
44 mdiff,
45 policy,
45 policy,
46 pycompat,
46 pycompat,
47 templatefilters,
47 templatefilters,
48 util,
48 util,
49 )
49 )
50 from .utils import (
50 from .utils import (
51 stringutil,
51 stringutil,
52 )
52 )
53
53
54 parsers = policy.importmod(r'parsers')
54 parsers = policy.importmod(r'parsers')
55
55
56 # Aliased for performance.
56 # Aliased for performance.
57 _zlibdecompress = zlib.decompress
57 _zlibdecompress = zlib.decompress
58
58
59 # revlog header flags
59 # revlog header flags
60 REVLOGV0 = 0
60 REVLOGV0 = 0
61 REVLOGV1 = 1
61 REVLOGV1 = 1
62 # Dummy value until file format is finalized.
62 # Dummy value until file format is finalized.
63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 REVLOGV2 = 0xDEAD
64 REVLOGV2 = 0xDEAD
65 FLAG_INLINE_DATA = (1 << 16)
65 FLAG_INLINE_DATA = (1 << 16)
66 FLAG_GENERALDELTA = (1 << 17)
66 FLAG_GENERALDELTA = (1 << 17)
67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 REVLOG_DEFAULT_FORMAT = REVLOGV1
68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72
72
73 # revlog index flags
73 # revlog index flags
74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 REVIDX_DEFAULT_FLAGS = 0
77 REVIDX_DEFAULT_FLAGS = 0
78 # stable order in which flags need to be processed and their processors applied
78 # stable order in which flags need to be processed and their processors applied
79 REVIDX_FLAGS_ORDER = [
79 REVIDX_FLAGS_ORDER = [
80 REVIDX_ISCENSORED,
80 REVIDX_ISCENSORED,
81 REVIDX_ELLIPSIS,
81 REVIDX_ELLIPSIS,
82 REVIDX_EXTSTORED,
82 REVIDX_EXTSTORED,
83 ]
83 ]
84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 # bitmask for flags that could cause rawdata content change
85 # bitmask for flags that could cause rawdata content change
86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87
87
88 # max size of revlog with inline data
88 # max size of revlog with inline data
89 _maxinline = 131072
89 _maxinline = 131072
90 _chunksize = 1048576
90 _chunksize = 1048576
91
91
92 RevlogError = error.RevlogError
92 RevlogError = error.RevlogError
93 LookupError = error.LookupError
93 LookupError = error.LookupError
94 CensoredNodeError = error.CensoredNodeError
94 CensoredNodeError = error.CensoredNodeError
95 ProgrammingError = error.ProgrammingError
95 ProgrammingError = error.ProgrammingError
96
96
97 # Store flag processors (cf. 'addflagprocessor()' to register)
97 # Store flag processors (cf. 'addflagprocessor()' to register)
98 _flagprocessors = {
98 _flagprocessors = {
99 REVIDX_ISCENSORED: None,
99 REVIDX_ISCENSORED: None,
100 }
100 }
101
101
102 _mdre = re.compile('\1\n')
102 _mdre = re.compile('\1\n')
103 def parsemeta(text):
103 def parsemeta(text):
104 """return (metadatadict, metadatasize)"""
104 """return (metadatadict, metadatasize)"""
105 # text can be buffer, so we can't use .startswith or .index
105 # text can be buffer, so we can't use .startswith or .index
106 if text[:2] != '\1\n':
106 if text[:2] != '\1\n':
107 return None, None
107 return None, None
108 s = _mdre.search(text, 2).start()
108 s = _mdre.search(text, 2).start()
109 mtext = text[2:s]
109 mtext = text[2:s]
110 meta = {}
110 meta = {}
111 for l in mtext.splitlines():
111 for l in mtext.splitlines():
112 k, v = l.split(": ", 1)
112 k, v = l.split(": ", 1)
113 meta[k] = v
113 meta[k] = v
114 return meta, (s + 2)
114 return meta, (s + 2)
115
115
116 def packmeta(meta, text):
116 def packmeta(meta, text):
117 keys = sorted(meta)
117 keys = sorted(meta)
118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
119 return "\1\n%s\1\n%s" % (metatext, text)
119 return "\1\n%s\1\n%s" % (metatext, text)
120
120
121 def _censoredtext(text):
121 def _censoredtext(text):
122 m, offs = parsemeta(text)
122 m, offs = parsemeta(text)
123 return m and "censored" in m
123 return m and "censored" in m
124
124
125 def addflagprocessor(flag, processor):
125 def addflagprocessor(flag, processor):
126 """Register a flag processor on a revision data flag.
126 """Register a flag processor on a revision data flag.
127
127
128 Invariant:
128 Invariant:
129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
131 - Only one flag processor can be registered on a specific flag.
131 - Only one flag processor can be registered on a specific flag.
132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
133 following signatures:
133 following signatures:
134 - (read) f(self, rawtext) -> text, bool
134 - (read) f(self, rawtext) -> text, bool
135 - (write) f(self, text) -> rawtext, bool
135 - (write) f(self, text) -> rawtext, bool
136 - (raw) f(self, rawtext) -> bool
136 - (raw) f(self, rawtext) -> bool
137 "text" is presented to the user. "rawtext" is stored in revlog data, not
137 "text" is presented to the user. "rawtext" is stored in revlog data, not
138 directly visible to the user.
138 directly visible to the user.
139 The boolean returned by these transforms is used to determine whether
139 The boolean returned by these transforms is used to determine whether
140 the returned text can be used for hash integrity checking. For example,
140 the returned text can be used for hash integrity checking. For example,
141 if "write" returns False, then "text" is used to generate hash. If
141 if "write" returns False, then "text" is used to generate hash. If
142 "write" returns True, that basically means "rawtext" returned by "write"
142 "write" returns True, that basically means "rawtext" returned by "write"
143 should be used to generate hash. Usually, "write" and "read" return
143 should be used to generate hash. Usually, "write" and "read" return
144 different booleans. And "raw" returns the same boolean as "write".
144 different booleans. And "raw" returns the same boolean as "write".
145
145
146 Note: The 'raw' transform is used for changegroup generation and in some
146 Note: The 'raw' transform is used for changegroup generation and in some
147 debug commands. In this case the transform only indicates whether the
147 debug commands. In this case the transform only indicates whether the
148 contents can be used for hash integrity checks.
148 contents can be used for hash integrity checks.
149 """
149 """
150 if not flag & REVIDX_KNOWN_FLAGS:
150 if not flag & REVIDX_KNOWN_FLAGS:
151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
152 raise ProgrammingError(msg)
152 raise ProgrammingError(msg)
153 if flag not in REVIDX_FLAGS_ORDER:
153 if flag not in REVIDX_FLAGS_ORDER:
154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
155 raise ProgrammingError(msg)
155 raise ProgrammingError(msg)
156 if flag in _flagprocessors:
156 if flag in _flagprocessors:
157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
158 raise error.Abort(msg)
158 raise error.Abort(msg)
159 _flagprocessors[flag] = processor
159 _flagprocessors[flag] = processor
160
160
161 def getoffset(q):
161 def getoffset(q):
162 return int(q >> 16)
162 return int(q >> 16)
163
163
164 def gettype(q):
164 def gettype(q):
165 return int(q & 0xFFFF)
165 return int(q & 0xFFFF)
166
166
167 def offset_type(offset, type):
167 def offset_type(offset, type):
168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
169 raise ValueError('unknown revlog index flags')
169 raise ValueError('unknown revlog index flags')
170 return int(int(offset) << 16 | type)
170 return int(int(offset) << 16 | type)
171
171
172 _nullhash = hashlib.sha1(nullid)
172 _nullhash = hashlib.sha1(nullid)
173
173
174 def hash(text, p1, p2):
174 def hash(text, p1, p2):
175 """generate a hash from the given text and its parent hashes
175 """generate a hash from the given text and its parent hashes
176
176
177 This hash combines both the current file contents and its history
177 This hash combines both the current file contents and its history
178 in a manner that makes it easy to distinguish nodes with the same
178 in a manner that makes it easy to distinguish nodes with the same
179 content in the revision graph.
179 content in the revision graph.
180 """
180 """
181 # As of now, if one of the parent node is null, p2 is null
181 # As of now, if one of the parent node is null, p2 is null
182 if p2 == nullid:
182 if p2 == nullid:
183 # deep copy of a hash is faster than creating one
183 # deep copy of a hash is faster than creating one
184 s = _nullhash.copy()
184 s = _nullhash.copy()
185 s.update(p1)
185 s.update(p1)
186 else:
186 else:
187 # none of the parent nodes are nullid
187 # none of the parent nodes are nullid
188 if p1 < p2:
188 if p1 < p2:
189 a = p1
189 a = p1
190 b = p2
190 b = p2
191 else:
191 else:
192 a = p2
192 a = p2
193 b = p1
193 b = p1
194 s = hashlib.sha1(a)
194 s = hashlib.sha1(a)
195 s.update(b)
195 s.update(b)
196 s.update(text)
196 s.update(text)
197 return s.digest()
197 return s.digest()
198
198
199 class _testrevlog(object):
199 class _testrevlog(object):
200 """minimalist fake revlog to use in doctests"""
200 """minimalist fake revlog to use in doctests"""
201
201
202 def __init__(self, data, density=0.5, mingap=0):
202 def __init__(self, data, density=0.5, mingap=0):
203 """data is a list of revision payload boundaries"""
203 """data is a list of revision payload boundaries"""
204 self._data = data
204 self._data = data
205 self._srdensitythreshold = density
205 self._srdensitythreshold = density
206 self._srmingapsize = mingap
206 self._srmingapsize = mingap
207
207
208 def start(self, rev):
208 def start(self, rev):
209 if rev == 0:
209 if rev == 0:
210 return 0
210 return 0
211 return self._data[rev - 1]
211 return self._data[rev - 1]
212
212
213 def end(self, rev):
213 def end(self, rev):
214 return self._data[rev]
214 return self._data[rev]
215
215
216 def length(self, rev):
216 def length(self, rev):
217 return self.end(rev) - self.start(rev)
217 return self.end(rev) - self.start(rev)
218
218
219 def __len__(self):
219 def __len__(self):
220 return len(self._data)
220 return len(self._data)
221
221
222 def _trimchunk(revlog, revs, startidx, endidx=None):
222 def _trimchunk(revlog, revs, startidx, endidx=None):
223 """returns revs[startidx:endidx] without empty trailing revs
223 """returns revs[startidx:endidx] without empty trailing revs
224
224
225 Doctest Setup
225 Doctest Setup
226 >>> revlog = _testrevlog([
226 >>> revlog = _testrevlog([
227 ... 5, #0
227 ... 5, #0
228 ... 10, #1
228 ... 10, #1
229 ... 12, #2
229 ... 12, #2
230 ... 12, #3 (empty)
230 ... 12, #3 (empty)
231 ... 17, #4
231 ... 17, #4
232 ... 21, #5
232 ... 21, #5
233 ... 21, #6 (empty)
233 ... 21, #6 (empty)
234 ... ])
234 ... ])
235
235
236 Contiguous cases:
236 Contiguous cases:
237 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
237 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
238 [0, 1, 2, 3, 4, 5]
238 [0, 1, 2, 3, 4, 5]
239 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
239 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
240 [0, 1, 2, 3, 4]
240 [0, 1, 2, 3, 4]
241 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
241 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
242 [0, 1, 2]
242 [0, 1, 2]
243 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
243 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
244 [2]
244 [2]
245 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
245 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
246 [3, 4, 5]
246 [3, 4, 5]
247 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
247 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
248 [3, 4]
248 [3, 4]
249
249
250 Discontiguous cases:
250 Discontiguous cases:
251 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
251 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
252 [1, 3, 5]
252 [1, 3, 5]
253 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
253 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
254 [1]
254 [1]
255 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
255 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
256 [3, 5]
256 [3, 5]
257 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
257 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
258 [3, 5]
258 [3, 5]
259 """
259 """
260 length = revlog.length
260 length = revlog.length
261
261
262 if endidx is None:
262 if endidx is None:
263 endidx = len(revs)
263 endidx = len(revs)
264
264
265 # Trim empty revs at the end, but never the very first revision of a chain
265 # If we have a non-empty delta candidate, there is nothing to trim
266 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
266 if revs[endidx - 1] < len(revlog):
267 endidx -= 1
267 # Trim empty revs at the end, except the very first revision of a chain
268 while (endidx > 1
269 and endidx > startidx
270 and length(revs[endidx - 1]) == 0):
271 endidx -= 1
268
272
269 return revs[startidx:endidx]
273 return revs[startidx:endidx]
270
274
271 def _segmentspan(revlog, revs):
275 def _segmentspan(revlog, revs, deltainfo=None):
272 """Get the byte span of a segment of revisions
276 """Get the byte span of a segment of revisions
273
277
274 revs is a sorted array of revision numbers
278 revs is a sorted array of revision numbers
275
279
276 >>> revlog = _testrevlog([
280 >>> revlog = _testrevlog([
277 ... 5, #0
281 ... 5, #0
278 ... 10, #1
282 ... 10, #1
279 ... 12, #2
283 ... 12, #2
280 ... 12, #3 (empty)
284 ... 12, #3 (empty)
281 ... 17, #4
285 ... 17, #4
282 ... ])
286 ... ])
283
287
284 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
288 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
285 17
289 17
286 >>> _segmentspan(revlog, [0, 4])
290 >>> _segmentspan(revlog, [0, 4])
287 17
291 17
288 >>> _segmentspan(revlog, [3, 4])
292 >>> _segmentspan(revlog, [3, 4])
289 5
293 5
290 >>> _segmentspan(revlog, [1, 2, 3,])
294 >>> _segmentspan(revlog, [1, 2, 3,])
291 7
295 7
292 >>> _segmentspan(revlog, [1, 3])
296 >>> _segmentspan(revlog, [1, 3])
293 7
297 7
294 """
298 """
295 if not revs:
299 if not revs:
296 return 0
300 return 0
297 return revlog.end(revs[-1]) - revlog.start(revs[0])
301 if deltainfo is not None and len(revlog) <= revs[-1]:
302 if len(revs) == 1:
303 return deltainfo.deltalen
304 offset = revlog.end(len(revlog) - 1)
305 end = deltainfo.deltalen + offset
306 else:
307 end = revlog.end(revs[-1])
308 return end - revlog.start(revs[0])
298
309
299 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
310 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
300 """slice revs to reduce the amount of unrelated data to be read from disk.
311 """slice revs to reduce the amount of unrelated data to be read from disk.
301
312
302 ``revs`` is sliced into groups that should be read in one time.
313 ``revs`` is sliced into groups that should be read in one time.
303 Assume that revs are sorted.
314 Assume that revs are sorted.
304
315
305 The initial chunk is sliced until the overall density (payload/chunks-span
316 The initial chunk is sliced until the overall density (payload/chunks-span
306 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
317 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
307 `revlog._srmingapsize` is skipped.
318 `revlog._srmingapsize` is skipped.
308
319
309 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
320 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
310 For consistency with other slicing choice, this limit won't go lower than
321 For consistency with other slicing choice, this limit won't go lower than
311 `revlog._srmingapsize`.
322 `revlog._srmingapsize`.
312
323
313 If individual revision chunks are larger than this limit, they will still
324 If individual revision chunks are larger than this limit, they will still
314 be yielded individually.
325 be yielded individually.
315
326
316 >>> revlog = _testrevlog([
327 >>> revlog = _testrevlog([
317 ... 5, #00 (5)
328 ... 5, #00 (5)
318 ... 10, #01 (5)
329 ... 10, #01 (5)
319 ... 12, #02 (2)
330 ... 12, #02 (2)
320 ... 12, #03 (empty)
331 ... 12, #03 (empty)
321 ... 27, #04 (15)
332 ... 27, #04 (15)
322 ... 31, #05 (4)
333 ... 31, #05 (4)
323 ... 31, #06 (empty)
334 ... 31, #06 (empty)
324 ... 42, #07 (11)
335 ... 42, #07 (11)
325 ... 47, #08 (5)
336 ... 47, #08 (5)
326 ... 47, #09 (empty)
337 ... 47, #09 (empty)
327 ... 48, #10 (1)
338 ... 48, #10 (1)
328 ... 51, #11 (3)
339 ... 51, #11 (3)
329 ... 74, #12 (23)
340 ... 74, #12 (23)
330 ... 85, #13 (11)
341 ... 85, #13 (11)
331 ... 86, #14 (1)
342 ... 86, #14 (1)
332 ... 91, #15 (5)
343 ... 91, #15 (5)
333 ... ])
344 ... ])
334
345
335 >>> list(_slicechunk(revlog, list(range(16))))
346 >>> list(_slicechunk(revlog, list(range(16))))
336 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
347 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
337 >>> list(_slicechunk(revlog, [0, 15]))
348 >>> list(_slicechunk(revlog, [0, 15]))
338 [[0], [15]]
349 [[0], [15]]
339 >>> list(_slicechunk(revlog, [0, 11, 15]))
350 >>> list(_slicechunk(revlog, [0, 11, 15]))
340 [[0], [11], [15]]
351 [[0], [11], [15]]
341 >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
352 >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
342 [[0], [11, 13, 15]]
353 [[0], [11, 13, 15]]
343 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
354 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
344 [[1, 2], [5, 8, 10, 11], [14]]
355 [[1, 2], [5, 8, 10, 11], [14]]
345
356
346 Slicing with a maximum chunk size
357 Slicing with a maximum chunk size
347 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
358 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
348 [[0], [11], [13], [15]]
359 [[0], [11], [13], [15]]
349 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
360 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
350 [[0], [11], [13, 15]]
361 [[0], [11], [13, 15]]
351 """
362 """
352 if targetsize is not None:
363 if targetsize is not None:
353 targetsize = max(targetsize, revlog._srmingapsize)
364 targetsize = max(targetsize, revlog._srmingapsize)
354 # targetsize should not be specified when evaluating delta candidates:
365 # targetsize should not be specified when evaluating delta candidates:
355 # * targetsize is used to ensure we stay within specification when reading,
366 # * targetsize is used to ensure we stay within specification when reading,
356 # * deltainfo is used to pick are good delta chain when writing.
356 # * deltainfo is used to pick a good delta chain when writing.
367 # * deltainfo is used to pick a good delta chain when writing.
368 if not (deltainfo is None or targetsize is None):
358 msg = 'cannot use `targetsize` with a `deltainfo`'
369 msg = 'cannot use `targetsize` with a `deltainfo`'
359 raise error.ProgrammingError(msg)
370 raise error.ProgrammingError(msg)
360 for chunk in _slicechunktodensity(revlog, revs,
371 for chunk in _slicechunktodensity(revlog, revs,
361 deltainfo,
372 deltainfo,
362 revlog._srdensitythreshold,
373 revlog._srdensitythreshold,
363 revlog._srmingapsize):
374 revlog._srmingapsize):
364 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
375 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
365 yield subchunk
376 yield subchunk
366
377
367 def _slicechunktosize(revlog, revs, targetsize=None):
378 def _slicechunktosize(revlog, revs, targetsize=None):
368 """slice revs to match the target size
379 """slice revs to match the target size
369
380
370 This is intended to be used on chunks that density slicing selected but that
381 This is intended to be used on chunks that density slicing selected but that
371 are still too large compared to the read guarantee of revlog. This might
382 are still too large compared to the read guarantee of revlog. This might
372 happen when "minimal gap size" interrupted the slicing or when chains are
383 happen when "minimal gap size" interrupted the slicing or when chains are
373 built in a way that creates large blocks next to each other.
384 built in a way that creates large blocks next to each other.
374
385
375 >>> revlog = _testrevlog([
386 >>> revlog = _testrevlog([
376 ... 3, #0 (3)
387 ... 3, #0 (3)
377 ... 5, #1 (2)
388 ... 5, #1 (2)
378 ... 6, #2 (1)
389 ... 6, #2 (1)
379 ... 8, #3 (2)
390 ... 8, #3 (2)
380 ... 8, #4 (empty)
391 ... 8, #4 (empty)
381 ... 11, #5 (3)
392 ... 11, #5 (3)
382 ... 12, #6 (1)
393 ... 12, #6 (1)
383 ... 13, #7 (1)
394 ... 13, #7 (1)
384 ... 14, #8 (1)
395 ... 14, #8 (1)
385 ... ])
396 ... ])
386
397
387 Cases where chunk is already small enough
398 Cases where chunk is already small enough
388 >>> list(_slicechunktosize(revlog, [0], 3))
399 >>> list(_slicechunktosize(revlog, [0], 3))
389 [[0]]
400 [[0]]
390 >>> list(_slicechunktosize(revlog, [6, 7], 3))
401 >>> list(_slicechunktosize(revlog, [6, 7], 3))
391 [[6, 7]]
402 [[6, 7]]
392 >>> list(_slicechunktosize(revlog, [0], None))
403 >>> list(_slicechunktosize(revlog, [0], None))
393 [[0]]
404 [[0]]
394 >>> list(_slicechunktosize(revlog, [6, 7], None))
405 >>> list(_slicechunktosize(revlog, [6, 7], None))
395 [[6, 7]]
406 [[6, 7]]
396
407
397 cases where we need actual slicing
408 cases where we need actual slicing
398 >>> list(_slicechunktosize(revlog, [0, 1], 3))
409 >>> list(_slicechunktosize(revlog, [0, 1], 3))
399 [[0], [1]]
410 [[0], [1]]
400 >>> list(_slicechunktosize(revlog, [1, 3], 3))
411 >>> list(_slicechunktosize(revlog, [1, 3], 3))
401 [[1], [3]]
412 [[1], [3]]
402 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
413 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
403 [[1, 2], [3]]
414 [[1, 2], [3]]
404 >>> list(_slicechunktosize(revlog, [3, 5], 3))
415 >>> list(_slicechunktosize(revlog, [3, 5], 3))
405 [[3], [5]]
416 [[3], [5]]
406 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
417 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
407 [[3], [5]]
418 [[3], [5]]
408 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
419 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
409 [[5], [6, 7, 8]]
420 [[5], [6, 7, 8]]
410 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
421 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
411 [[0], [1, 2], [3], [5], [6, 7, 8]]
422 [[0], [1, 2], [3], [5], [6, 7, 8]]
412
423
413 Case with too large individual chunk (must return valid chunk)
424 Case with too large individual chunk (must return valid chunk)
414 >>> list(_slicechunktosize(revlog, [0, 1], 2))
425 >>> list(_slicechunktosize(revlog, [0, 1], 2))
415 [[0], [1]]
426 [[0], [1]]
416 >>> list(_slicechunktosize(revlog, [1, 3], 1))
427 >>> list(_slicechunktosize(revlog, [1, 3], 1))
417 [[1], [3]]
428 [[1], [3]]
418 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
429 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
419 [[3], [5]]
430 [[3], [5]]
420 """
431 """
421 assert targetsize is None or 0 <= targetsize
432 assert targetsize is None or 0 <= targetsize
422 if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
433 if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
423 yield revs
434 yield revs
424 return
435 return
425
436
426 startrevidx = 0
437 startrevidx = 0
427 startdata = revlog.start(revs[0])
438 startdata = revlog.start(revs[0])
428 endrevidx = 0
439 endrevidx = 0
429 iterrevs = enumerate(revs)
440 iterrevs = enumerate(revs)
430 next(iterrevs) # skip first rev.
441 next(iterrevs) # skip first rev.
431 for idx, r in iterrevs:
442 for idx, r in iterrevs:
432 span = revlog.end(r) - startdata
443 span = revlog.end(r) - startdata
433 if span <= targetsize:
444 if span <= targetsize:
434 endrevidx = idx
445 endrevidx = idx
435 else:
446 else:
436 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
447 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
437 if chunk:
448 if chunk:
438 yield chunk
449 yield chunk
439 startrevidx = idx
450 startrevidx = idx
440 startdata = revlog.start(r)
451 startdata = revlog.start(r)
441 endrevidx = idx
452 endrevidx = idx
442 yield _trimchunk(revlog, revs, startrevidx)
453 yield _trimchunk(revlog, revs, startrevidx)
443
454
444 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
455 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
445 mingapsize=0):
456 mingapsize=0):
446 """slice revs to reduce the amount of unrelated data to be read from disk.
457 """slice revs to reduce the amount of unrelated data to be read from disk.
447
458
448 ``revs`` is sliced into groups that should be read in one time.
459 ``revs`` is sliced into groups that should be read in one time.
449 Assume that revs are sorted.
460 Assume that revs are sorted.
450
461
451 ``deltainfo`` is a _deltainfo instance of a revision that we would append
462 ``deltainfo`` is a _deltainfo instance of a revision that we would append
452 to the top of the revlog.
463 to the top of the revlog.
453
464
454 The initial chunk is sliced until the overall density (payload/chunks-span
465 The initial chunk is sliced until the overall density (payload/chunks-span
455 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
466 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
456 skipped.
467 skipped.
457
468
458 >>> revlog = _testrevlog([
469 >>> revlog = _testrevlog([
459 ... 5, #00 (5)
470 ... 5, #00 (5)
460 ... 10, #01 (5)
471 ... 10, #01 (5)
461 ... 12, #02 (2)
472 ... 12, #02 (2)
462 ... 12, #03 (empty)
473 ... 12, #03 (empty)
463 ... 27, #04 (15)
474 ... 27, #04 (15)
464 ... 31, #05 (4)
475 ... 31, #05 (4)
465 ... 31, #06 (empty)
476 ... 31, #06 (empty)
466 ... 42, #07 (11)
477 ... 42, #07 (11)
467 ... 47, #08 (5)
478 ... 47, #08 (5)
468 ... 47, #09 (empty)
479 ... 47, #09 (empty)
469 ... 48, #10 (1)
480 ... 48, #10 (1)
470 ... 51, #11 (3)
481 ... 51, #11 (3)
471 ... 74, #12 (23)
482 ... 74, #12 (23)
472 ... 85, #13 (11)
483 ... 85, #13 (11)
473 ... 86, #14 (1)
484 ... 86, #14 (1)
474 ... 91, #15 (5)
485 ... 91, #15 (5)
475 ... ])
486 ... ])
476
487
477 >>> list(_slicechunktodensity(revlog, list(range(16))))
488 >>> list(_slicechunktodensity(revlog, list(range(16))))
478 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
489 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
479 >>> list(_slicechunktodensity(revlog, [0, 15]))
490 >>> list(_slicechunktodensity(revlog, [0, 15]))
480 [[0], [15]]
491 [[0], [15]]
481 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
492 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
482 [[0], [11], [15]]
493 [[0], [11], [15]]
483 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
494 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
484 [[0], [11, 13, 15]]
495 [[0], [11, 13, 15]]
485 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
496 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
486 [[1, 2], [5, 8, 10, 11], [14]]
497 [[1, 2], [5, 8, 10, 11], [14]]
487 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
498 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
488 ... mingapsize=20))
499 ... mingapsize=20))
489 [[1, 2, 3, 5, 8, 10, 11], [14]]
500 [[1, 2, 3, 5, 8, 10, 11], [14]]
490 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
501 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
491 ... targetdensity=0.95))
502 ... targetdensity=0.95))
492 [[1, 2], [5], [8, 10, 11], [14]]
503 [[1, 2], [5], [8, 10, 11], [14]]
493 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
504 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
494 ... targetdensity=0.95, mingapsize=12))
505 ... targetdensity=0.95, mingapsize=12))
495 [[1, 2], [5, 8, 10, 11], [14]]
506 [[1, 2], [5, 8, 10, 11], [14]]
496 """
507 """
497 start = revlog.start
508 start = revlog.start
498 length = revlog.length
509 length = revlog.length
499
510
500 if len(revs) <= 1:
511 if len(revs) <= 1:
501 yield revs
512 yield revs
502 return
513 return
503
514
504 nextrev = len(revlog)
515 nextrev = len(revlog)
505 nextoffset = revlog.end(nextrev - 1)
516 nextoffset = revlog.end(nextrev - 1)
506
517
507 if deltainfo is None:
518 if deltainfo is None:
508 deltachainspan = _segmentspan(revlog, revs)
519 deltachainspan = _segmentspan(revlog, revs)
509 chainpayload = sum(length(r) for r in revs)
520 chainpayload = sum(length(r) for r in revs)
510 else:
521 else:
511 deltachainspan = deltainfo.distance
522 deltachainspan = deltainfo.distance
512 chainpayload = deltainfo.compresseddeltalen
523 chainpayload = deltainfo.compresseddeltalen
513
524
514 if deltachainspan < mingapsize:
525 if deltachainspan < mingapsize:
515 yield revs
526 yield revs
516 return
527 return
517
528
518 readdata = deltachainspan
529 readdata = deltachainspan
519
530
520 if deltachainspan:
531 if deltachainspan:
521 density = chainpayload / float(deltachainspan)
532 density = chainpayload / float(deltachainspan)
522 else:
533 else:
523 density = 1.0
534 density = 1.0
524
535
525 if density >= targetdensity:
536 if density >= targetdensity:
526 yield revs
537 yield revs
527 return
538 return
528
539
529 if deltainfo is not None:
540 if deltainfo is not None and deltainfo.deltalen:
530 revs = list(revs)
541 revs = list(revs)
531 revs.append(nextrev)
542 revs.append(nextrev)
532
543
533 # Store the gaps in a heap to have them sorted by decreasing size
544 # Store the gaps in a heap to have them sorted by decreasing size
534 gapsheap = []
545 gapsheap = []
535 heapq.heapify(gapsheap)
546 heapq.heapify(gapsheap)
536 prevend = None
547 prevend = None
537 for i, rev in enumerate(revs):
548 for i, rev in enumerate(revs):
538 if rev < nextrev:
549 if rev < nextrev:
539 revstart = start(rev)
550 revstart = start(rev)
540 revlen = length(rev)
551 revlen = length(rev)
541 else:
552 else:
542 revstart = nextoffset
553 revstart = nextoffset
543 revlen = deltainfo.deltalen
554 revlen = deltainfo.deltalen
544
555
545 # Skip empty revisions to form larger holes
556 # Skip empty revisions to form larger holes
546 if revlen == 0:
557 if revlen == 0:
547 continue
558 continue
548
559
549 if prevend is not None:
560 if prevend is not None:
550 gapsize = revstart - prevend
561 gapsize = revstart - prevend
551 # only consider holes that are large enough
562 # only consider holes that are large enough
552 if gapsize > mingapsize:
563 if gapsize > mingapsize:
553 heapq.heappush(gapsheap, (-gapsize, i))
564 heapq.heappush(gapsheap, (-gapsize, i))
554
565
555 prevend = revstart + revlen
566 prevend = revstart + revlen
556
567
557 # Collect the indices of the largest holes until the density is acceptable
568 # Collect the indices of the largest holes until the density is acceptable
558 indicesheap = []
569 indicesheap = []
559 heapq.heapify(indicesheap)
570 heapq.heapify(indicesheap)
560 while gapsheap and density < targetdensity:
571 while gapsheap and density < targetdensity:
561 oppgapsize, gapidx = heapq.heappop(gapsheap)
572 oppgapsize, gapidx = heapq.heappop(gapsheap)
562
573
563 heapq.heappush(indicesheap, gapidx)
574 heapq.heappush(indicesheap, gapidx)
564
575
565 # the gap sizes are stored as negatives to be sorted decreasingly
576 # the gap sizes are stored as negatives to be sorted decreasingly
566 # by the heap
577 # by the heap
567 readdata -= (-oppgapsize)
578 readdata -= (-oppgapsize)
568 if readdata > 0:
579 if readdata > 0:
569 density = chainpayload / float(readdata)
580 density = chainpayload / float(readdata)
570 else:
581 else:
571 density = 1.0
582 density = 1.0
572
583
573 # Cut the revs at collected indices
584 # Cut the revs at collected indices
574 previdx = 0
585 previdx = 0
575 while indicesheap:
586 while indicesheap:
576 idx = heapq.heappop(indicesheap)
587 idx = heapq.heappop(indicesheap)
577
588
578 chunk = _trimchunk(revlog, revs, previdx, idx)
589 chunk = _trimchunk(revlog, revs, previdx, idx)
579 if chunk:
590 if chunk:
580 yield chunk
591 yield chunk
581
592
582 previdx = idx
593 previdx = idx
583
594
584 chunk = _trimchunk(revlog, revs, previdx)
595 chunk = _trimchunk(revlog, revs, previdx)
585 if chunk:
596 if chunk:
586 yield chunk
597 yield chunk
587
598
588 @attr.s(slots=True, frozen=True)
599 @attr.s(slots=True, frozen=True)
589 class _deltainfo(object):
600 class _deltainfo(object):
590 distance = attr.ib()
601 distance = attr.ib()
591 deltalen = attr.ib()
602 deltalen = attr.ib()
592 data = attr.ib()
603 data = attr.ib()
593 base = attr.ib()
604 base = attr.ib()
594 chainbase = attr.ib()
605 chainbase = attr.ib()
595 chainlen = attr.ib()
606 chainlen = attr.ib()
596 compresseddeltalen = attr.ib()
607 compresseddeltalen = attr.ib()
597
608
598 class _deltacomputer(object):
609 class _deltacomputer(object):
599 def __init__(self, revlog):
610 def __init__(self, revlog):
600 self.revlog = revlog
611 self.revlog = revlog
601
612
602 def _getcandidaterevs(self, p1, p2, cachedelta):
613 def _getcandidaterevs(self, p1, p2, cachedelta):
603 """
614 """
604 Provides revisions that present an interest to be diffed against,
615 Provides revisions that present an interest to be diffed against,
605 grouped by level of easiness.
616 grouped by level of easiness.
606 """
617 """
607 revlog = self.revlog
618 revlog = self.revlog
608 gdelta = revlog._generaldelta
619 gdelta = revlog._generaldelta
609 curr = len(revlog)
620 curr = len(revlog)
610 prev = curr - 1
621 prev = curr - 1
611 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
622 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
612
623
613 # should we try to build a delta?
624 # should we try to build a delta?
614 if prev != nullrev and revlog.storedeltachains:
625 if prev != nullrev and revlog.storedeltachains:
615 tested = set()
626 tested = set()
616 # This condition is true most of the time when processing
627 # This condition is true most of the time when processing
617 # changegroup data into a generaldelta repo. The only time it
628 # changegroup data into a generaldelta repo. The only time it
618 # isn't true is if this is the first revision in a delta chain
629 # isn't true is if this is the first revision in a delta chain
619 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
630 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
620 if cachedelta and gdelta and revlog._lazydeltabase:
631 if cachedelta and gdelta and revlog._lazydeltabase:
621 # Assume what we received from the server is a good choice
632 # Assume what we received from the server is a good choice
622 # build delta will reuse the cache
633 # build delta will reuse the cache
623 yield (cachedelta[0],)
634 yield (cachedelta[0],)
624 tested.add(cachedelta[0])
635 tested.add(cachedelta[0])
625
636
626 if gdelta:
637 if gdelta:
627 # exclude already lazy tested base if any
638 # exclude already lazy tested base if any
628 parents = [p for p in (p1r, p2r)
639 parents = [p for p in (p1r, p2r)
629 if p != nullrev and p not in tested]
640 if p != nullrev and p not in tested]
630
641
631 if not revlog._deltabothparents and len(parents) == 2:
642 if not revlog._deltabothparents and len(parents) == 2:
632 parents.sort()
643 parents.sort()
633 # To minimize the chance of having to build a fulltext,
644 # To minimize the chance of having to build a fulltext,
634 # pick first whichever parent is closest to us (max rev)
645 # pick first whichever parent is closest to us (max rev)
635 yield (parents[1],)
646 yield (parents[1],)
636 # then the other one (min rev) if the first did not fit
647 # then the other one (min rev) if the first did not fit
637 yield (parents[0],)
648 yield (parents[0],)
638 tested.update(parents)
649 tested.update(parents)
639 elif len(parents) > 0:
650 elif len(parents) > 0:
640 # Test all parents (1 or 2), and keep the best candidate
651 # Test all parents (1 or 2), and keep the best candidate
641 yield parents
652 yield parents
642 tested.update(parents)
653 tested.update(parents)
643
654
644 if prev not in tested:
655 if prev not in tested:
645 # other approach failed try against prev to hopefully save us a
656 # other approach failed try against prev to hopefully save us a
646 # fulltext.
657 # fulltext.
647 yield (prev,)
658 yield (prev,)
648 tested.add(prev)
659 tested.add(prev)
649
660
650 def buildtext(self, revinfo, fh):
661 def buildtext(self, revinfo, fh):
651 """Builds a fulltext version of a revision
662 """Builds a fulltext version of a revision
652
663
653 revinfo: _revisioninfo instance that contains all needed info
664 revinfo: _revisioninfo instance that contains all needed info
654 fh: file handle to either the .i or the .d revlog file,
665 fh: file handle to either the .i or the .d revlog file,
655 depending on whether it is inlined or not
666 depending on whether it is inlined or not
656 """
667 """
657 btext = revinfo.btext
668 btext = revinfo.btext
658 if btext[0] is not None:
669 if btext[0] is not None:
659 return btext[0]
670 return btext[0]
660
671
661 revlog = self.revlog
672 revlog = self.revlog
662 cachedelta = revinfo.cachedelta
673 cachedelta = revinfo.cachedelta
663 flags = revinfo.flags
674 flags = revinfo.flags
664 node = revinfo.node
675 node = revinfo.node
665
676
666 baserev = cachedelta[0]
677 baserev = cachedelta[0]
667 delta = cachedelta[1]
678 delta = cachedelta[1]
668 # special case deltas which replace entire base; no need to decode
679 # special case deltas which replace entire base; no need to decode
669 # base revision. this neatly avoids censored bases, which throw when
680 # base revision. this neatly avoids censored bases, which throw when
670 # they're decoded.
681 # they're decoded.
671 hlen = struct.calcsize(">lll")
682 hlen = struct.calcsize(">lll")
672 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
683 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
673 len(delta) - hlen):
684 len(delta) - hlen):
674 btext[0] = delta[hlen:]
685 btext[0] = delta[hlen:]
675 else:
686 else:
676 # deltabase is rawtext before changed by flag processors, which is
687 # deltabase is rawtext before changed by flag processors, which is
677 # equivalent to non-raw text
688 # equivalent to non-raw text
678 basetext = revlog.revision(baserev, _df=fh, raw=False)
689 basetext = revlog.revision(baserev, _df=fh, raw=False)
679 btext[0] = mdiff.patch(basetext, delta)
690 btext[0] = mdiff.patch(basetext, delta)
680
691
681 try:
692 try:
682 res = revlog._processflags(btext[0], flags, 'read', raw=True)
693 res = revlog._processflags(btext[0], flags, 'read', raw=True)
683 btext[0], validatehash = res
694 btext[0], validatehash = res
684 if validatehash:
695 if validatehash:
685 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
696 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
686 if flags & REVIDX_ISCENSORED:
697 if flags & REVIDX_ISCENSORED:
687 raise RevlogError(_('node %s is not censored') % node)
698 raise RevlogError(_('node %s is not censored') % node)
688 except CensoredNodeError:
699 except CensoredNodeError:
689 # must pass the censored index flag to add censored revisions
700 # must pass the censored index flag to add censored revisions
690 if not flags & REVIDX_ISCENSORED:
701 if not flags & REVIDX_ISCENSORED:
691 raise
702 raise
692 return btext[0]
703 return btext[0]
693
704
694 def _builddeltadiff(self, base, revinfo, fh):
705 def _builddeltadiff(self, base, revinfo, fh):
695 revlog = self.revlog
706 revlog = self.revlog
696 t = self.buildtext(revinfo, fh)
707 t = self.buildtext(revinfo, fh)
697 if revlog.iscensored(base):
708 if revlog.iscensored(base):
698 # deltas based on a censored revision must replace the
709 # deltas based on a censored revision must replace the
699 # full content in one patch, so delta works everywhere
710 # full content in one patch, so delta works everywhere
700 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
711 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
701 delta = header + t
712 delta = header + t
702 else:
713 else:
703 ptext = revlog.revision(base, _df=fh, raw=True)
714 ptext = revlog.revision(base, _df=fh, raw=True)
704 delta = mdiff.textdiff(ptext, t)
715 delta = mdiff.textdiff(ptext, t)
705
716
706 return delta
717 return delta
707
718
708 def _builddeltainfo(self, revinfo, base, fh):
719 def _builddeltainfo(self, revinfo, base, fh):
709 # can we use the cached delta?
720 # can we use the cached delta?
710 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
721 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
711 delta = revinfo.cachedelta[1]
722 delta = revinfo.cachedelta[1]
712 else:
723 else:
713 delta = self._builddeltadiff(base, revinfo, fh)
724 delta = self._builddeltadiff(base, revinfo, fh)
714 revlog = self.revlog
725 revlog = self.revlog
715 header, data = revlog.compress(delta)
726 header, data = revlog.compress(delta)
716 deltalen = len(header) + len(data)
727 deltalen = len(header) + len(data)
717 chainbase = revlog.chainbase(base)
728 chainbase = revlog.chainbase(base)
718 offset = revlog.end(len(revlog) - 1)
729 offset = revlog.end(len(revlog) - 1)
719 dist = deltalen + offset - revlog.start(chainbase)
730 dist = deltalen + offset - revlog.start(chainbase)
720 if revlog._generaldelta:
731 if revlog._generaldelta:
721 deltabase = base
732 deltabase = base
722 else:
733 else:
723 deltabase = chainbase
734 deltabase = chainbase
724 chainlen, compresseddeltalen = revlog._chaininfo(base)
735 chainlen, compresseddeltalen = revlog._chaininfo(base)
725 chainlen += 1
736 chainlen += 1
726 compresseddeltalen += deltalen
737 compresseddeltalen += deltalen
727 return _deltainfo(dist, deltalen, (header, data), deltabase,
738 return _deltainfo(dist, deltalen, (header, data), deltabase,
728 chainbase, chainlen, compresseddeltalen)
739 chainbase, chainlen, compresseddeltalen)
729
740
730 def finddeltainfo(self, revinfo, fh):
741 def finddeltainfo(self, revinfo, fh):
731 """Find an acceptable delta against a candidate revision
742 """Find an acceptable delta against a candidate revision
732
743
733 revinfo: information about the revision (instance of _revisioninfo)
744 revinfo: information about the revision (instance of _revisioninfo)
734 fh: file handle to either the .i or the .d revlog file,
745 fh: file handle to either the .i or the .d revlog file,
735 depending on whether it is inlined or not
746 depending on whether it is inlined or not
736
747
737 Returns the first acceptable candidate revision, as ordered by
748 Returns the first acceptable candidate revision, as ordered by
738 _getcandidaterevs
749 _getcandidaterevs
739 """
750 """
740 cachedelta = revinfo.cachedelta
751 cachedelta = revinfo.cachedelta
741 p1 = revinfo.p1
752 p1 = revinfo.p1
742 p2 = revinfo.p2
753 p2 = revinfo.p2
743 revlog = self.revlog
754 revlog = self.revlog
744
755
745 deltainfo = None
756 deltainfo = None
746 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
757 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
747 nominateddeltas = []
758 nominateddeltas = []
748 for candidaterev in candidaterevs:
759 for candidaterev in candidaterevs:
749 # no delta for rawtext-changing revs (see "candelta" for why)
760 # no delta for rawtext-changing revs (see "candelta" for why)
750 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
761 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
751 continue
762 continue
752 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
763 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
753 if revlog._isgooddeltainfo(candidatedelta, revinfo):
764 if revlog._isgooddeltainfo(candidatedelta, revinfo):
754 nominateddeltas.append(candidatedelta)
765 nominateddeltas.append(candidatedelta)
755 if nominateddeltas:
766 if nominateddeltas:
756 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
767 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
757 break
768 break
758
769
759 return deltainfo
770 return deltainfo
760
771
761 @attr.s(slots=True, frozen=True)
772 @attr.s(slots=True, frozen=True)
762 class _revisioninfo(object):
773 class _revisioninfo(object):
763 """Information about a revision that allows building its fulltext
774 """Information about a revision that allows building its fulltext
764 node: expected hash of the revision
775 node: expected hash of the revision
765 p1, p2: parent revs of the revision
776 p1, p2: parent revs of the revision
766 btext: built text cache consisting of a one-element list
777 btext: built text cache consisting of a one-element list
767 cachedelta: (baserev, uncompressed_delta) or None
778 cachedelta: (baserev, uncompressed_delta) or None
768 flags: flags associated to the revision storage
779 flags: flags associated to the revision storage
769
780
770 One of btext[0] or cachedelta must be set.
781 One of btext[0] or cachedelta must be set.
771 """
782 """
772 node = attr.ib()
783 node = attr.ib()
773 p1 = attr.ib()
784 p1 = attr.ib()
774 p2 = attr.ib()
785 p2 = attr.ib()
775 btext = attr.ib()
786 btext = attr.ib()
776 textlen = attr.ib()
787 textlen = attr.ib()
777 cachedelta = attr.ib()
788 cachedelta = attr.ib()
778 flags = attr.ib()
789 flags = attr.ib()
779
790
780 # index v0:
791 # index v0:
781 # 4 bytes: offset
792 # 4 bytes: offset
782 # 4 bytes: compressed length
793 # 4 bytes: compressed length
783 # 4 bytes: base rev
794 # 4 bytes: base rev
784 # 4 bytes: link rev
795 # 4 bytes: link rev
785 # 20 bytes: parent 1 nodeid
796 # 20 bytes: parent 1 nodeid
786 # 20 bytes: parent 2 nodeid
797 # 20 bytes: parent 2 nodeid
787 # 20 bytes: nodeid
798 # 20 bytes: nodeid
788 indexformatv0 = struct.Struct(">4l20s20s20s")
799 indexformatv0 = struct.Struct(">4l20s20s20s")
789 indexformatv0_pack = indexformatv0.pack
800 indexformatv0_pack = indexformatv0.pack
790 indexformatv0_unpack = indexformatv0.unpack
801 indexformatv0_unpack = indexformatv0.unpack
791
802
792 class revlogoldio(object):
803 class revlogoldio(object):
793 def __init__(self):
804 def __init__(self):
794 self.size = indexformatv0.size
805 self.size = indexformatv0.size
795
806
796 def parseindex(self, data, inline):
807 def parseindex(self, data, inline):
797 s = self.size
808 s = self.size
798 index = []
809 index = []
799 nodemap = {nullid: nullrev}
810 nodemap = {nullid: nullrev}
800 n = off = 0
811 n = off = 0
801 l = len(data)
812 l = len(data)
802 while off + s <= l:
813 while off + s <= l:
803 cur = data[off:off + s]
814 cur = data[off:off + s]
804 off += s
815 off += s
805 e = indexformatv0_unpack(cur)
816 e = indexformatv0_unpack(cur)
806 # transform to revlogv1 format
817 # transform to revlogv1 format
807 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
818 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
808 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
819 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
809 index.append(e2)
820 index.append(e2)
810 nodemap[e[6]] = n
821 nodemap[e[6]] = n
811 n += 1
822 n += 1
812
823
813 # add the magic null revision at -1
824 # add the magic null revision at -1
814 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
825 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
815
826
816 return index, nodemap, None
827 return index, nodemap, None
817
828
818 def packentry(self, entry, node, version, rev):
829 def packentry(self, entry, node, version, rev):
819 if gettype(entry[0]):
830 if gettype(entry[0]):
820 raise RevlogError(_('index entry flags need revlog version 1'))
831 raise RevlogError(_('index entry flags need revlog version 1'))
821 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
832 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
822 node(entry[5]), node(entry[6]), entry[7])
833 node(entry[5]), node(entry[6]), entry[7])
823 return indexformatv0_pack(*e2)
834 return indexformatv0_pack(*e2)
824
835
825 # index ng:
836 # index ng:
826 # 6 bytes: offset
837 # 6 bytes: offset
827 # 2 bytes: flags
838 # 2 bytes: flags
828 # 4 bytes: compressed length
839 # 4 bytes: compressed length
829 # 4 bytes: uncompressed length
840 # 4 bytes: uncompressed length
830 # 4 bytes: base rev
841 # 4 bytes: base rev
831 # 4 bytes: link rev
842 # 4 bytes: link rev
832 # 4 bytes: parent 1 rev
843 # 4 bytes: parent 1 rev
833 # 4 bytes: parent 2 rev
844 # 4 bytes: parent 2 rev
834 # 32 bytes: nodeid
845 # 32 bytes: nodeid
835 indexformatng = struct.Struct(">Qiiiiii20s12x")
846 indexformatng = struct.Struct(">Qiiiiii20s12x")
836 indexformatng_pack = indexformatng.pack
847 indexformatng_pack = indexformatng.pack
837 versionformat = struct.Struct(">I")
848 versionformat = struct.Struct(">I")
838 versionformat_pack = versionformat.pack
849 versionformat_pack = versionformat.pack
839 versionformat_unpack = versionformat.unpack
850 versionformat_unpack = versionformat.unpack
840
851
841 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
852 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
842 # signed integer)
853 # signed integer)
843 _maxentrysize = 0x7fffffff
854 _maxentrysize = 0x7fffffff
844
855
845 class revlogio(object):
856 class revlogio(object):
846 def __init__(self):
857 def __init__(self):
847 self.size = indexformatng.size
858 self.size = indexformatng.size
848
859
849 def parseindex(self, data, inline):
860 def parseindex(self, data, inline):
850 # call the C implementation to parse the index data
861 # call the C implementation to parse the index data
851 index, cache = parsers.parse_index2(data, inline)
862 index, cache = parsers.parse_index2(data, inline)
852 return index, getattr(index, 'nodemap', None), cache
863 return index, getattr(index, 'nodemap', None), cache
853
864
854 def packentry(self, entry, node, version, rev):
865 def packentry(self, entry, node, version, rev):
855 p = indexformatng_pack(*entry)
866 p = indexformatng_pack(*entry)
856 if rev == 0:
867 if rev == 0:
857 p = versionformat_pack(version) + p[4:]
868 p = versionformat_pack(version) + p[4:]
858 return p
869 return p
859
870
860 class revlog(object):
871 class revlog(object):
861 """
872 """
862 the underlying revision storage object
873 the underlying revision storage object
863
874
864 A revlog consists of two parts, an index and the revision data.
875 A revlog consists of two parts, an index and the revision data.
865
876
866 The index is a file with a fixed record size containing
877 The index is a file with a fixed record size containing
867 information on each revision, including its nodeid (hash), the
878 information on each revision, including its nodeid (hash), the
868 nodeids of its parents, the position and offset of its data within
879 nodeids of its parents, the position and offset of its data within
869 the data file, and the revision it's based on. Finally, each entry
880 the data file, and the revision it's based on. Finally, each entry
870 contains a linkrev entry that can serve as a pointer to external
881 contains a linkrev entry that can serve as a pointer to external
871 data.
882 data.
872
883
873 The revision data itself is a linear collection of data chunks.
884 The revision data itself is a linear collection of data chunks.
874 Each chunk represents a revision and is usually represented as a
885 Each chunk represents a revision and is usually represented as a
875 delta against the previous chunk. To bound lookup time, runs of
886 delta against the previous chunk. To bound lookup time, runs of
876 deltas are limited to about 2 times the length of the original
887 deltas are limited to about 2 times the length of the original
877 version data. This makes retrieval of a version proportional to
888 version data. This makes retrieval of a version proportional to
878 its size, or O(1) relative to the number of revisions.
889 its size, or O(1) relative to the number of revisions.
879
890
880 Both pieces of the revlog are written to in an append-only
891 Both pieces of the revlog are written to in an append-only
881 fashion, which means we never need to rewrite a file to insert or
892 fashion, which means we never need to rewrite a file to insert or
882 remove data, and can use some simple techniques to avoid the need
893 remove data, and can use some simple techniques to avoid the need
883 for locking while reading.
894 for locking while reading.
884
895
885 If checkambig, indexfile is opened with checkambig=True at
896 If checkambig, indexfile is opened with checkambig=True at
886 writing, to avoid file stat ambiguity.
897 writing, to avoid file stat ambiguity.
887
898
888 If mmaplargeindex is True, and an mmapindexthreshold is set, the
899 If mmaplargeindex is True, and an mmapindexthreshold is set, the
889 index will be mmapped rather than read if it is larger than the
900 index will be mmapped rather than read if it is larger than the
890 configured threshold.
901 configured threshold.
891
902
892 If censorable is True, the revlog can have censored revisions.
903 If censorable is True, the revlog can have censored revisions.
893 """
904 """
894 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
905 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
895 mmaplargeindex=False, censorable=False):
906 mmaplargeindex=False, censorable=False):
896 """
907 """
897 create a revlog object
908 create a revlog object
898
909
899 opener is a function that abstracts the file opening operation
910 opener is a function that abstracts the file opening operation
900 and can be used to implement COW semantics or the like.
911 and can be used to implement COW semantics or the like.
901 """
912 """
902 self.indexfile = indexfile
913 self.indexfile = indexfile
903 self.datafile = datafile or (indexfile[:-2] + ".d")
914 self.datafile = datafile or (indexfile[:-2] + ".d")
904 self.opener = opener
915 self.opener = opener
905 # When True, indexfile is opened with checkambig=True at writing, to
916 # When True, indexfile is opened with checkambig=True at writing, to
906 # avoid file stat ambiguity.
917 # avoid file stat ambiguity.
907 self._checkambig = checkambig
918 self._checkambig = checkambig
908 self._censorable = censorable
919 self._censorable = censorable
909 # 3-tuple of (node, rev, text) for a raw revision.
920 # 3-tuple of (node, rev, text) for a raw revision.
910 self._cache = None
921 self._cache = None
911 # Maps rev to chain base rev.
922 # Maps rev to chain base rev.
912 self._chainbasecache = util.lrucachedict(100)
923 self._chainbasecache = util.lrucachedict(100)
913 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
924 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
914 self._chunkcache = (0, '')
925 self._chunkcache = (0, '')
915 # How much data to read and cache into the raw revlog data cache.
926 # How much data to read and cache into the raw revlog data cache.
916 self._chunkcachesize = 65536
927 self._chunkcachesize = 65536
917 self._maxchainlen = None
928 self._maxchainlen = None
918 self._deltabothparents = True
929 self._deltabothparents = True
919 self.index = []
930 self.index = []
920 # Mapping of partial identifiers to full nodes.
931 # Mapping of partial identifiers to full nodes.
921 self._pcache = {}
932 self._pcache = {}
922 # Mapping of revision integer to full node.
933 # Mapping of revision integer to full node.
923 self._nodecache = {nullid: nullrev}
934 self._nodecache = {nullid: nullrev}
924 self._nodepos = None
935 self._nodepos = None
925 self._compengine = 'zlib'
936 self._compengine = 'zlib'
926 self._maxdeltachainspan = -1
937 self._maxdeltachainspan = -1
927 self._withsparseread = False
938 self._withsparseread = False
928 self._sparserevlog = False
939 self._sparserevlog = False
929 self._srdensitythreshold = 0.50
940 self._srdensitythreshold = 0.50
930 self._srmingapsize = 262144
941 self._srmingapsize = 262144
931
942
932 mmapindexthreshold = None
943 mmapindexthreshold = None
933 v = REVLOG_DEFAULT_VERSION
944 v = REVLOG_DEFAULT_VERSION
934 opts = getattr(opener, 'options', None)
945 opts = getattr(opener, 'options', None)
935 if opts is not None:
946 if opts is not None:
936 if 'revlogv2' in opts:
947 if 'revlogv2' in opts:
937 # version 2 revlogs always use generaldelta.
948 # version 2 revlogs always use generaldelta.
938 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
949 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
939 elif 'revlogv1' in opts:
950 elif 'revlogv1' in opts:
940 if 'generaldelta' in opts:
951 if 'generaldelta' in opts:
941 v |= FLAG_GENERALDELTA
952 v |= FLAG_GENERALDELTA
942 else:
953 else:
943 v = 0
954 v = 0
944 if 'chunkcachesize' in opts:
955 if 'chunkcachesize' in opts:
945 self._chunkcachesize = opts['chunkcachesize']
956 self._chunkcachesize = opts['chunkcachesize']
946 if 'maxchainlen' in opts:
957 if 'maxchainlen' in opts:
947 self._maxchainlen = opts['maxchainlen']
958 self._maxchainlen = opts['maxchainlen']
948 if 'deltabothparents' in opts:
959 if 'deltabothparents' in opts:
949 self._deltabothparents = opts['deltabothparents']
960 self._deltabothparents = opts['deltabothparents']
950 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
961 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
951 if 'compengine' in opts:
962 if 'compengine' in opts:
952 self._compengine = opts['compengine']
963 self._compengine = opts['compengine']
953 if 'maxdeltachainspan' in opts:
964 if 'maxdeltachainspan' in opts:
954 self._maxdeltachainspan = opts['maxdeltachainspan']
965 self._maxdeltachainspan = opts['maxdeltachainspan']
955 if mmaplargeindex and 'mmapindexthreshold' in opts:
966 if mmaplargeindex and 'mmapindexthreshold' in opts:
956 mmapindexthreshold = opts['mmapindexthreshold']
967 mmapindexthreshold = opts['mmapindexthreshold']
957 self._sparserevlog = bool(opts.get('sparse-revlog', False))
968 self._sparserevlog = bool(opts.get('sparse-revlog', False))
958 withsparseread = bool(opts.get('with-sparse-read', False))
969 withsparseread = bool(opts.get('with-sparse-read', False))
959 # sparse-revlog forces sparse-read
970 # sparse-revlog forces sparse-read
960 self._withsparseread = self._sparserevlog or withsparseread
971 self._withsparseread = self._sparserevlog or withsparseread
961 if 'sparse-read-density-threshold' in opts:
972 if 'sparse-read-density-threshold' in opts:
962 self._srdensitythreshold = opts['sparse-read-density-threshold']
973 self._srdensitythreshold = opts['sparse-read-density-threshold']
963 if 'sparse-read-min-gap-size' in opts:
974 if 'sparse-read-min-gap-size' in opts:
964 self._srmingapsize = opts['sparse-read-min-gap-size']
975 self._srmingapsize = opts['sparse-read-min-gap-size']
965
976
966 if self._chunkcachesize <= 0:
977 if self._chunkcachesize <= 0:
967 raise RevlogError(_('revlog chunk cache size %r is not greater '
978 raise RevlogError(_('revlog chunk cache size %r is not greater '
968 'than 0') % self._chunkcachesize)
979 'than 0') % self._chunkcachesize)
969 elif self._chunkcachesize & (self._chunkcachesize - 1):
980 elif self._chunkcachesize & (self._chunkcachesize - 1):
970 raise RevlogError(_('revlog chunk cache size %r is not a power '
981 raise RevlogError(_('revlog chunk cache size %r is not a power '
971 'of 2') % self._chunkcachesize)
982 'of 2') % self._chunkcachesize)
972
983
973 indexdata = ''
984 indexdata = ''
974 self._initempty = True
985 self._initempty = True
975 try:
986 try:
976 with self._indexfp() as f:
987 with self._indexfp() as f:
977 if (mmapindexthreshold is not None and
988 if (mmapindexthreshold is not None and
978 self.opener.fstat(f).st_size >= mmapindexthreshold):
989 self.opener.fstat(f).st_size >= mmapindexthreshold):
979 indexdata = util.buffer(util.mmapread(f))
990 indexdata = util.buffer(util.mmapread(f))
980 else:
991 else:
981 indexdata = f.read()
992 indexdata = f.read()
982 if len(indexdata) > 0:
993 if len(indexdata) > 0:
983 v = versionformat_unpack(indexdata[:4])[0]
994 v = versionformat_unpack(indexdata[:4])[0]
984 self._initempty = False
995 self._initempty = False
985 except IOError as inst:
996 except IOError as inst:
986 if inst.errno != errno.ENOENT:
997 if inst.errno != errno.ENOENT:
987 raise
998 raise
988
999
989 self.version = v
1000 self.version = v
990 self._inline = v & FLAG_INLINE_DATA
1001 self._inline = v & FLAG_INLINE_DATA
991 self._generaldelta = v & FLAG_GENERALDELTA
1002 self._generaldelta = v & FLAG_GENERALDELTA
992 flags = v & ~0xFFFF
1003 flags = v & ~0xFFFF
993 fmt = v & 0xFFFF
1004 fmt = v & 0xFFFF
994 if fmt == REVLOGV0:
1005 if fmt == REVLOGV0:
995 if flags:
1006 if flags:
996 raise RevlogError(_('unknown flags (%#04x) in version %d '
1007 raise RevlogError(_('unknown flags (%#04x) in version %d '
997 'revlog %s') %
1008 'revlog %s') %
998 (flags >> 16, fmt, self.indexfile))
1009 (flags >> 16, fmt, self.indexfile))
999 elif fmt == REVLOGV1:
1010 elif fmt == REVLOGV1:
1000 if flags & ~REVLOGV1_FLAGS:
1011 if flags & ~REVLOGV1_FLAGS:
1001 raise RevlogError(_('unknown flags (%#04x) in version %d '
1012 raise RevlogError(_('unknown flags (%#04x) in version %d '
1002 'revlog %s') %
1013 'revlog %s') %
1003 (flags >> 16, fmt, self.indexfile))
1014 (flags >> 16, fmt, self.indexfile))
1004 elif fmt == REVLOGV2:
1015 elif fmt == REVLOGV2:
1005 if flags & ~REVLOGV2_FLAGS:
1016 if flags & ~REVLOGV2_FLAGS:
1006 raise RevlogError(_('unknown flags (%#04x) in version %d '
1017 raise RevlogError(_('unknown flags (%#04x) in version %d '
1007 'revlog %s') %
1018 'revlog %s') %
1008 (flags >> 16, fmt, self.indexfile))
1019 (flags >> 16, fmt, self.indexfile))
1009 else:
1020 else:
1010 raise RevlogError(_('unknown version (%d) in revlog %s') %
1021 raise RevlogError(_('unknown version (%d) in revlog %s') %
1011 (fmt, self.indexfile))
1022 (fmt, self.indexfile))
1012
1023
1013 self.storedeltachains = True
1024 self.storedeltachains = True
1014
1025
1015 self._io = revlogio()
1026 self._io = revlogio()
1016 if self.version == REVLOGV0:
1027 if self.version == REVLOGV0:
1017 self._io = revlogoldio()
1028 self._io = revlogoldio()
1018 try:
1029 try:
1019 d = self._io.parseindex(indexdata, self._inline)
1030 d = self._io.parseindex(indexdata, self._inline)
1020 except (ValueError, IndexError):
1031 except (ValueError, IndexError):
1021 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
1032 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
1022 self.index, nodemap, self._chunkcache = d
1033 self.index, nodemap, self._chunkcache = d
1023 if nodemap is not None:
1034 if nodemap is not None:
1024 self.nodemap = self._nodecache = nodemap
1035 self.nodemap = self._nodecache = nodemap
1025 if not self._chunkcache:
1036 if not self._chunkcache:
1026 self._chunkclear()
1037 self._chunkclear()
1027 # revnum -> (chain-length, sum-delta-length)
1038 # revnum -> (chain-length, sum-delta-length)
1028 self._chaininfocache = {}
1039 self._chaininfocache = {}
1029 # revlog header -> revlog compressor
1040 # revlog header -> revlog compressor
1030 self._decompressors = {}
1041 self._decompressors = {}
1031
1042
1032 @util.propertycache
1043 @util.propertycache
1033 def _compressor(self):
1044 def _compressor(self):
1034 return util.compengines[self._compengine].revlogcompressor()
1045 return util.compengines[self._compengine].revlogcompressor()
1035
1046
1036 def _indexfp(self, mode='r'):
1047 def _indexfp(self, mode='r'):
1037 """file object for the revlog's index file"""
1048 """file object for the revlog's index file"""
1038 args = {r'mode': mode}
1049 args = {r'mode': mode}
1039 if mode != 'r':
1050 if mode != 'r':
1040 args[r'checkambig'] = self._checkambig
1051 args[r'checkambig'] = self._checkambig
1041 if mode == 'w':
1052 if mode == 'w':
1042 args[r'atomictemp'] = True
1053 args[r'atomictemp'] = True
1043 return self.opener(self.indexfile, **args)
1054 return self.opener(self.indexfile, **args)
1044
1055
1045 def _datafp(self, mode='r'):
1056 def _datafp(self, mode='r'):
1046 """file object for the revlog's data file"""
1057 """file object for the revlog's data file"""
1047 return self.opener(self.datafile, mode=mode)
1058 return self.opener(self.datafile, mode=mode)
1048
1059
1049 @contextlib.contextmanager
1060 @contextlib.contextmanager
1050 def _datareadfp(self, existingfp=None):
1061 def _datareadfp(self, existingfp=None):
1051 """file object suitable to read data"""
1062 """file object suitable to read data"""
1052 if existingfp is not None:
1063 if existingfp is not None:
1053 yield existingfp
1064 yield existingfp
1054 else:
1065 else:
1055 if self._inline:
1066 if self._inline:
1056 func = self._indexfp
1067 func = self._indexfp
1057 else:
1068 else:
1058 func = self._datafp
1069 func = self._datafp
1059 with func() as fp:
1070 with func() as fp:
1060 yield fp
1071 yield fp
1061
1072
1062 def tip(self):
1073 def tip(self):
1063 return self.node(len(self.index) - 2)
1074 return self.node(len(self.index) - 2)
1064 def __contains__(self, rev):
1075 def __contains__(self, rev):
1065 return 0 <= rev < len(self)
1076 return 0 <= rev < len(self)
1066 def __len__(self):
1077 def __len__(self):
1067 return len(self.index) - 1
1078 return len(self.index) - 1
1068 def __iter__(self):
1079 def __iter__(self):
1069 return iter(xrange(len(self)))
1080 return iter(xrange(len(self)))
1070 def revs(self, start=0, stop=None):
1081 def revs(self, start=0, stop=None):
1071 """iterate over all rev in this revlog (from start to stop)"""
1082 """iterate over all rev in this revlog (from start to stop)"""
1072 step = 1
1083 step = 1
1073 if stop is not None:
1084 if stop is not None:
1074 if start > stop:
1085 if start > stop:
1075 step = -1
1086 step = -1
1076 stop += step
1087 stop += step
1077 else:
1088 else:
1078 stop = len(self)
1089 stop = len(self)
1079 return xrange(start, stop, step)
1090 return xrange(start, stop, step)
1080
1091
1081 @util.propertycache
1092 @util.propertycache
1082 def nodemap(self):
1093 def nodemap(self):
1083 self.rev(self.node(0))
1094 self.rev(self.node(0))
1084 return self._nodecache
1095 return self._nodecache
1085
1096
1086 def hasnode(self, node):
1097 def hasnode(self, node):
1087 try:
1098 try:
1088 self.rev(node)
1099 self.rev(node)
1089 return True
1100 return True
1090 except KeyError:
1101 except KeyError:
1091 return False
1102 return False
1092
1103
1093 def candelta(self, baserev, rev):
1104 def candelta(self, baserev, rev):
1094 """whether two revisions (baserev, rev) can be delta-ed or not"""
1105 """whether two revisions (baserev, rev) can be delta-ed or not"""
1095 # Disable delta if either rev requires a content-changing flag
1106 # Disable delta if either rev requires a content-changing flag
1096 # processor (ex. LFS). This is because such flag processor can alter
1107 # processor (ex. LFS). This is because such flag processor can alter
1097 # the rawtext content that the delta will be based on, and two clients
1108 # the rawtext content that the delta will be based on, and two clients
1098 # could have a same revlog node with different flags (i.e. different
1109 # could have a same revlog node with different flags (i.e. different
1099 # rawtext contents) and the delta could be incompatible.
1110 # rawtext contents) and the delta could be incompatible.
1100 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
1111 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
1101 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
1112 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
1102 return False
1113 return False
1103 return True
1114 return True
1104
1115
1105 def clearcaches(self):
1116 def clearcaches(self):
1106 self._cache = None
1117 self._cache = None
1107 self._chainbasecache.clear()
1118 self._chainbasecache.clear()
1108 self._chunkcache = (0, '')
1119 self._chunkcache = (0, '')
1109 self._pcache = {}
1120 self._pcache = {}
1110
1121
1111 try:
1122 try:
1112 self._nodecache.clearcaches()
1123 self._nodecache.clearcaches()
1113 except AttributeError:
1124 except AttributeError:
1114 self._nodecache = {nullid: nullrev}
1125 self._nodecache = {nullid: nullrev}
1115 self._nodepos = None
1126 self._nodepos = None
1116
1127
1117 def rev(self, node):
1128 def rev(self, node):
1118 try:
1129 try:
1119 return self._nodecache[node]
1130 return self._nodecache[node]
1120 except TypeError:
1131 except TypeError:
1121 raise
1132 raise
1122 except RevlogError:
1133 except RevlogError:
1123 # parsers.c radix tree lookup failed
1134 # parsers.c radix tree lookup failed
1124 if node == wdirid or node in wdirfilenodeids:
1135 if node == wdirid or node in wdirfilenodeids:
1125 raise error.WdirUnsupported
1136 raise error.WdirUnsupported
1126 raise LookupError(node, self.indexfile, _('no node'))
1137 raise LookupError(node, self.indexfile, _('no node'))
1127 except KeyError:
1138 except KeyError:
1128 # pure python cache lookup failed
1139 # pure python cache lookup failed
1129 n = self._nodecache
1140 n = self._nodecache
1130 i = self.index
1141 i = self.index
1131 p = self._nodepos
1142 p = self._nodepos
1132 if p is None:
1143 if p is None:
1133 p = len(i) - 2
1144 p = len(i) - 2
1134 else:
1145 else:
1135 assert p < len(i)
1146 assert p < len(i)
1136 for r in xrange(p, -1, -1):
1147 for r in xrange(p, -1, -1):
1137 v = i[r][7]
1148 v = i[r][7]
1138 n[v] = r
1149 n[v] = r
1139 if v == node:
1150 if v == node:
1140 self._nodepos = r - 1
1151 self._nodepos = r - 1
1141 return r
1152 return r
1142 if node == wdirid or node in wdirfilenodeids:
1153 if node == wdirid or node in wdirfilenodeids:
1143 raise error.WdirUnsupported
1154 raise error.WdirUnsupported
1144 raise LookupError(node, self.indexfile, _('no node'))
1155 raise LookupError(node, self.indexfile, _('no node'))
1145
1156
    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
1150 def start(self, rev):
1161 def start(self, rev):
1151 return int(self.index[rev][0] >> 16)
1162 return int(self.index[rev][0] >> 16)
1152
1163
1153 def flags(self, rev):
1164 def flags(self, rev):
1154 return self.index[rev][0] & 0xFFFF
1165 return self.index[rev][0] & 0xFFFF
1155
1166
1156 def length(self, rev):
1167 def length(self, rev):
1157 return self.index[rev][1]
1168 return self.index[rev][1]
1158
1169
1159 def rawsize(self, rev):
1170 def rawsize(self, rev):
1160 """return the length of the uncompressed text for a given revision"""
1171 """return the length of the uncompressed text for a given revision"""
1161 l = self.index[rev][2]
1172 l = self.index[rev][2]
1162 if l >= 0:
1173 if l >= 0:
1163 return l
1174 return l
1164
1175
1165 t = self.revision(rev, raw=True)
1176 t = self.revision(rev, raw=True)
1166 return len(t)
1177 return len(t)
1167
1178
1168 def size(self, rev):
1179 def size(self, rev):
1169 """length of non-raw text (processed by a "read" flag processor)"""
1180 """length of non-raw text (processed by a "read" flag processor)"""
1170 # fast path: if no "read" flag processor could change the content,
1181 # fast path: if no "read" flag processor could change the content,
1171 # size is rawsize. note: ELLIPSIS is known to not change the content.
1182 # size is rawsize. note: ELLIPSIS is known to not change the content.
1172 flags = self.flags(rev)
1183 flags = self.flags(rev)
1173 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1184 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1174 return self.rawsize(rev)
1185 return self.rawsize(rev)
1175
1186
1176 return len(self.revision(rev, raw=False))
1187 return len(self.revision(rev, raw=False))
1177
1188
1178 def chainbase(self, rev):
1189 def chainbase(self, rev):
1179 base = self._chainbasecache.get(rev)
1190 base = self._chainbasecache.get(rev)
1180 if base is not None:
1191 if base is not None:
1181 return base
1192 return base
1182
1193
1183 index = self.index
1194 index = self.index
1184 iterrev = rev
1195 iterrev = rev
1185 base = index[iterrev][3]
1196 base = index[iterrev][3]
1186 while base != iterrev:
1197 while base != iterrev:
1187 iterrev = base
1198 iterrev = base
1188 base = index[iterrev][3]
1199 base = index[iterrev][3]
1189
1200
1190 self._chainbasecache[rev] = base
1201 self._chainbasecache[rev] = base
1191 return base
1202 return base
1192
1203
1193 def linkrev(self, rev):
1204 def linkrev(self, rev):
1194 return self.index[rev][4]
1205 return self.index[rev][4]
1195
1206
1196 def parentrevs(self, rev):
1207 def parentrevs(self, rev):
1197 try:
1208 try:
1198 entry = self.index[rev]
1209 entry = self.index[rev]
1199 except IndexError:
1210 except IndexError:
1200 if rev == wdirrev:
1211 if rev == wdirrev:
1201 raise error.WdirUnsupported
1212 raise error.WdirUnsupported
1202 raise
1213 raise
1203
1214
1204 return entry[5], entry[6]
1215 return entry[5], entry[6]
1205
1216
1206 def node(self, rev):
1217 def node(self, rev):
1207 try:
1218 try:
1208 return self.index[rev][7]
1219 return self.index[rev][7]
1209 except IndexError:
1220 except IndexError:
1210 if rev == wdirrev:
1221 if rev == wdirrev:
1211 raise error.WdirUnsupported
1222 raise error.WdirUnsupported
1212 raise
1223 raise
1213
1224
    # Derived from index values.
1215
1226
1216 def end(self, rev):
1227 def end(self, rev):
1217 return self.start(rev) + self.length(rev)
1228 return self.start(rev) + self.length(rev)
1218
1229
1219 def parents(self, node):
1230 def parents(self, node):
1220 i = self.index
1231 i = self.index
1221 d = i[self.rev(node)]
1232 d = i[self.rev(node)]
1222 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1233 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1223
1234
1224 def chainlen(self, rev):
1235 def chainlen(self, rev):
1225 return self._chaininfo(rev)[0]
1236 return self._chaininfo(rev)[0]
1226
1237
1227 def _chaininfo(self, rev):
1238 def _chaininfo(self, rev):
1228 chaininfocache = self._chaininfocache
1239 chaininfocache = self._chaininfocache
1229 if rev in chaininfocache:
1240 if rev in chaininfocache:
1230 return chaininfocache[rev]
1241 return chaininfocache[rev]
1231 index = self.index
1242 index = self.index
1232 generaldelta = self._generaldelta
1243 generaldelta = self._generaldelta
1233 iterrev = rev
1244 iterrev = rev
1234 e = index[iterrev]
1245 e = index[iterrev]
1235 clen = 0
1246 clen = 0
1236 compresseddeltalen = 0
1247 compresseddeltalen = 0
1237 while iterrev != e[3]:
1248 while iterrev != e[3]:
1238 clen += 1
1249 clen += 1
1239 compresseddeltalen += e[1]
1250 compresseddeltalen += e[1]
1240 if generaldelta:
1251 if generaldelta:
1241 iterrev = e[3]
1252 iterrev = e[3]
1242 else:
1253 else:
1243 iterrev -= 1
1254 iterrev -= 1
1244 if iterrev in chaininfocache:
1255 if iterrev in chaininfocache:
1245 t = chaininfocache[iterrev]
1256 t = chaininfocache[iterrev]
1246 clen += t[0]
1257 clen += t[0]
1247 compresseddeltalen += t[1]
1258 compresseddeltalen += t[1]
1248 break
1259 break
1249 e = index[iterrev]
1260 e = index[iterrev]
1250 else:
1261 else:
1251 # Add text length of base since decompressing that also takes
1262 # Add text length of base since decompressing that also takes
1252 # work. For cache hits the length is already included.
1263 # work. For cache hits the length is already included.
1253 compresseddeltalen += e[1]
1264 compresseddeltalen += e[1]
1254 r = (clen, compresseddeltalen)
1265 r = (clen, compresseddeltalen)
1255 chaininfocache[rev] = r
1266 chaininfocache[rev] = r
1256 return r
1267 return r
1257
1268
1258 def _deltachain(self, rev, stoprev=None):
1269 def _deltachain(self, rev, stoprev=None):
1259 """Obtain the delta chain for a revision.
1270 """Obtain the delta chain for a revision.
1260
1271
1261 ``stoprev`` specifies a revision to stop at. If not specified, we
1272 ``stoprev`` specifies a revision to stop at. If not specified, we
1262 stop at the base of the chain.
1273 stop at the base of the chain.
1263
1274
1264 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1275 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1265 revs in ascending order and ``stopped`` is a bool indicating whether
1276 revs in ascending order and ``stopped`` is a bool indicating whether
1266 ``stoprev`` was hit.
1277 ``stoprev`` was hit.
1267 """
1278 """
1268 # Try C implementation.
1279 # Try C implementation.
1269 try:
1280 try:
1270 return self.index.deltachain(rev, stoprev, self._generaldelta)
1281 return self.index.deltachain(rev, stoprev, self._generaldelta)
1271 except AttributeError:
1282 except AttributeError:
1272 pass
1283 pass
1273
1284
1274 chain = []
1285 chain = []
1275
1286
1276 # Alias to prevent attribute lookup in tight loop.
1287 # Alias to prevent attribute lookup in tight loop.
1277 index = self.index
1288 index = self.index
1278 generaldelta = self._generaldelta
1289 generaldelta = self._generaldelta
1279
1290
1280 iterrev = rev
1291 iterrev = rev
1281 e = index[iterrev]
1292 e = index[iterrev]
1282 while iterrev != e[3] and iterrev != stoprev:
1293 while iterrev != e[3] and iterrev != stoprev:
1283 chain.append(iterrev)
1294 chain.append(iterrev)
1284 if generaldelta:
1295 if generaldelta:
1285 iterrev = e[3]
1296 iterrev = e[3]
1286 else:
1297 else:
1287 iterrev -= 1
1298 iterrev -= 1
1288 e = index[iterrev]
1299 e = index[iterrev]
1289
1300
1290 if iterrev == stoprev:
1301 if iterrev == stoprev:
1291 stopped = True
1302 stopped = True
1292 else:
1303 else:
1293 chain.append(iterrev)
1304 chain.append(iterrev)
1294 stopped = False
1305 stopped = False
1295
1306
1296 chain.reverse()
1307 chain.reverse()
1297 return chain, stopped
1308 return chain, stopped
1298
1309
1299 def ancestors(self, revs, stoprev=0, inclusive=False):
1310 def ancestors(self, revs, stoprev=0, inclusive=False):
1300 """Generate the ancestors of 'revs' in reverse topological order.
1311 """Generate the ancestors of 'revs' in reverse topological order.
1301 Does not generate revs lower than stoprev.
1312 Does not generate revs lower than stoprev.
1302
1313
1303 See the documentation for ancestor.lazyancestors for more details."""
1314 See the documentation for ancestor.lazyancestors for more details."""
1304
1315
1305 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1316 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1306 inclusive=inclusive)
1317 inclusive=inclusive)
1307
1318
1308 def descendants(self, revs):
1319 def descendants(self, revs):
1309 """Generate the descendants of 'revs' in revision order.
1320 """Generate the descendants of 'revs' in revision order.
1310
1321
1311 Yield a sequence of revision numbers starting with a child of
1322 Yield a sequence of revision numbers starting with a child of
1312 some rev in revs, i.e., each revision is *not* considered a
1323 some rev in revs, i.e., each revision is *not* considered a
1313 descendant of itself. Results are ordered by revision number (a
1324 descendant of itself. Results are ordered by revision number (a
1314 topological sort)."""
1325 topological sort)."""
1315 first = min(revs)
1326 first = min(revs)
1316 if first == nullrev:
1327 if first == nullrev:
1317 for i in self:
1328 for i in self:
1318 yield i
1329 yield i
1319 return
1330 return
1320
1331
1321 seen = set(revs)
1332 seen = set(revs)
1322 for i in self.revs(start=first + 1):
1333 for i in self.revs(start=first + 1):
1323 for x in self.parentrevs(i):
1334 for x in self.parentrevs(i):
1324 if x != nullrev and x in seen:
1335 if x != nullrev and x in seen:
1325 seen.add(i)
1336 seen.add(i)
1326 yield i
1337 yield i
1327 break
1338 break
1328
1339
1329 def findcommonmissing(self, common=None, heads=None):
1340 def findcommonmissing(self, common=None, heads=None):
1330 """Return a tuple of the ancestors of common and the ancestors of heads
1341 """Return a tuple of the ancestors of common and the ancestors of heads
1331 that are not ancestors of common. In revset terminology, we return the
1342 that are not ancestors of common. In revset terminology, we return the
1332 tuple:
1343 tuple:
1333
1344
1334 ::common, (::heads) - (::common)
1345 ::common, (::heads) - (::common)
1335
1346
1336 The list is sorted by revision number, meaning it is
1347 The list is sorted by revision number, meaning it is
1337 topologically sorted.
1348 topologically sorted.
1338
1349
1339 'heads' and 'common' are both lists of node IDs. If heads is
1350 'heads' and 'common' are both lists of node IDs. If heads is
1340 not supplied, uses all of the revlog's heads. If common is not
1351 not supplied, uses all of the revlog's heads. If common is not
1341 supplied, uses nullid."""
1352 supplied, uses nullid."""
1342 if common is None:
1353 if common is None:
1343 common = [nullid]
1354 common = [nullid]
1344 if heads is None:
1355 if heads is None:
1345 heads = self.heads()
1356 heads = self.heads()
1346
1357
1347 common = [self.rev(n) for n in common]
1358 common = [self.rev(n) for n in common]
1348 heads = [self.rev(n) for n in heads]
1359 heads = [self.rev(n) for n in heads]
1349
1360
1350 # we want the ancestors, but inclusive
1361 # we want the ancestors, but inclusive
1351 class lazyset(object):
1362 class lazyset(object):
1352 def __init__(self, lazyvalues):
1363 def __init__(self, lazyvalues):
1353 self.addedvalues = set()
1364 self.addedvalues = set()
1354 self.lazyvalues = lazyvalues
1365 self.lazyvalues = lazyvalues
1355
1366
1356 def __contains__(self, value):
1367 def __contains__(self, value):
1357 return value in self.addedvalues or value in self.lazyvalues
1368 return value in self.addedvalues or value in self.lazyvalues
1358
1369
1359 def __iter__(self):
1370 def __iter__(self):
1360 added = self.addedvalues
1371 added = self.addedvalues
1361 for r in added:
1372 for r in added:
1362 yield r
1373 yield r
1363 for r in self.lazyvalues:
1374 for r in self.lazyvalues:
1364 if not r in added:
1375 if not r in added:
1365 yield r
1376 yield r
1366
1377
1367 def add(self, value):
1378 def add(self, value):
1368 self.addedvalues.add(value)
1379 self.addedvalues.add(value)
1369
1380
1370 def update(self, values):
1381 def update(self, values):
1371 self.addedvalues.update(values)
1382 self.addedvalues.update(values)
1372
1383
1373 has = lazyset(self.ancestors(common))
1384 has = lazyset(self.ancestors(common))
1374 has.add(nullrev)
1385 has.add(nullrev)
1375 has.update(common)
1386 has.update(common)
1376
1387
1377 # take all ancestors from heads that aren't in has
1388 # take all ancestors from heads that aren't in has
1378 missing = set()
1389 missing = set()
1379 visit = collections.deque(r for r in heads if r not in has)
1390 visit = collections.deque(r for r in heads if r not in has)
1380 while visit:
1391 while visit:
1381 r = visit.popleft()
1392 r = visit.popleft()
1382 if r in missing:
1393 if r in missing:
1383 continue
1394 continue
1384 else:
1395 else:
1385 missing.add(r)
1396 missing.add(r)
1386 for p in self.parentrevs(r):
1397 for p in self.parentrevs(r):
1387 if p not in has:
1398 if p not in has:
1388 visit.append(p)
1399 visit.append(p)
1389 missing = list(missing)
1400 missing = list(missing)
1390 missing.sort()
1401 missing.sort()
1391 return has, [self.node(miss) for miss in missing]
1402 return has, [self.node(miss) for miss in missing]
1392
1403
1393 def incrementalmissingrevs(self, common=None):
1404 def incrementalmissingrevs(self, common=None):
1394 """Return an object that can be used to incrementally compute the
1405 """Return an object that can be used to incrementally compute the
1395 revision numbers of the ancestors of arbitrary sets that are not
1406 revision numbers of the ancestors of arbitrary sets that are not
1396 ancestors of common. This is an ancestor.incrementalmissingancestors
1407 ancestors of common. This is an ancestor.incrementalmissingancestors
1397 object.
1408 object.
1398
1409
1399 'common' is a list of revision numbers. If common is not supplied, uses
1410 'common' is a list of revision numbers. If common is not supplied, uses
1400 nullrev.
1411 nullrev.
1401 """
1412 """
1402 if common is None:
1413 if common is None:
1403 common = [nullrev]
1414 common = [nullrev]
1404
1415
1405 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1416 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1406
1417
1407 def findmissingrevs(self, common=None, heads=None):
1418 def findmissingrevs(self, common=None, heads=None):
1408 """Return the revision numbers of the ancestors of heads that
1419 """Return the revision numbers of the ancestors of heads that
1409 are not ancestors of common.
1420 are not ancestors of common.
1410
1421
1411 More specifically, return a list of revision numbers corresponding to
1422 More specifically, return a list of revision numbers corresponding to
1412 nodes N such that every N satisfies the following constraints:
1423 nodes N such that every N satisfies the following constraints:
1413
1424
1414 1. N is an ancestor of some node in 'heads'
1425 1. N is an ancestor of some node in 'heads'
1415 2. N is not an ancestor of any node in 'common'
1426 2. N is not an ancestor of any node in 'common'
1416
1427
1417 The list is sorted by revision number, meaning it is
1428 The list is sorted by revision number, meaning it is
1418 topologically sorted.
1429 topologically sorted.
1419
1430
1420 'heads' and 'common' are both lists of revision numbers. If heads is
1431 'heads' and 'common' are both lists of revision numbers. If heads is
1421 not supplied, uses all of the revlog's heads. If common is not
1432 not supplied, uses all of the revlog's heads. If common is not
1422 supplied, uses nullid."""
1433 supplied, uses nullid."""
1423 if common is None:
1434 if common is None:
1424 common = [nullrev]
1435 common = [nullrev]
1425 if heads is None:
1436 if heads is None:
1426 heads = self.headrevs()
1437 heads = self.headrevs()
1427
1438
1428 inc = self.incrementalmissingrevs(common=common)
1439 inc = self.incrementalmissingrevs(common=common)
1429 return inc.missingancestors(heads)
1440 return inc.missingancestors(heads)
1430
1441
1431 def findmissing(self, common=None, heads=None):
1442 def findmissing(self, common=None, heads=None):
1432 """Return the ancestors of heads that are not ancestors of common.
1443 """Return the ancestors of heads that are not ancestors of common.
1433
1444
1434 More specifically, return a list of nodes N such that every N
1445 More specifically, return a list of nodes N such that every N
1435 satisfies the following constraints:
1446 satisfies the following constraints:
1436
1447
1437 1. N is an ancestor of some node in 'heads'
1448 1. N is an ancestor of some node in 'heads'
1438 2. N is not an ancestor of any node in 'common'
1449 2. N is not an ancestor of any node in 'common'
1439
1450
1440 The list is sorted by revision number, meaning it is
1451 The list is sorted by revision number, meaning it is
1441 topologically sorted.
1452 topologically sorted.
1442
1453
1443 'heads' and 'common' are both lists of node IDs. If heads is
1454 'heads' and 'common' are both lists of node IDs. If heads is
1444 not supplied, uses all of the revlog's heads. If common is not
1455 not supplied, uses all of the revlog's heads. If common is not
1445 supplied, uses nullid."""
1456 supplied, uses nullid."""
1446 if common is None:
1457 if common is None:
1447 common = [nullid]
1458 common = [nullid]
1448 if heads is None:
1459 if heads is None:
1449 heads = self.heads()
1460 heads = self.heads()
1450
1461
1451 common = [self.rev(n) for n in common]
1462 common = [self.rev(n) for n in common]
1452 heads = [self.rev(n) for n in heads]
1463 heads = [self.rev(n) for n in heads]
1453
1464
1454 inc = self.incrementalmissingrevs(common=common)
1465 inc = self.incrementalmissingrevs(common=common)
1455 return [self.node(r) for r in inc.missingancestors(heads)]
1466 return [self.node(r) for r in inc.missingancestors(heads)]
1456
1467
1457 def nodesbetween(self, roots=None, heads=None):
1468 def nodesbetween(self, roots=None, heads=None):
1458 """Return a topological path from 'roots' to 'heads'.
1469 """Return a topological path from 'roots' to 'heads'.
1459
1470
1460 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1471 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1461 topologically sorted list of all nodes N that satisfy both of
1472 topologically sorted list of all nodes N that satisfy both of
1462 these constraints:
1473 these constraints:
1463
1474
1464 1. N is a descendant of some node in 'roots'
1475 1. N is a descendant of some node in 'roots'
1465 2. N is an ancestor of some node in 'heads'
1476 2. N is an ancestor of some node in 'heads'
1466
1477
1467 Every node is considered to be both a descendant and an ancestor
1478 Every node is considered to be both a descendant and an ancestor
1468 of itself, so every reachable node in 'roots' and 'heads' will be
1479 of itself, so every reachable node in 'roots' and 'heads' will be
1469 included in 'nodes'.
1480 included in 'nodes'.
1470
1481
1471 'outroots' is the list of reachable nodes in 'roots', i.e., the
1482 'outroots' is the list of reachable nodes in 'roots', i.e., the
1472 subset of 'roots' that is returned in 'nodes'. Likewise,
1483 subset of 'roots' that is returned in 'nodes'. Likewise,
1473 'outheads' is the subset of 'heads' that is also in 'nodes'.
1484 'outheads' is the subset of 'heads' that is also in 'nodes'.
1474
1485
1475 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1486 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1476 unspecified, uses nullid as the only root. If 'heads' is
1487 unspecified, uses nullid as the only root. If 'heads' is
1477 unspecified, uses list of all of the revlog's heads."""
1488 unspecified, uses list of all of the revlog's heads."""
1478 nonodes = ([], [], [])
1489 nonodes = ([], [], [])
1479 if roots is not None:
1490 if roots is not None:
1480 roots = list(roots)
1491 roots = list(roots)
1481 if not roots:
1492 if not roots:
1482 return nonodes
1493 return nonodes
1483 lowestrev = min([self.rev(n) for n in roots])
1494 lowestrev = min([self.rev(n) for n in roots])
1484 else:
1495 else:
1485 roots = [nullid] # Everybody's a descendant of nullid
1496 roots = [nullid] # Everybody's a descendant of nullid
1486 lowestrev = nullrev
1497 lowestrev = nullrev
1487 if (lowestrev == nullrev) and (heads is None):
1498 if (lowestrev == nullrev) and (heads is None):
1488 # We want _all_ the nodes!
1499 # We want _all_ the nodes!
1489 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1500 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1490 if heads is None:
1501 if heads is None:
1491 # All nodes are ancestors, so the latest ancestor is the last
1502 # All nodes are ancestors, so the latest ancestor is the last
1492 # node.
1503 # node.
1493 highestrev = len(self) - 1
1504 highestrev = len(self) - 1
1494 # Set ancestors to None to signal that every node is an ancestor.
1505 # Set ancestors to None to signal that every node is an ancestor.
1495 ancestors = None
1506 ancestors = None
1496 # Set heads to an empty dictionary for later discovery of heads
1507 # Set heads to an empty dictionary for later discovery of heads
1497 heads = {}
1508 heads = {}
1498 else:
1509 else:
1499 heads = list(heads)
1510 heads = list(heads)
1500 if not heads:
1511 if not heads:
1501 return nonodes
1512 return nonodes
1502 ancestors = set()
1513 ancestors = set()
1503 # Turn heads into a dictionary so we can remove 'fake' heads.
1514 # Turn heads into a dictionary so we can remove 'fake' heads.
1504 # Also, later we will be using it to filter out the heads we can't
1515 # Also, later we will be using it to filter out the heads we can't
1505 # find from roots.
1516 # find from roots.
1506 heads = dict.fromkeys(heads, False)
1517 heads = dict.fromkeys(heads, False)
1507 # Start at the top and keep marking parents until we're done.
1518 # Start at the top and keep marking parents until we're done.
1508 nodestotag = set(heads)
1519 nodestotag = set(heads)
1509 # Remember where the top was so we can use it as a limit later.
1520 # Remember where the top was so we can use it as a limit later.
1510 highestrev = max([self.rev(n) for n in nodestotag])
1521 highestrev = max([self.rev(n) for n in nodestotag])
1511 while nodestotag:
1522 while nodestotag:
1512 # grab a node to tag
1523 # grab a node to tag
1513 n = nodestotag.pop()
1524 n = nodestotag.pop()
1514 # Never tag nullid
1525 # Never tag nullid
1515 if n == nullid:
1526 if n == nullid:
1516 continue
1527 continue
1517 # A node's revision number represents its place in a
1528 # A node's revision number represents its place in a
1518 # topologically sorted list of nodes.
1529 # topologically sorted list of nodes.
1519 r = self.rev(n)
1530 r = self.rev(n)
1520 if r >= lowestrev:
1531 if r >= lowestrev:
1521 if n not in ancestors:
1532 if n not in ancestors:
1522 # If we are possibly a descendant of one of the roots
1533 # If we are possibly a descendant of one of the roots
1523 # and we haven't already been marked as an ancestor
1534 # and we haven't already been marked as an ancestor
1524 ancestors.add(n) # Mark as ancestor
1535 ancestors.add(n) # Mark as ancestor
1525 # Add non-nullid parents to list of nodes to tag.
1536 # Add non-nullid parents to list of nodes to tag.
1526 nodestotag.update([p for p in self.parents(n) if
1537 nodestotag.update([p for p in self.parents(n) if
1527 p != nullid])
1538 p != nullid])
1528 elif n in heads: # We've seen it before, is it a fake head?
1539 elif n in heads: # We've seen it before, is it a fake head?
1529 # So it is, real heads should not be the ancestors of
1540 # So it is, real heads should not be the ancestors of
1530 # any other heads.
1541 # any other heads.
1531 heads.pop(n)
1542 heads.pop(n)
1532 if not ancestors:
1543 if not ancestors:
1533 return nonodes
1544 return nonodes
1534 # Now that we have our set of ancestors, we want to remove any
1545 # Now that we have our set of ancestors, we want to remove any
1535 # roots that are not ancestors.
1546 # roots that are not ancestors.
1536
1547
1537 # If one of the roots was nullid, everything is included anyway.
1548 # If one of the roots was nullid, everything is included anyway.
1538 if lowestrev > nullrev:
1549 if lowestrev > nullrev:
1539 # But, since we weren't, let's recompute the lowest rev to not
1550 # But, since we weren't, let's recompute the lowest rev to not
1540 # include roots that aren't ancestors.
1551 # include roots that aren't ancestors.
1541
1552
1542 # Filter out roots that aren't ancestors of heads
1553 # Filter out roots that aren't ancestors of heads
1543 roots = [root for root in roots if root in ancestors]
1554 roots = [root for root in roots if root in ancestors]
1544 # Recompute the lowest revision
1555 # Recompute the lowest revision
1545 if roots:
1556 if roots:
1546 lowestrev = min([self.rev(root) for root in roots])
1557 lowestrev = min([self.rev(root) for root in roots])
1547 else:
1558 else:
1548 # No more roots? Return empty list
1559 # No more roots? Return empty list
1549 return nonodes
1560 return nonodes
1550 else:
1561 else:
1551 # We are descending from nullid, and don't need to care about
1562 # We are descending from nullid, and don't need to care about
1552 # any other roots.
1563 # any other roots.
1553 lowestrev = nullrev
1564 lowestrev = nullrev
1554 roots = [nullid]
1565 roots = [nullid]
1555 # Transform our roots list into a set.
1566 # Transform our roots list into a set.
1556 descendants = set(roots)
1567 descendants = set(roots)
1557 # Also, keep the original roots so we can filter out roots that aren't
1568 # Also, keep the original roots so we can filter out roots that aren't
1558 # 'real' roots (i.e. are descended from other roots).
1569 # 'real' roots (i.e. are descended from other roots).
1559 roots = descendants.copy()
1570 roots = descendants.copy()
1560 # Our topologically sorted list of output nodes.
1571 # Our topologically sorted list of output nodes.
1561 orderedout = []
1572 orderedout = []
1562 # Don't start at nullid since we don't want nullid in our output list,
1573 # Don't start at nullid since we don't want nullid in our output list,
1563 # and if nullid shows up in descendants, empty parents will look like
1574 # and if nullid shows up in descendants, empty parents will look like
1564 # they're descendants.
1575 # they're descendants.
1565 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1576 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1566 n = self.node(r)
1577 n = self.node(r)
1567 isdescendant = False
1578 isdescendant = False
1568 if lowestrev == nullrev: # Everybody is a descendant of nullid
1579 if lowestrev == nullrev: # Everybody is a descendant of nullid
1569 isdescendant = True
1580 isdescendant = True
1570 elif n in descendants:
1581 elif n in descendants:
1571 # n is already a descendant
1582 # n is already a descendant
1572 isdescendant = True
1583 isdescendant = True
1573 # This check only needs to be done here because all the roots
1584 # This check only needs to be done here because all the roots
1574 # will start being marked is descendants before the loop.
1585 # will start being marked is descendants before the loop.
1575 if n in roots:
1586 if n in roots:
1576 # If n was a root, check if it's a 'real' root.
1587 # If n was a root, check if it's a 'real' root.
1577 p = tuple(self.parents(n))
1588 p = tuple(self.parents(n))
1578 # If any of its parents are descendants, it's not a root.
1589 # If any of its parents are descendants, it's not a root.
1579 if (p[0] in descendants) or (p[1] in descendants):
1590 if (p[0] in descendants) or (p[1] in descendants):
1580 roots.remove(n)
1591 roots.remove(n)
1581 else:
1592 else:
1582 p = tuple(self.parents(n))
1593 p = tuple(self.parents(n))
1583 # A node is a descendant if either of its parents are
1594 # A node is a descendant if either of its parents are
1584 # descendants. (We seeded the dependents list with the roots
1595 # descendants. (We seeded the dependents list with the roots
1585 # up there, remember?)
1596 # up there, remember?)
1586 if (p[0] in descendants) or (p[1] in descendants):
1597 if (p[0] in descendants) or (p[1] in descendants):
1587 descendants.add(n)
1598 descendants.add(n)
1588 isdescendant = True
1599 isdescendant = True
1589 if isdescendant and ((ancestors is None) or (n in ancestors)):
1600 if isdescendant and ((ancestors is None) or (n in ancestors)):
1590 # Only include nodes that are both descendants and ancestors.
1601 # Only include nodes that are both descendants and ancestors.
1591 orderedout.append(n)
1602 orderedout.append(n)
1592 if (ancestors is not None) and (n in heads):
1603 if (ancestors is not None) and (n in heads):
1593 # We're trying to figure out which heads are reachable
1604 # We're trying to figure out which heads are reachable
1594 # from roots.
1605 # from roots.
1595 # Mark this head as having been reached
1606 # Mark this head as having been reached
1596 heads[n] = True
1607 heads[n] = True
1597 elif ancestors is None:
1608 elif ancestors is None:
1598 # Otherwise, we're trying to discover the heads.
1609 # Otherwise, we're trying to discover the heads.
1599 # Assume this is a head because if it isn't, the next step
1610 # Assume this is a head because if it isn't, the next step
1600 # will eventually remove it.
1611 # will eventually remove it.
1601 heads[n] = True
1612 heads[n] = True
1602 # But, obviously its parents aren't.
1613 # But, obviously its parents aren't.
1603 for p in self.parents(n):
1614 for p in self.parents(n):
1604 heads.pop(p, None)
1615 heads.pop(p, None)
1605 heads = [head for head, flag in heads.iteritems() if flag]
1616 heads = [head for head, flag in heads.iteritems() if flag]
1606 roots = list(roots)
1617 roots = list(roots)
1607 assert orderedout
1618 assert orderedout
1608 assert roots
1619 assert roots
1609 assert heads
1620 assert heads
1610 return (orderedout, roots, heads)
1621 return (orderedout, roots, heads)
1611
1622
1612 def headrevs(self):
1623 def headrevs(self):
1613 try:
1624 try:
1614 return self.index.headrevs()
1625 return self.index.headrevs()
1615 except AttributeError:
1626 except AttributeError:
1616 return self._headrevs()
1627 return self._headrevs()
1617
1628
1618 def computephases(self, roots):
1629 def computephases(self, roots):
1619 return self.index.computephasesmapsets(roots)
1630 return self.index.computephasesmapsets(roots)
1620
1631
1621 def _headrevs(self):
1632 def _headrevs(self):
1622 count = len(self)
1633 count = len(self)
1623 if not count:
1634 if not count:
1624 return [nullrev]
1635 return [nullrev]
1625 # we won't iter over filtered rev so nobody is a head at start
1636 # we won't iter over filtered rev so nobody is a head at start
1626 ishead = [0] * (count + 1)
1637 ishead = [0] * (count + 1)
1627 index = self.index
1638 index = self.index
1628 for r in self:
1639 for r in self:
1629 ishead[r] = 1 # I may be an head
1640 ishead[r] = 1 # I may be an head
1630 e = index[r]
1641 e = index[r]
1631 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1642 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1632 return [r for r, val in enumerate(ishead) if val]
1643 return [r for r, val in enumerate(ishead) if val]
1633
1644
1634 def heads(self, start=None, stop=None):
1645 def heads(self, start=None, stop=None):
1635 """return the list of all nodes that have no children
1646 """return the list of all nodes that have no children
1636
1647
1637 if start is specified, only heads that are descendants of
1648 if start is specified, only heads that are descendants of
1638 start will be returned
1649 start will be returned
1639 if stop is specified, it will consider all the revs from stop
1650 if stop is specified, it will consider all the revs from stop
1640 as if they had no children
1651 as if they had no children
1641 """
1652 """
1642 if start is None and stop is None:
1653 if start is None and stop is None:
1643 if not len(self):
1654 if not len(self):
1644 return [nullid]
1655 return [nullid]
1645 return [self.node(r) for r in self.headrevs()]
1656 return [self.node(r) for r in self.headrevs()]
1646
1657
1647 if start is None:
1658 if start is None:
1648 start = nullid
1659 start = nullid
1649 if stop is None:
1660 if stop is None:
1650 stop = []
1661 stop = []
1651 stoprevs = set([self.rev(n) for n in stop])
1662 stoprevs = set([self.rev(n) for n in stop])
1652 startrev = self.rev(start)
1663 startrev = self.rev(start)
1653 reachable = {startrev}
1664 reachable = {startrev}
1654 heads = {startrev}
1665 heads = {startrev}
1655
1666
1656 parentrevs = self.parentrevs
1667 parentrevs = self.parentrevs
1657 for r in self.revs(start=startrev + 1):
1668 for r in self.revs(start=startrev + 1):
1658 for p in parentrevs(r):
1669 for p in parentrevs(r):
1659 if p in reachable:
1670 if p in reachable:
1660 if r not in stoprevs:
1671 if r not in stoprevs:
1661 reachable.add(r)
1672 reachable.add(r)
1662 heads.add(r)
1673 heads.add(r)
1663 if p in heads and p not in stoprevs:
1674 if p in heads and p not in stoprevs:
1664 heads.remove(p)
1675 heads.remove(p)
1665
1676
1666 return [self.node(r) for r in heads]
1677 return [self.node(r) for r in heads]
1667
1678
1668 def children(self, node):
1679 def children(self, node):
1669 """find the children of a given node"""
1680 """find the children of a given node"""
1670 c = []
1681 c = []
1671 p = self.rev(node)
1682 p = self.rev(node)
1672 for r in self.revs(start=p + 1):
1683 for r in self.revs(start=p + 1):
1673 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1684 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1674 if prevs:
1685 if prevs:
1675 for pr in prevs:
1686 for pr in prevs:
1676 if pr == p:
1687 if pr == p:
1677 c.append(self.node(r))
1688 c.append(self.node(r))
1678 elif p == nullrev:
1689 elif p == nullrev:
1679 c.append(self.node(r))
1690 c.append(self.node(r))
1680 return c
1691 return c
1681
1692
1682 def commonancestorsheads(self, a, b):
1693 def commonancestorsheads(self, a, b):
1683 """calculate all the heads of the common ancestors of nodes a and b"""
1694 """calculate all the heads of the common ancestors of nodes a and b"""
1684 a, b = self.rev(a), self.rev(b)
1695 a, b = self.rev(a), self.rev(b)
1685 ancs = self._commonancestorsheads(a, b)
1696 ancs = self._commonancestorsheads(a, b)
1686 return pycompat.maplist(self.node, ancs)
1697 return pycompat.maplist(self.node, ancs)
1687
1698
1688 def _commonancestorsheads(self, *revs):
1699 def _commonancestorsheads(self, *revs):
1689 """calculate all the heads of the common ancestors of revs"""
1700 """calculate all the heads of the common ancestors of revs"""
1690 try:
1701 try:
1691 ancs = self.index.commonancestorsheads(*revs)
1702 ancs = self.index.commonancestorsheads(*revs)
1692 except (AttributeError, OverflowError): # C implementation failed
1703 except (AttributeError, OverflowError): # C implementation failed
1693 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1704 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1694 return ancs
1705 return ancs
1695
1706
1696 def isancestor(self, a, b):
1707 def isancestor(self, a, b):
1697 """return True if node a is an ancestor of node b
1708 """return True if node a is an ancestor of node b
1698
1709
1699 A revision is considered an ancestor of itself."""
1710 A revision is considered an ancestor of itself."""
1700 a, b = self.rev(a), self.rev(b)
1711 a, b = self.rev(a), self.rev(b)
1701 return self.isancestorrev(a, b)
1712 return self.isancestorrev(a, b)
1702
1713
1703 def descendant(self, a, b):
1714 def descendant(self, a, b):
1704 msg = 'revlog.descendant is deprecated, use revlog.isancestorrev'
1715 msg = 'revlog.descendant is deprecated, use revlog.isancestorrev'
1705 util.nouideprecwarn(msg, '4.7')
1716 util.nouideprecwarn(msg, '4.7')
1706 return self.isancestorrev(a, b)
1717 return self.isancestorrev(a, b)
1707
1718
1708 def isancestorrev(self, a, b):
1719 def isancestorrev(self, a, b):
1709 """return True if revision a is an ancestor of revision b
1720 """return True if revision a is an ancestor of revision b
1710
1721
1711 A revision is considered an ancestor of itself.
1722 A revision is considered an ancestor of itself.
1712
1723
1713 The implementation of this is trivial but the use of
1724 The implementation of this is trivial but the use of
1714 commonancestorsheads is not."""
1725 commonancestorsheads is not."""
1715 if a == nullrev:
1726 if a == nullrev:
1716 return True
1727 return True
1717 elif a == b:
1728 elif a == b:
1718 return True
1729 return True
1719 elif a > b:
1730 elif a > b:
1720 return False
1731 return False
1721 return a in self._commonancestorsheads(a, b)
1732 return a in self._commonancestorsheads(a, b)
1722
1733
1723 def ancestor(self, a, b):
1734 def ancestor(self, a, b):
1724 """calculate the "best" common ancestor of nodes a and b"""
1735 """calculate the "best" common ancestor of nodes a and b"""
1725
1736
1726 a, b = self.rev(a), self.rev(b)
1737 a, b = self.rev(a), self.rev(b)
1727 try:
1738 try:
1728 ancs = self.index.ancestors(a, b)
1739 ancs = self.index.ancestors(a, b)
1729 except (AttributeError, OverflowError):
1740 except (AttributeError, OverflowError):
1730 ancs = ancestor.ancestors(self.parentrevs, a, b)
1741 ancs = ancestor.ancestors(self.parentrevs, a, b)
1731 if ancs:
1742 if ancs:
1732 # choose a consistent winner when there's a tie
1743 # choose a consistent winner when there's a tie
1733 return min(map(self.node, ancs))
1744 return min(map(self.node, ancs))
1734 return nullid
1745 return nullid
1735
1746
1736 def _match(self, id):
1747 def _match(self, id):
1737 if isinstance(id, int):
1748 if isinstance(id, int):
1738 # rev
1749 # rev
1739 return self.node(id)
1750 return self.node(id)
1740 if len(id) == 20:
1751 if len(id) == 20:
1741 # possibly a binary node
1752 # possibly a binary node
1742 # odds of a binary node being all hex in ASCII are 1 in 10**25
1753 # odds of a binary node being all hex in ASCII are 1 in 10**25
1743 try:
1754 try:
1744 node = id
1755 node = id
1745 self.rev(node) # quick search the index
1756 self.rev(node) # quick search the index
1746 return node
1757 return node
1747 except LookupError:
1758 except LookupError:
1748 pass # may be partial hex id
1759 pass # may be partial hex id
1749 try:
1760 try:
1750 # str(rev)
1761 # str(rev)
1751 rev = int(id)
1762 rev = int(id)
1752 if "%d" % rev != id:
1763 if "%d" % rev != id:
1753 raise ValueError
1764 raise ValueError
1754 if rev < 0:
1765 if rev < 0:
1755 rev = len(self) + rev
1766 rev = len(self) + rev
1756 if rev < 0 or rev >= len(self):
1767 if rev < 0 or rev >= len(self):
1757 raise ValueError
1768 raise ValueError
1758 return self.node(rev)
1769 return self.node(rev)
1759 except (ValueError, OverflowError):
1770 except (ValueError, OverflowError):
1760 pass
1771 pass
1761 if len(id) == 40:
1772 if len(id) == 40:
1762 try:
1773 try:
1763 # a full hex nodeid?
1774 # a full hex nodeid?
1764 node = bin(id)
1775 node = bin(id)
1765 self.rev(node)
1776 self.rev(node)
1766 return node
1777 return node
1767 except (TypeError, LookupError):
1778 except (TypeError, LookupError):
1768 pass
1779 pass
1769
1780
1770 def _partialmatch(self, id):
1781 def _partialmatch(self, id):
1771 # we don't care wdirfilenodeids as they should be always full hash
1782 # we don't care wdirfilenodeids as they should be always full hash
1772 maybewdir = wdirhex.startswith(id)
1783 maybewdir = wdirhex.startswith(id)
1773 try:
1784 try:
1774 partial = self.index.partialmatch(id)
1785 partial = self.index.partialmatch(id)
1775 if partial and self.hasnode(partial):
1786 if partial and self.hasnode(partial):
1776 if maybewdir:
1787 if maybewdir:
1777 # single 'ff...' match in radix tree, ambiguous with wdir
1788 # single 'ff...' match in radix tree, ambiguous with wdir
1778 raise RevlogError
1789 raise RevlogError
1779 return partial
1790 return partial
1780 if maybewdir:
1791 if maybewdir:
1781 # no 'ff...' match in radix tree, wdir identified
1792 # no 'ff...' match in radix tree, wdir identified
1782 raise error.WdirUnsupported
1793 raise error.WdirUnsupported
1783 return None
1794 return None
1784 except RevlogError:
1795 except RevlogError:
1785 # parsers.c radix tree lookup gave multiple matches
1796 # parsers.c radix tree lookup gave multiple matches
1786 # fast path: for unfiltered changelog, radix tree is accurate
1797 # fast path: for unfiltered changelog, radix tree is accurate
1787 if not getattr(self, 'filteredrevs', None):
1798 if not getattr(self, 'filteredrevs', None):
1788 raise LookupError(id, self.indexfile,
1799 raise LookupError(id, self.indexfile,
1789 _('ambiguous identifier'))
1800 _('ambiguous identifier'))
1790 # fall through to slow path that filters hidden revisions
1801 # fall through to slow path that filters hidden revisions
1791 except (AttributeError, ValueError):
1802 except (AttributeError, ValueError):
1792 # we are pure python, or key was too short to search radix tree
1803 # we are pure python, or key was too short to search radix tree
1793 pass
1804 pass
1794
1805
1795 if id in self._pcache:
1806 if id in self._pcache:
1796 return self._pcache[id]
1807 return self._pcache[id]
1797
1808
1798 if len(id) <= 40:
1809 if len(id) <= 40:
1799 try:
1810 try:
1800 # hex(node)[:...]
1811 # hex(node)[:...]
1801 l = len(id) // 2 # grab an even number of digits
1812 l = len(id) // 2 # grab an even number of digits
1802 prefix = bin(id[:l * 2])
1813 prefix = bin(id[:l * 2])
1803 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1814 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1804 nl = [n for n in nl if hex(n).startswith(id) and
1815 nl = [n for n in nl if hex(n).startswith(id) and
1805 self.hasnode(n)]
1816 self.hasnode(n)]
1806 if len(nl) > 0:
1817 if len(nl) > 0:
1807 if len(nl) == 1 and not maybewdir:
1818 if len(nl) == 1 and not maybewdir:
1808 self._pcache[id] = nl[0]
1819 self._pcache[id] = nl[0]
1809 return nl[0]
1820 return nl[0]
1810 raise LookupError(id, self.indexfile,
1821 raise LookupError(id, self.indexfile,
1811 _('ambiguous identifier'))
1822 _('ambiguous identifier'))
1812 if maybewdir:
1823 if maybewdir:
1813 raise error.WdirUnsupported
1824 raise error.WdirUnsupported
1814 return None
1825 return None
1815 except TypeError:
1826 except TypeError:
1816 pass
1827 pass
1817
1828
1818 def lookup(self, id):
1829 def lookup(self, id):
1819 """locate a node based on:
1830 """locate a node based on:
1820 - revision number or str(revision number)
1831 - revision number or str(revision number)
1821 - nodeid or subset of hex nodeid
1832 - nodeid or subset of hex nodeid
1822 """
1833 """
1823 n = self._match(id)
1834 n = self._match(id)
1824 if n is not None:
1835 if n is not None:
1825 return n
1836 return n
1826 n = self._partialmatch(id)
1837 n = self._partialmatch(id)
1827 if n:
1838 if n:
1828 return n
1839 return n
1829
1840
1830 raise LookupError(id, self.indexfile, _('no match found'))
1841 raise LookupError(id, self.indexfile, _('no match found'))
1831
1842
1832 def shortest(self, node, minlength=1):
1843 def shortest(self, node, minlength=1):
1833 """Find the shortest unambiguous prefix that matches node."""
1844 """Find the shortest unambiguous prefix that matches node."""
1834 def isvalid(prefix):
1845 def isvalid(prefix):
1835 try:
1846 try:
1836 node = self._partialmatch(prefix)
1847 node = self._partialmatch(prefix)
1837 except error.RevlogError:
1848 except error.RevlogError:
1838 return False
1849 return False
1839 except error.WdirUnsupported:
1850 except error.WdirUnsupported:
1840 # single 'ff...' match
1851 # single 'ff...' match
1841 return True
1852 return True
1842 if node is None:
1853 if node is None:
1843 raise LookupError(node, self.indexfile, _('no node'))
1854 raise LookupError(node, self.indexfile, _('no node'))
1844 return True
1855 return True
1845
1856
1846 def maybewdir(prefix):
1857 def maybewdir(prefix):
1847 return all(c == 'f' for c in prefix)
1858 return all(c == 'f' for c in prefix)
1848
1859
1849 hexnode = hex(node)
1860 hexnode = hex(node)
1850
1861
1851 def disambiguate(hexnode, minlength):
1862 def disambiguate(hexnode, minlength):
1852 """Disambiguate against wdirid."""
1863 """Disambiguate against wdirid."""
1853 for length in range(minlength, 41):
1864 for length in range(minlength, 41):
1854 prefix = hexnode[:length]
1865 prefix = hexnode[:length]
1855 if not maybewdir(prefix):
1866 if not maybewdir(prefix):
1856 return prefix
1867 return prefix
1857
1868
1858 if not getattr(self, 'filteredrevs', None):
1869 if not getattr(self, 'filteredrevs', None):
1859 try:
1870 try:
1860 length = max(self.index.shortest(node), minlength)
1871 length = max(self.index.shortest(node), minlength)
1861 return disambiguate(hexnode, length)
1872 return disambiguate(hexnode, length)
1862 except RevlogError:
1873 except RevlogError:
1863 if node != wdirid:
1874 if node != wdirid:
1864 raise LookupError(node, self.indexfile, _('no node'))
1875 raise LookupError(node, self.indexfile, _('no node'))
1865 except AttributeError:
1876 except AttributeError:
1866 # Fall through to pure code
1877 # Fall through to pure code
1867 pass
1878 pass
1868
1879
1869 if node == wdirid:
1880 if node == wdirid:
1870 for length in range(minlength, 41):
1881 for length in range(minlength, 41):
1871 prefix = hexnode[:length]
1882 prefix = hexnode[:length]
1872 if isvalid(prefix):
1883 if isvalid(prefix):
1873 return prefix
1884 return prefix
1874
1885
1875 for length in range(minlength, 41):
1886 for length in range(minlength, 41):
1876 prefix = hexnode[:length]
1887 prefix = hexnode[:length]
1877 if isvalid(prefix):
1888 if isvalid(prefix):
1878 return disambiguate(hexnode, length)
1889 return disambiguate(hexnode, length)
1879
1890
1880 def cmp(self, node, text):
1891 def cmp(self, node, text):
1881 """compare text with a given file revision
1892 """compare text with a given file revision
1882
1893
1883 returns True if text is different than what is stored.
1894 returns True if text is different than what is stored.
1884 """
1895 """
1885 p1, p2 = self.parents(node)
1896 p1, p2 = self.parents(node)
1886 return hash(text, p1, p2) != node
1897 return hash(text, p1, p2) != node
1887
1898
1888 def _cachesegment(self, offset, data):
1899 def _cachesegment(self, offset, data):
1889 """Add a segment to the revlog cache.
1900 """Add a segment to the revlog cache.
1890
1901
1891 Accepts an absolute offset and the data that is at that location.
1902 Accepts an absolute offset and the data that is at that location.
1892 """
1903 """
1893 o, d = self._chunkcache
1904 o, d = self._chunkcache
1894 # try to add to existing cache
1905 # try to add to existing cache
1895 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1906 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1896 self._chunkcache = o, d + data
1907 self._chunkcache = o, d + data
1897 else:
1908 else:
1898 self._chunkcache = offset, data
1909 self._chunkcache = offset, data
1899
1910
1900 def _readsegment(self, offset, length, df=None):
1911 def _readsegment(self, offset, length, df=None):
1901 """Load a segment of raw data from the revlog.
1912 """Load a segment of raw data from the revlog.
1902
1913
1903 Accepts an absolute offset, length to read, and an optional existing
1914 Accepts an absolute offset, length to read, and an optional existing
1904 file handle to read from.
1915 file handle to read from.
1905
1916
1906 If an existing file handle is passed, it will be seeked and the
1917 If an existing file handle is passed, it will be seeked and the
1907 original seek position will NOT be restored.
1918 original seek position will NOT be restored.
1908
1919
1909 Returns a str or buffer of raw byte data.
1920 Returns a str or buffer of raw byte data.
1910 """
1921 """
1911 # Cache data both forward and backward around the requested
1922 # Cache data both forward and backward around the requested
1912 # data, in a fixed size window. This helps speed up operations
1923 # data, in a fixed size window. This helps speed up operations
1913 # involving reading the revlog backwards.
1924 # involving reading the revlog backwards.
1914 cachesize = self._chunkcachesize
1925 cachesize = self._chunkcachesize
1915 realoffset = offset & ~(cachesize - 1)
1926 realoffset = offset & ~(cachesize - 1)
1916 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1927 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1917 - realoffset)
1928 - realoffset)
1918 with self._datareadfp(df) as df:
1929 with self._datareadfp(df) as df:
1919 df.seek(realoffset)
1930 df.seek(realoffset)
1920 d = df.read(reallength)
1931 d = df.read(reallength)
1921 self._cachesegment(realoffset, d)
1932 self._cachesegment(realoffset, d)
1922 if offset != realoffset or reallength != length:
1933 if offset != realoffset or reallength != length:
1923 return util.buffer(d, offset - realoffset, length)
1934 return util.buffer(d, offset - realoffset, length)
1924 return d
1935 return d
1925
1936
1926 def _getsegment(self, offset, length, df=None):
1937 def _getsegment(self, offset, length, df=None):
1927 """Obtain a segment of raw data from the revlog.
1938 """Obtain a segment of raw data from the revlog.
1928
1939
1929 Accepts an absolute offset, length of bytes to obtain, and an
1940 Accepts an absolute offset, length of bytes to obtain, and an
1930 optional file handle to the already-opened revlog. If the file
1941 optional file handle to the already-opened revlog. If the file
1931 handle is used, it's original seek position will not be preserved.
1942 handle is used, it's original seek position will not be preserved.
1932
1943
1933 Requests for data may be returned from a cache.
1944 Requests for data may be returned from a cache.
1934
1945
1935 Returns a str or a buffer instance of raw byte data.
1946 Returns a str or a buffer instance of raw byte data.
1936 """
1947 """
1937 o, d = self._chunkcache
1948 o, d = self._chunkcache
1938 l = len(d)
1949 l = len(d)
1939
1950
1940 # is it in the cache?
1951 # is it in the cache?
1941 cachestart = offset - o
1952 cachestart = offset - o
1942 cacheend = cachestart + length
1953 cacheend = cachestart + length
1943 if cachestart >= 0 and cacheend <= l:
1954 if cachestart >= 0 and cacheend <= l:
1944 if cachestart == 0 and cacheend == l:
1955 if cachestart == 0 and cacheend == l:
1945 return d # avoid a copy
1956 return d # avoid a copy
1946 return util.buffer(d, cachestart, cacheend - cachestart)
1957 return util.buffer(d, cachestart, cacheend - cachestart)
1947
1958
1948 return self._readsegment(offset, length, df=df)
1959 return self._readsegment(offset, length, df=df)
1949
1960
1950 def _getsegmentforrevs(self, startrev, endrev, df=None):
1961 def _getsegmentforrevs(self, startrev, endrev, df=None):
1951 """Obtain a segment of raw data corresponding to a range of revisions.
1962 """Obtain a segment of raw data corresponding to a range of revisions.
1952
1963
1953 Accepts the start and end revisions and an optional already-open
1964 Accepts the start and end revisions and an optional already-open
1954 file handle to be used for reading. If the file handle is read, its
1965 file handle to be used for reading. If the file handle is read, its
1955 seek position will not be preserved.
1966 seek position will not be preserved.
1956
1967
1957 Requests for data may be satisfied by a cache.
1968 Requests for data may be satisfied by a cache.
1958
1969
1959 Returns a 2-tuple of (offset, data) for the requested range of
1970 Returns a 2-tuple of (offset, data) for the requested range of
1960 revisions. Offset is the integer offset from the beginning of the
1971 revisions. Offset is the integer offset from the beginning of the
1961 revlog and data is a str or buffer of the raw byte data.
1972 revlog and data is a str or buffer of the raw byte data.
1962
1973
1963 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1974 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1964 to determine where each revision's data begins and ends.
1975 to determine where each revision's data begins and ends.
1965 """
1976 """
1966 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1977 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1967 # (functions are expensive).
1978 # (functions are expensive).
1968 index = self.index
1979 index = self.index
1969 istart = index[startrev]
1980 istart = index[startrev]
1970 start = int(istart[0] >> 16)
1981 start = int(istart[0] >> 16)
1971 if startrev == endrev:
1982 if startrev == endrev:
1972 end = start + istart[1]
1983 end = start + istart[1]
1973 else:
1984 else:
1974 iend = index[endrev]
1985 iend = index[endrev]
1975 end = int(iend[0] >> 16) + iend[1]
1986 end = int(iend[0] >> 16) + iend[1]
1976
1987
1977 if self._inline:
1988 if self._inline:
1978 start += (startrev + 1) * self._io.size
1989 start += (startrev + 1) * self._io.size
1979 end += (endrev + 1) * self._io.size
1990 end += (endrev + 1) * self._io.size
1980 length = end - start
1991 length = end - start
1981
1992
1982 return start, self._getsegment(start, length, df=df)
1993 return start, self._getsegment(start, length, df=df)
1983
1994
1984 def _chunk(self, rev, df=None):
1995 def _chunk(self, rev, df=None):
1985 """Obtain a single decompressed chunk for a revision.
1996 """Obtain a single decompressed chunk for a revision.
1986
1997
1987 Accepts an integer revision and an optional already-open file handle
1998 Accepts an integer revision and an optional already-open file handle
1988 to be used for reading. If used, the seek position of the file will not
1999 to be used for reading. If used, the seek position of the file will not
1989 be preserved.
2000 be preserved.
1990
2001
1991 Returns a str holding uncompressed data for the requested revision.
2002 Returns a str holding uncompressed data for the requested revision.
1992 """
2003 """
1993 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
2004 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1994
2005
1995 def _chunks(self, revs, df=None, targetsize=None):
2006 def _chunks(self, revs, df=None, targetsize=None):
1996 """Obtain decompressed chunks for the specified revisions.
2007 """Obtain decompressed chunks for the specified revisions.
1997
2008
1998 Accepts an iterable of numeric revisions that are assumed to be in
2009 Accepts an iterable of numeric revisions that are assumed to be in
1999 ascending order. Also accepts an optional already-open file handle
2010 ascending order. Also accepts an optional already-open file handle
2000 to be used for reading. If used, the seek position of the file will
2011 to be used for reading. If used, the seek position of the file will
2001 not be preserved.
2012 not be preserved.
2002
2013
2003 This function is similar to calling ``self._chunk()`` multiple times,
2014 This function is similar to calling ``self._chunk()`` multiple times,
2004 but is faster.
2015 but is faster.
2005
2016
2006 Returns a list with decompressed data for each requested revision.
2017 Returns a list with decompressed data for each requested revision.
2007 """
2018 """
2008 if not revs:
2019 if not revs:
2009 return []
2020 return []
2010 start = self.start
2021 start = self.start
2011 length = self.length
2022 length = self.length
2012 inline = self._inline
2023 inline = self._inline
2013 iosize = self._io.size
2024 iosize = self._io.size
2014 buffer = util.buffer
2025 buffer = util.buffer
2015
2026
2016 l = []
2027 l = []
2017 ladd = l.append
2028 ladd = l.append
2018
2029
2019 if not self._withsparseread:
2030 if not self._withsparseread:
2020 slicedchunks = (revs,)
2031 slicedchunks = (revs,)
2021 else:
2032 else:
2022 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
2033 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
2023
2034
2024 for revschunk in slicedchunks:
2035 for revschunk in slicedchunks:
2025 firstrev = revschunk[0]
2036 firstrev = revschunk[0]
2026 # Skip trailing revisions with empty diff
2037 # Skip trailing revisions with empty diff
2027 for lastrev in revschunk[::-1]:
2038 for lastrev in revschunk[::-1]:
2028 if length(lastrev) != 0:
2039 if length(lastrev) != 0:
2029 break
2040 break
2030
2041
2031 try:
2042 try:
2032 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2043 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2033 except OverflowError:
2044 except OverflowError:
2034 # issue4215 - we can't cache a run of chunks greater than
2045 # issue4215 - we can't cache a run of chunks greater than
2035 # 2G on Windows
2046 # 2G on Windows
2036 return [self._chunk(rev, df=df) for rev in revschunk]
2047 return [self._chunk(rev, df=df) for rev in revschunk]
2037
2048
2038 decomp = self.decompress
2049 decomp = self.decompress
2039 for rev in revschunk:
2050 for rev in revschunk:
2040 chunkstart = start(rev)
2051 chunkstart = start(rev)
2041 if inline:
2052 if inline:
2042 chunkstart += (rev + 1) * iosize
2053 chunkstart += (rev + 1) * iosize
2043 chunklength = length(rev)
2054 chunklength = length(rev)
2044 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2055 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2045
2056
2046 return l
2057 return l
2047
2058
2048 def _chunkclear(self):
2059 def _chunkclear(self):
2049 """Clear the raw chunk cache."""
2060 """Clear the raw chunk cache."""
2050 self._chunkcache = (0, '')
2061 self._chunkcache = (0, '')
2051
2062
2052 def deltaparent(self, rev):
2063 def deltaparent(self, rev):
2053 """return deltaparent of the given revision"""
2064 """return deltaparent of the given revision"""
2054 base = self.index[rev][3]
2065 base = self.index[rev][3]
2055 if base == rev:
2066 if base == rev:
2056 return nullrev
2067 return nullrev
2057 elif self._generaldelta:
2068 elif self._generaldelta:
2058 return base
2069 return base
2059 else:
2070 else:
2060 return rev - 1
2071 return rev - 1
2061
2072
2062 def revdiff(self, rev1, rev2):
2073 def revdiff(self, rev1, rev2):
2063 """return or calculate a delta between two revisions
2074 """return or calculate a delta between two revisions
2064
2075
2065 The delta calculated is in binary form and is intended to be written to
2076 The delta calculated is in binary form and is intended to be written to
2066 revlog data directly. So this function needs raw revision data.
2077 revlog data directly. So this function needs raw revision data.
2067 """
2078 """
2068 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2079 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2069 return bytes(self._chunk(rev2))
2080 return bytes(self._chunk(rev2))
2070
2081
2071 return mdiff.textdiff(self.revision(rev1, raw=True),
2082 return mdiff.textdiff(self.revision(rev1, raw=True),
2072 self.revision(rev2, raw=True))
2083 self.revision(rev2, raw=True))
2073
2084
2074 def revision(self, nodeorrev, _df=None, raw=False):
2085 def revision(self, nodeorrev, _df=None, raw=False):
2075 """return an uncompressed revision of a given node or revision
2086 """return an uncompressed revision of a given node or revision
2076 number.
2087 number.
2077
2088
2078 _df - an existing file handle to read from. (internal-only)
2089 _df - an existing file handle to read from. (internal-only)
2079 raw - an optional argument specifying if the revision data is to be
2090 raw - an optional argument specifying if the revision data is to be
2080 treated as raw data when applying flag transforms. 'raw' should be set
2091 treated as raw data when applying flag transforms. 'raw' should be set
2081 to True when generating changegroups or in debug commands.
2092 to True when generating changegroups or in debug commands.
2082 """
2093 """
2083 if isinstance(nodeorrev, int):
2094 if isinstance(nodeorrev, int):
2084 rev = nodeorrev
2095 rev = nodeorrev
2085 node = self.node(rev)
2096 node = self.node(rev)
2086 else:
2097 else:
2087 node = nodeorrev
2098 node = nodeorrev
2088 rev = None
2099 rev = None
2089
2100
2090 cachedrev = None
2101 cachedrev = None
2091 flags = None
2102 flags = None
2092 rawtext = None
2103 rawtext = None
2093 if node == nullid:
2104 if node == nullid:
2094 return ""
2105 return ""
2095 if self._cache:
2106 if self._cache:
2096 if self._cache[0] == node:
2107 if self._cache[0] == node:
2097 # _cache only stores rawtext
2108 # _cache only stores rawtext
2098 if raw:
2109 if raw:
2099 return self._cache[2]
2110 return self._cache[2]
2100 # duplicated, but good for perf
2111 # duplicated, but good for perf
2101 if rev is None:
2112 if rev is None:
2102 rev = self.rev(node)
2113 rev = self.rev(node)
2103 if flags is None:
2114 if flags is None:
2104 flags = self.flags(rev)
2115 flags = self.flags(rev)
2105 # no extra flags set, no flag processor runs, text = rawtext
2116 # no extra flags set, no flag processor runs, text = rawtext
2106 if flags == REVIDX_DEFAULT_FLAGS:
2117 if flags == REVIDX_DEFAULT_FLAGS:
2107 return self._cache[2]
2118 return self._cache[2]
2108 # rawtext is reusable. need to run flag processor
2119 # rawtext is reusable. need to run flag processor
2109 rawtext = self._cache[2]
2120 rawtext = self._cache[2]
2110
2121
2111 cachedrev = self._cache[1]
2122 cachedrev = self._cache[1]
2112
2123
2113 # look up what we need to read
2124 # look up what we need to read
2114 if rawtext is None:
2125 if rawtext is None:
2115 if rev is None:
2126 if rev is None:
2116 rev = self.rev(node)
2127 rev = self.rev(node)
2117
2128
2118 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2129 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2119 if stopped:
2130 if stopped:
2120 rawtext = self._cache[2]
2131 rawtext = self._cache[2]
2121
2132
2122 # drop cache to save memory
2133 # drop cache to save memory
2123 self._cache = None
2134 self._cache = None
2124
2135
2125 targetsize = None
2136 targetsize = None
2126 rawsize = self.index[rev][2]
2137 rawsize = self.index[rev][2]
2127 if 0 <= rawsize:
2138 if 0 <= rawsize:
2128 targetsize = 4 * rawsize
2139 targetsize = 4 * rawsize
2129
2140
2130 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2141 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2131 if rawtext is None:
2142 if rawtext is None:
2132 rawtext = bytes(bins[0])
2143 rawtext = bytes(bins[0])
2133 bins = bins[1:]
2144 bins = bins[1:]
2134
2145
2135 rawtext = mdiff.patches(rawtext, bins)
2146 rawtext = mdiff.patches(rawtext, bins)
2136 self._cache = (node, rev, rawtext)
2147 self._cache = (node, rev, rawtext)
2137
2148
2138 if flags is None:
2149 if flags is None:
2139 if rev is None:
2150 if rev is None:
2140 rev = self.rev(node)
2151 rev = self.rev(node)
2141 flags = self.flags(rev)
2152 flags = self.flags(rev)
2142
2153
2143 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2154 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2144 if validatehash:
2155 if validatehash:
2145 self.checkhash(text, node, rev=rev)
2156 self.checkhash(text, node, rev=rev)
2146
2157
2147 return text
2158 return text
2148
2159
2149 def hash(self, text, p1, p2):
2160 def hash(self, text, p1, p2):
2150 """Compute a node hash.
2161 """Compute a node hash.
2151
2162
2152 Available as a function so that subclasses can replace the hash
2163 Available as a function so that subclasses can replace the hash
2153 as needed.
2164 as needed.
2154 """
2165 """
2155 return hash(text, p1, p2)
2166 return hash(text, p1, p2)
2156
2167
2157 def _processflags(self, text, flags, operation, raw=False):
2168 def _processflags(self, text, flags, operation, raw=False):
2158 """Inspect revision data flags and applies transforms defined by
2169 """Inspect revision data flags and applies transforms defined by
2159 registered flag processors.
2170 registered flag processors.
2160
2171
2161 ``text`` - the revision data to process
2172 ``text`` - the revision data to process
2162 ``flags`` - the revision flags
2173 ``flags`` - the revision flags
2163 ``operation`` - the operation being performed (read or write)
2174 ``operation`` - the operation being performed (read or write)
2164 ``raw`` - an optional argument describing if the raw transform should be
2175 ``raw`` - an optional argument describing if the raw transform should be
2165 applied.
2176 applied.
2166
2177
2167 This method processes the flags in the order (or reverse order if
2178 This method processes the flags in the order (or reverse order if
2168 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2179 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2169 flag processors registered for present flags. The order of flags defined
2180 flag processors registered for present flags. The order of flags defined
2170 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
2181 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
2171
2182
2172 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2183 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2173 processed text and ``validatehash`` is a bool indicating whether the
2184 processed text and ``validatehash`` is a bool indicating whether the
2174 returned text should be checked for hash integrity.
2185 returned text should be checked for hash integrity.
2175
2186
2176 Note: If the ``raw`` argument is set, it has precedence over the
2187 Note: If the ``raw`` argument is set, it has precedence over the
2177 operation and will only update the value of ``validatehash``.
2188 operation and will only update the value of ``validatehash``.
2178 """
2189 """
2179 # fast path: no flag processors will run
2190 # fast path: no flag processors will run
2180 if flags == 0:
2191 if flags == 0:
2181 return text, True
2192 return text, True
2182 if not operation in ('read', 'write'):
2193 if not operation in ('read', 'write'):
2183 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
2194 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
2184 # Check all flags are known.
2195 # Check all flags are known.
2185 if flags & ~REVIDX_KNOWN_FLAGS:
2196 if flags & ~REVIDX_KNOWN_FLAGS:
2186 raise RevlogError(_("incompatible revision flag '%#x'") %
2197 raise RevlogError(_("incompatible revision flag '%#x'") %
2187 (flags & ~REVIDX_KNOWN_FLAGS))
2198 (flags & ~REVIDX_KNOWN_FLAGS))
2188 validatehash = True
2199 validatehash = True
2189 # Depending on the operation (read or write), the order might be
2200 # Depending on the operation (read or write), the order might be
2190 # reversed due to non-commutative transforms.
2201 # reversed due to non-commutative transforms.
2191 orderedflags = REVIDX_FLAGS_ORDER
2202 orderedflags = REVIDX_FLAGS_ORDER
2192 if operation == 'write':
2203 if operation == 'write':
2193 orderedflags = reversed(orderedflags)
2204 orderedflags = reversed(orderedflags)
2194
2205
2195 for flag in orderedflags:
2206 for flag in orderedflags:
2196 # If a flagprocessor has been registered for a known flag, apply the
2207 # If a flagprocessor has been registered for a known flag, apply the
2197 # related operation transform and update result tuple.
2208 # related operation transform and update result tuple.
2198 if flag & flags:
2209 if flag & flags:
2199 vhash = True
2210 vhash = True
2200
2211
2201 if flag not in _flagprocessors:
2212 if flag not in _flagprocessors:
2202 message = _("missing processor for flag '%#x'") % (flag)
2213 message = _("missing processor for flag '%#x'") % (flag)
2203 raise RevlogError(message)
2214 raise RevlogError(message)
2204
2215
2205 processor = _flagprocessors[flag]
2216 processor = _flagprocessors[flag]
2206 if processor is not None:
2217 if processor is not None:
2207 readtransform, writetransform, rawtransform = processor
2218 readtransform, writetransform, rawtransform = processor
2208
2219
2209 if raw:
2220 if raw:
2210 vhash = rawtransform(self, text)
2221 vhash = rawtransform(self, text)
2211 elif operation == 'read':
2222 elif operation == 'read':
2212 text, vhash = readtransform(self, text)
2223 text, vhash = readtransform(self, text)
2213 else: # write operation
2224 else: # write operation
2214 text, vhash = writetransform(self, text)
2225 text, vhash = writetransform(self, text)
2215 validatehash = validatehash and vhash
2226 validatehash = validatehash and vhash
2216
2227
2217 return text, validatehash
2228 return text, validatehash
2218
2229
2219 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2230 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2220 """Check node hash integrity.
2231 """Check node hash integrity.
2221
2232
2222 Available as a function so that subclasses can extend hash mismatch
2233 Available as a function so that subclasses can extend hash mismatch
2223 behaviors as needed.
2234 behaviors as needed.
2224 """
2235 """
2225 try:
2236 try:
2226 if p1 is None and p2 is None:
2237 if p1 is None and p2 is None:
2227 p1, p2 = self.parents(node)
2238 p1, p2 = self.parents(node)
2228 if node != self.hash(text, p1, p2):
2239 if node != self.hash(text, p1, p2):
2229 revornode = rev
2240 revornode = rev
2230 if revornode is None:
2241 if revornode is None:
2231 revornode = templatefilters.short(hex(node))
2242 revornode = templatefilters.short(hex(node))
2232 raise RevlogError(_("integrity check failed on %s:%s")
2243 raise RevlogError(_("integrity check failed on %s:%s")
2233 % (self.indexfile, pycompat.bytestr(revornode)))
2244 % (self.indexfile, pycompat.bytestr(revornode)))
2234 except RevlogError:
2245 except RevlogError:
2235 if self._censorable and _censoredtext(text):
2246 if self._censorable and _censoredtext(text):
2236 raise error.CensoredNodeError(self.indexfile, node, text)
2247 raise error.CensoredNodeError(self.indexfile, node, text)
2237 raise
2248 raise
2238
2249
2239 def _enforceinlinesize(self, tr, fp=None):
2250 def _enforceinlinesize(self, tr, fp=None):
2240 """Check if the revlog is too big for inline and convert if so.
2251 """Check if the revlog is too big for inline and convert if so.
2241
2252
2242 This should be called after revisions are added to the revlog. If the
2253 This should be called after revisions are added to the revlog. If the
2243 revlog has grown too large to be an inline revlog, it will convert it
2254 revlog has grown too large to be an inline revlog, it will convert it
2244 to use multiple index and data files.
2255 to use multiple index and data files.
2245 """
2256 """
2246 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
2257 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
2247 return
2258 return
2248
2259
2249 trinfo = tr.find(self.indexfile)
2260 trinfo = tr.find(self.indexfile)
2250 if trinfo is None:
2261 if trinfo is None:
2251 raise RevlogError(_("%s not found in the transaction")
2262 raise RevlogError(_("%s not found in the transaction")
2252 % self.indexfile)
2263 % self.indexfile)
2253
2264
2254 trindex = trinfo[2]
2265 trindex = trinfo[2]
2255 if trindex is not None:
2266 if trindex is not None:
2256 dataoff = self.start(trindex)
2267 dataoff = self.start(trindex)
2257 else:
2268 else:
2258 # revlog was stripped at start of transaction, use all leftover data
2269 # revlog was stripped at start of transaction, use all leftover data
2259 trindex = len(self) - 1
2270 trindex = len(self) - 1
2260 dataoff = self.end(-2)
2271 dataoff = self.end(-2)
2261
2272
2262 tr.add(self.datafile, dataoff)
2273 tr.add(self.datafile, dataoff)
2263
2274
2264 if fp:
2275 if fp:
2265 fp.flush()
2276 fp.flush()
2266 fp.close()
2277 fp.close()
2267
2278
2268 with self._datafp('w') as df:
2279 with self._datafp('w') as df:
2269 for r in self:
2280 for r in self:
2270 df.write(self._getsegmentforrevs(r, r)[1])
2281 df.write(self._getsegmentforrevs(r, r)[1])
2271
2282
2272 with self._indexfp('w') as fp:
2283 with self._indexfp('w') as fp:
2273 self.version &= ~FLAG_INLINE_DATA
2284 self.version &= ~FLAG_INLINE_DATA
2274 self._inline = False
2285 self._inline = False
2275 io = self._io
2286 io = self._io
2276 for i in self:
2287 for i in self:
2277 e = io.packentry(self.index[i], self.node, self.version, i)
2288 e = io.packentry(self.index[i], self.node, self.version, i)
2278 fp.write(e)
2289 fp.write(e)
2279
2290
2280 # the temp file replace the real index when we exit the context
2291 # the temp file replace the real index when we exit the context
2281 # manager
2292 # manager
2282
2293
2283 tr.replace(self.indexfile, trindex * self._io.size)
2294 tr.replace(self.indexfile, trindex * self._io.size)
2284 self._chunkclear()
2295 self._chunkclear()
2285
2296
2286 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2297 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2287 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2298 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2288 """add a revision to the log
2299 """add a revision to the log
2289
2300
2290 text - the revision data to add
2301 text - the revision data to add
2291 transaction - the transaction object used for rollback
2302 transaction - the transaction object used for rollback
2292 link - the linkrev data to add
2303 link - the linkrev data to add
2293 p1, p2 - the parent nodeids of the revision
2304 p1, p2 - the parent nodeids of the revision
2294 cachedelta - an optional precomputed delta
2305 cachedelta - an optional precomputed delta
2295 node - nodeid of revision; typically node is not specified, and it is
2306 node - nodeid of revision; typically node is not specified, and it is
2296 computed by default as hash(text, p1, p2), however subclasses might
2307 computed by default as hash(text, p1, p2), however subclasses might
2297 use different hashing method (and override checkhash() in such case)
2308 use different hashing method (and override checkhash() in such case)
2298 flags - the known flags to set on the revision
2309 flags - the known flags to set on the revision
2299 deltacomputer - an optional _deltacomputer instance shared between
2310 deltacomputer - an optional _deltacomputer instance shared between
2300 multiple calls
2311 multiple calls
2301 """
2312 """
2302 if link == nullrev:
2313 if link == nullrev:
2303 raise RevlogError(_("attempted to add linkrev -1 to %s")
2314 raise RevlogError(_("attempted to add linkrev -1 to %s")
2304 % self.indexfile)
2315 % self.indexfile)
2305
2316
2306 if flags:
2317 if flags:
2307 node = node or self.hash(text, p1, p2)
2318 node = node or self.hash(text, p1, p2)
2308
2319
2309 rawtext, validatehash = self._processflags(text, flags, 'write')
2320 rawtext, validatehash = self._processflags(text, flags, 'write')
2310
2321
2311 # If the flag processor modifies the revision data, ignore any provided
2322 # If the flag processor modifies the revision data, ignore any provided
2312 # cachedelta.
2323 # cachedelta.
2313 if rawtext != text:
2324 if rawtext != text:
2314 cachedelta = None
2325 cachedelta = None
2315
2326
2316 if len(rawtext) > _maxentrysize:
2327 if len(rawtext) > _maxentrysize:
2317 raise RevlogError(
2328 raise RevlogError(
2318 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2329 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2319 % (self.indexfile, len(rawtext)))
2330 % (self.indexfile, len(rawtext)))
2320
2331
2321 node = node or self.hash(rawtext, p1, p2)
2332 node = node or self.hash(rawtext, p1, p2)
2322 if node in self.nodemap:
2333 if node in self.nodemap:
2323 return node
2334 return node
2324
2335
2325 if validatehash:
2336 if validatehash:
2326 self.checkhash(rawtext, node, p1=p1, p2=p2)
2337 self.checkhash(rawtext, node, p1=p1, p2=p2)
2327
2338
2328 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2339 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2329 flags, cachedelta=cachedelta,
2340 flags, cachedelta=cachedelta,
2330 deltacomputer=deltacomputer)
2341 deltacomputer=deltacomputer)
2331
2342
2332 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2343 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2333 cachedelta=None, deltacomputer=None):
2344 cachedelta=None, deltacomputer=None):
2334 """add a raw revision with known flags, node and parents
2345 """add a raw revision with known flags, node and parents
2335 useful when reusing a revision not stored in this revlog (ex: received
2346 useful when reusing a revision not stored in this revlog (ex: received
2336 over wire, or read from an external bundle).
2347 over wire, or read from an external bundle).
2337 """
2348 """
2338 dfh = None
2349 dfh = None
2339 if not self._inline:
2350 if not self._inline:
2340 dfh = self._datafp("a+")
2351 dfh = self._datafp("a+")
2341 ifh = self._indexfp("a+")
2352 ifh = self._indexfp("a+")
2342 try:
2353 try:
2343 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2354 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2344 flags, cachedelta, ifh, dfh,
2355 flags, cachedelta, ifh, dfh,
2345 deltacomputer=deltacomputer)
2356 deltacomputer=deltacomputer)
2346 finally:
2357 finally:
2347 if dfh:
2358 if dfh:
2348 dfh.close()
2359 dfh.close()
2349 ifh.close()
2360 ifh.close()
2350
2361
2351 def compress(self, data):
2362 def compress(self, data):
2352 """Generate a possibly-compressed representation of data."""
2363 """Generate a possibly-compressed representation of data."""
2353 if not data:
2364 if not data:
2354 return '', data
2365 return '', data
2355
2366
2356 compressed = self._compressor.compress(data)
2367 compressed = self._compressor.compress(data)
2357
2368
2358 if compressed:
2369 if compressed:
2359 # The revlog compressor added the header in the returned data.
2370 # The revlog compressor added the header in the returned data.
2360 return '', compressed
2371 return '', compressed
2361
2372
2362 if data[0:1] == '\0':
2373 if data[0:1] == '\0':
2363 return '', data
2374 return '', data
2364 return 'u', data
2375 return 'u', data
2365
2376
2366 def decompress(self, data):
2377 def decompress(self, data):
2367 """Decompress a revlog chunk.
2378 """Decompress a revlog chunk.
2368
2379
2369 The chunk is expected to begin with a header identifying the
2380 The chunk is expected to begin with a header identifying the
2370 format type so it can be routed to an appropriate decompressor.
2381 format type so it can be routed to an appropriate decompressor.
2371 """
2382 """
2372 if not data:
2383 if not data:
2373 return data
2384 return data
2374
2385
2375 # Revlogs are read much more frequently than they are written and many
2386 # Revlogs are read much more frequently than they are written and many
2376 # chunks only take microseconds to decompress, so performance is
2387 # chunks only take microseconds to decompress, so performance is
2377 # important here.
2388 # important here.
2378 #
2389 #
2379 # We can make a few assumptions about revlogs:
2390 # We can make a few assumptions about revlogs:
2380 #
2391 #
2381 # 1) the majority of chunks will be compressed (as opposed to inline
2392 # 1) the majority of chunks will be compressed (as opposed to inline
2382 # raw data).
2393 # raw data).
2383 # 2) decompressing *any* data will likely by at least 10x slower than
2394 # 2) decompressing *any* data will likely by at least 10x slower than
2384 # returning raw inline data.
2395 # returning raw inline data.
2385 # 3) we want to prioritize common and officially supported compression
2396 # 3) we want to prioritize common and officially supported compression
2386 # engines
2397 # engines
2387 #
2398 #
2388 # It follows that we want to optimize for "decompress compressed data
2399 # It follows that we want to optimize for "decompress compressed data
2389 # when encoded with common and officially supported compression engines"
2400 # when encoded with common and officially supported compression engines"
2390 # case over "raw data" and "data encoded by less common or non-official
2401 # case over "raw data" and "data encoded by less common or non-official
2391 # compression engines." That is why we have the inline lookup first
2402 # compression engines." That is why we have the inline lookup first
2392 # followed by the compengines lookup.
2403 # followed by the compengines lookup.
2393 #
2404 #
2394 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2405 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2395 # compressed chunks. And this matters for changelog and manifest reads.
2406 # compressed chunks. And this matters for changelog and manifest reads.
2396 t = data[0:1]
2407 t = data[0:1]
2397
2408
2398 if t == 'x':
2409 if t == 'x':
2399 try:
2410 try:
2400 return _zlibdecompress(data)
2411 return _zlibdecompress(data)
2401 except zlib.error as e:
2412 except zlib.error as e:
2402 raise RevlogError(_('revlog decompress error: %s') %
2413 raise RevlogError(_('revlog decompress error: %s') %
2403 stringutil.forcebytestr(e))
2414 stringutil.forcebytestr(e))
2404 # '\0' is more common than 'u' so it goes first.
2415 # '\0' is more common than 'u' so it goes first.
2405 elif t == '\0':
2416 elif t == '\0':
2406 return data
2417 return data
2407 elif t == 'u':
2418 elif t == 'u':
2408 return util.buffer(data, 1)
2419 return util.buffer(data, 1)
2409
2420
2410 try:
2421 try:
2411 compressor = self._decompressors[t]
2422 compressor = self._decompressors[t]
2412 except KeyError:
2423 except KeyError:
2413 try:
2424 try:
2414 engine = util.compengines.forrevlogheader(t)
2425 engine = util.compengines.forrevlogheader(t)
2415 compressor = engine.revlogcompressor()
2426 compressor = engine.revlogcompressor()
2416 self._decompressors[t] = compressor
2427 self._decompressors[t] = compressor
2417 except KeyError:
2428 except KeyError:
2418 raise RevlogError(_('unknown compression type %r') % t)
2429 raise RevlogError(_('unknown compression type %r') % t)
2419
2430
2420 return compressor.decompress(data)
2431 return compressor.decompress(data)
2421
2432
2422 def _isgooddeltainfo(self, deltainfo, revinfo):
2433 def _isgooddeltainfo(self, deltainfo, revinfo):
2423 """Returns True if the given delta is good. Good means that it is within
2434 """Returns True if the given delta is good. Good means that it is within
2424 the disk span, disk size, and chain length bounds that we know to be
2435 the disk span, disk size, and chain length bounds that we know to be
2425 performant."""
2436 performant."""
2426 if deltainfo is None:
2437 if deltainfo is None:
2427 return False
2438 return False
2428
2439
2429 # - 'deltainfo.distance' is the distance from the base revision --
2440 # - 'deltainfo.distance' is the distance from the base revision --
2430 # bounding it limits the amount of I/O we need to do.
2441 # bounding it limits the amount of I/O we need to do.
2431 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2442 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2432 # deltas we need to apply -- bounding it limits the amount of CPU
2443 # deltas we need to apply -- bounding it limits the amount of CPU
2433 # we consume.
2444 # we consume.
2434
2445
2435 if self._sparserevlog:
2446 if self._sparserevlog:
2436 # As sparse-read will be used, we can consider that the distance,
2447 # As sparse-read will be used, we can consider that the distance,
2437 # instead of being the span of the whole chunk,
2448 # instead of being the span of the whole chunk,
2438 # is the span of the largest read chunk
2449 # is the span of the largest read chunk
2439 base = deltainfo.base
2450 base = deltainfo.base
2440
2451
2441 if base != nullrev:
2452 if base != nullrev:
2442 deltachain = self._deltachain(base)[0]
2453 deltachain = self._deltachain(base)[0]
2443 else:
2454 else:
2444 deltachain = []
2455 deltachain = []
2445
2456
2446 chunks = _slicechunk(self, deltachain, deltainfo)
2457 chunks = _slicechunk(self, deltachain, deltainfo)
2447 distance = max(map(lambda revs:_segmentspan(self, revs), chunks))
2458 all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
2459 distance = max(all_span)
2448 else:
2460 else:
2449 distance = deltainfo.distance
2461 distance = deltainfo.distance
2450
2462
2451 textlen = revinfo.textlen
2463 textlen = revinfo.textlen
2452 defaultmax = textlen * 4
2464 defaultmax = textlen * 4
2453 maxdist = self._maxdeltachainspan
2465 maxdist = self._maxdeltachainspan
2454 if not maxdist:
2466 if not maxdist:
2455 maxdist = distance # ensure the conditional pass
2467 maxdist = distance # ensure the conditional pass
2456 maxdist = max(maxdist, defaultmax)
2468 maxdist = max(maxdist, defaultmax)
2457 if self._sparserevlog and maxdist < self._srmingapsize:
2469 if self._sparserevlog and maxdist < self._srmingapsize:
2458 # In multiple place, we are ignoring irrelevant data range below a
2470 # In multiple place, we are ignoring irrelevant data range below a
2459 # certain size. Be also apply this tradeoff here and relax span
2471 # certain size. Be also apply this tradeoff here and relax span
2460 # constraint for small enought content.
2472 # constraint for small enought content.
2461 maxdist = self._srmingapsize
2473 maxdist = self._srmingapsize
2462 if (distance > maxdist or deltainfo.deltalen > textlen or
2474 if (distance > maxdist or deltainfo.deltalen > textlen or
2463 deltainfo.compresseddeltalen > textlen * 2 or
2475 deltainfo.compresseddeltalen > textlen * 2 or
2464 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2476 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2465 return False
2477 return False
2466
2478
2467 return True
2479 return True
2468
2480
2469 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2481 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2470 cachedelta, ifh, dfh, alwayscache=False,
2482 cachedelta, ifh, dfh, alwayscache=False,
2471 deltacomputer=None):
2483 deltacomputer=None):
2472 """internal function to add revisions to the log
2484 """internal function to add revisions to the log
2473
2485
2474 see addrevision for argument descriptions.
2486 see addrevision for argument descriptions.
2475
2487
2476 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2488 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2477
2489
2478 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2490 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2479 be used.
2491 be used.
2480
2492
2481 invariants:
2493 invariants:
2482 - rawtext is optional (can be None); if not set, cachedelta must be set.
2494 - rawtext is optional (can be None); if not set, cachedelta must be set.
2483 if both are set, they must correspond to each other.
2495 if both are set, they must correspond to each other.
2484 """
2496 """
2485 if node == nullid:
2497 if node == nullid:
2486 raise RevlogError(_("%s: attempt to add null revision") %
2498 raise RevlogError(_("%s: attempt to add null revision") %
2487 (self.indexfile))
2499 (self.indexfile))
2488 if node == wdirid or node in wdirfilenodeids:
2500 if node == wdirid or node in wdirfilenodeids:
2489 raise RevlogError(_("%s: attempt to add wdir revision") %
2501 raise RevlogError(_("%s: attempt to add wdir revision") %
2490 (self.indexfile))
2502 (self.indexfile))
2491
2503
2492 if self._inline:
2504 if self._inline:
2493 fh = ifh
2505 fh = ifh
2494 else:
2506 else:
2495 fh = dfh
2507 fh = dfh
2496
2508
2497 btext = [rawtext]
2509 btext = [rawtext]
2498
2510
2499 curr = len(self)
2511 curr = len(self)
2500 prev = curr - 1
2512 prev = curr - 1
2501 offset = self.end(prev)
2513 offset = self.end(prev)
2502 p1r, p2r = self.rev(p1), self.rev(p2)
2514 p1r, p2r = self.rev(p1), self.rev(p2)
2503
2515
2504 # full versions are inserted when the needed deltas
2516 # full versions are inserted when the needed deltas
2505 # become comparable to the uncompressed text
2517 # become comparable to the uncompressed text
2506 if rawtext is None:
2518 if rawtext is None:
2507 # need rawtext size, before changed by flag processors, which is
2519 # need rawtext size, before changed by flag processors, which is
2508 # the non-raw size. use revlog explicitly to avoid filelog's extra
2520 # the non-raw size. use revlog explicitly to avoid filelog's extra
2509 # logic that might remove metadata size.
2521 # logic that might remove metadata size.
2510 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2522 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2511 cachedelta[1])
2523 cachedelta[1])
2512 else:
2524 else:
2513 textlen = len(rawtext)
2525 textlen = len(rawtext)
2514
2526
2515 if deltacomputer is None:
2527 if deltacomputer is None:
2516 deltacomputer = _deltacomputer(self)
2528 deltacomputer = _deltacomputer(self)
2517
2529
2518 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2530 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2519
2531
2520 # no delta for flag processor revision (see "candelta" for why)
2532 # no delta for flag processor revision (see "candelta" for why)
2521 # not calling candelta since only one revision needs test, also to
2533 # not calling candelta since only one revision needs test, also to
2522 # avoid overhead fetching flags again.
2534 # avoid overhead fetching flags again.
2523 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2535 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2524 deltainfo = None
2536 deltainfo = None
2525 else:
2537 else:
2526 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2538 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2527
2539
2528 if deltainfo is not None:
2540 if deltainfo is not None:
2529 base = deltainfo.base
2541 base = deltainfo.base
2530 chainbase = deltainfo.chainbase
2542 chainbase = deltainfo.chainbase
2531 data = deltainfo.data
2543 data = deltainfo.data
2532 l = deltainfo.deltalen
2544 l = deltainfo.deltalen
2533 else:
2545 else:
2534 rawtext = deltacomputer.buildtext(revinfo, fh)
2546 rawtext = deltacomputer.buildtext(revinfo, fh)
2535 data = self.compress(rawtext)
2547 data = self.compress(rawtext)
2536 l = len(data[1]) + len(data[0])
2548 l = len(data[1]) + len(data[0])
2537 base = chainbase = curr
2549 base = chainbase = curr
2538
2550
2539 e = (offset_type(offset, flags), l, textlen,
2551 e = (offset_type(offset, flags), l, textlen,
2540 base, link, p1r, p2r, node)
2552 base, link, p1r, p2r, node)
2541 self.index.insert(-1, e)
2553 self.index.insert(-1, e)
2542 self.nodemap[node] = curr
2554 self.nodemap[node] = curr
2543
2555
2544 entry = self._io.packentry(e, self.node, self.version, curr)
2556 entry = self._io.packentry(e, self.node, self.version, curr)
2545 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2557 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2546
2558
2547 if alwayscache and rawtext is None:
2559 if alwayscache and rawtext is None:
2548 rawtext = deltacomputer._buildtext(revinfo, fh)
2560 rawtext = deltacomputer._buildtext(revinfo, fh)
2549
2561
2550 if type(rawtext) == bytes: # only accept immutable objects
2562 if type(rawtext) == bytes: # only accept immutable objects
2551 self._cache = (node, curr, rawtext)
2563 self._cache = (node, curr, rawtext)
2552 self._chainbasecache[curr] = chainbase
2564 self._chainbasecache[curr] = chainbase
2553 return node
2565 return node
2554
2566
2555 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2567 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2556 # Files opened in a+ mode have inconsistent behavior on various
2568 # Files opened in a+ mode have inconsistent behavior on various
2557 # platforms. Windows requires that a file positioning call be made
2569 # platforms. Windows requires that a file positioning call be made
2558 # when the file handle transitions between reads and writes. See
2570 # when the file handle transitions between reads and writes. See
2559 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2571 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2560 # platforms, Python or the platform itself can be buggy. Some versions
2572 # platforms, Python or the platform itself can be buggy. Some versions
2561 # of Solaris have been observed to not append at the end of the file
2573 # of Solaris have been observed to not append at the end of the file
2562 # if the file was seeked to before the end. See issue4943 for more.
2574 # if the file was seeked to before the end. See issue4943 for more.
2563 #
2575 #
2564 # We work around this issue by inserting a seek() before writing.
2576 # We work around this issue by inserting a seek() before writing.
2565 # Note: This is likely not necessary on Python 3.
2577 # Note: This is likely not necessary on Python 3.
2566 ifh.seek(0, os.SEEK_END)
2578 ifh.seek(0, os.SEEK_END)
2567 if dfh:
2579 if dfh:
2568 dfh.seek(0, os.SEEK_END)
2580 dfh.seek(0, os.SEEK_END)
2569
2581
2570 curr = len(self) - 1
2582 curr = len(self) - 1
2571 if not self._inline:
2583 if not self._inline:
2572 transaction.add(self.datafile, offset)
2584 transaction.add(self.datafile, offset)
2573 transaction.add(self.indexfile, curr * len(entry))
2585 transaction.add(self.indexfile, curr * len(entry))
2574 if data[0]:
2586 if data[0]:
2575 dfh.write(data[0])
2587 dfh.write(data[0])
2576 dfh.write(data[1])
2588 dfh.write(data[1])
2577 ifh.write(entry)
2589 ifh.write(entry)
2578 else:
2590 else:
2579 offset += curr * self._io.size
2591 offset += curr * self._io.size
2580 transaction.add(self.indexfile, offset, curr)
2592 transaction.add(self.indexfile, offset, curr)
2581 ifh.write(entry)
2593 ifh.write(entry)
2582 ifh.write(data[0])
2594 ifh.write(data[0])
2583 ifh.write(data[1])
2595 ifh.write(data[1])
2584 self._enforceinlinesize(transaction, ifh)
2596 self._enforceinlinesize(transaction, ifh)
2585
2597
2586 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2598 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2587 """
2599 """
2588 add a delta group
2600 add a delta group
2589
2601
2590 given a set of deltas, add them to the revision log. the
2602 given a set of deltas, add them to the revision log. the
2591 first delta is against its parent, which should be in our
2603 first delta is against its parent, which should be in our
2592 log, the rest are against the previous delta.
2604 log, the rest are against the previous delta.
2593
2605
2594 If ``addrevisioncb`` is defined, it will be called with arguments of
2606 If ``addrevisioncb`` is defined, it will be called with arguments of
2595 this revlog and the node that was added.
2607 this revlog and the node that was added.
2596 """
2608 """
2597
2609
2598 nodes = []
2610 nodes = []
2599
2611
2600 r = len(self)
2612 r = len(self)
2601 end = 0
2613 end = 0
2602 if r:
2614 if r:
2603 end = self.end(r - 1)
2615 end = self.end(r - 1)
2604 ifh = self._indexfp("a+")
2616 ifh = self._indexfp("a+")
2605 isize = r * self._io.size
2617 isize = r * self._io.size
2606 if self._inline:
2618 if self._inline:
2607 transaction.add(self.indexfile, end + isize, r)
2619 transaction.add(self.indexfile, end + isize, r)
2608 dfh = None
2620 dfh = None
2609 else:
2621 else:
2610 transaction.add(self.indexfile, isize, r)
2622 transaction.add(self.indexfile, isize, r)
2611 transaction.add(self.datafile, end)
2623 transaction.add(self.datafile, end)
2612 dfh = self._datafp("a+")
2624 dfh = self._datafp("a+")
2613 def flush():
2625 def flush():
2614 if dfh:
2626 if dfh:
2615 dfh.flush()
2627 dfh.flush()
2616 ifh.flush()
2628 ifh.flush()
2617 try:
2629 try:
2618 deltacomputer = _deltacomputer(self)
2630 deltacomputer = _deltacomputer(self)
2619 # loop through our set of deltas
2631 # loop through our set of deltas
2620 for data in deltas:
2632 for data in deltas:
2621 node, p1, p2, linknode, deltabase, delta, flags = data
2633 node, p1, p2, linknode, deltabase, delta, flags = data
2622 link = linkmapper(linknode)
2634 link = linkmapper(linknode)
2623 flags = flags or REVIDX_DEFAULT_FLAGS
2635 flags = flags or REVIDX_DEFAULT_FLAGS
2624
2636
2625 nodes.append(node)
2637 nodes.append(node)
2626
2638
2627 if node in self.nodemap:
2639 if node in self.nodemap:
2628 # this can happen if two branches make the same change
2640 # this can happen if two branches make the same change
2629 continue
2641 continue
2630
2642
2631 for p in (p1, p2):
2643 for p in (p1, p2):
2632 if p not in self.nodemap:
2644 if p not in self.nodemap:
2633 raise LookupError(p, self.indexfile,
2645 raise LookupError(p, self.indexfile,
2634 _('unknown parent'))
2646 _('unknown parent'))
2635
2647
2636 if deltabase not in self.nodemap:
2648 if deltabase not in self.nodemap:
2637 raise LookupError(deltabase, self.indexfile,
2649 raise LookupError(deltabase, self.indexfile,
2638 _('unknown delta base'))
2650 _('unknown delta base'))
2639
2651
2640 baserev = self.rev(deltabase)
2652 baserev = self.rev(deltabase)
2641
2653
2642 if baserev != nullrev and self.iscensored(baserev):
2654 if baserev != nullrev and self.iscensored(baserev):
2643 # if base is censored, delta must be full replacement in a
2655 # if base is censored, delta must be full replacement in a
2644 # single patch operation
2656 # single patch operation
2645 hlen = struct.calcsize(">lll")
2657 hlen = struct.calcsize(">lll")
2646 oldlen = self.rawsize(baserev)
2658 oldlen = self.rawsize(baserev)
2647 newlen = len(delta) - hlen
2659 newlen = len(delta) - hlen
2648 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2660 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2649 raise error.CensoredBaseError(self.indexfile,
2661 raise error.CensoredBaseError(self.indexfile,
2650 self.node(baserev))
2662 self.node(baserev))
2651
2663
2652 if not flags and self._peek_iscensored(baserev, delta, flush):
2664 if not flags and self._peek_iscensored(baserev, delta, flush):
2653 flags |= REVIDX_ISCENSORED
2665 flags |= REVIDX_ISCENSORED
2654
2666
2655 # We assume consumers of addrevisioncb will want to retrieve
2667 # We assume consumers of addrevisioncb will want to retrieve
2656 # the added revision, which will require a call to
2668 # the added revision, which will require a call to
2657 # revision(). revision() will fast path if there is a cache
2669 # revision(). revision() will fast path if there is a cache
2658 # hit. So, we tell _addrevision() to always cache in this case.
2670 # hit. So, we tell _addrevision() to always cache in this case.
2659 # We're only using addgroup() in the context of changegroup
2671 # We're only using addgroup() in the context of changegroup
2660 # generation so the revision data can always be handled as raw
2672 # generation so the revision data can always be handled as raw
2661 # by the flagprocessor.
2673 # by the flagprocessor.
2662 self._addrevision(node, None, transaction, link,
2674 self._addrevision(node, None, transaction, link,
2663 p1, p2, flags, (baserev, delta),
2675 p1, p2, flags, (baserev, delta),
2664 ifh, dfh,
2676 ifh, dfh,
2665 alwayscache=bool(addrevisioncb),
2677 alwayscache=bool(addrevisioncb),
2666 deltacomputer=deltacomputer)
2678 deltacomputer=deltacomputer)
2667
2679
2668 if addrevisioncb:
2680 if addrevisioncb:
2669 addrevisioncb(self, node)
2681 addrevisioncb(self, node)
2670
2682
2671 if not dfh and not self._inline:
2683 if not dfh and not self._inline:
2672 # addrevision switched from inline to conventional
2684 # addrevision switched from inline to conventional
2673 # reopen the index
2685 # reopen the index
2674 ifh.close()
2686 ifh.close()
2675 dfh = self._datafp("a+")
2687 dfh = self._datafp("a+")
2676 ifh = self._indexfp("a+")
2688 ifh = self._indexfp("a+")
2677 finally:
2689 finally:
2678 if dfh:
2690 if dfh:
2679 dfh.close()
2691 dfh.close()
2680 ifh.close()
2692 ifh.close()
2681
2693
2682 return nodes
2694 return nodes
2683
2695
2684 def iscensored(self, rev):
2696 def iscensored(self, rev):
2685 """Check if a file revision is censored."""
2697 """Check if a file revision is censored."""
2686 if not self._censorable:
2698 if not self._censorable:
2687 return False
2699 return False
2688
2700
2689 return self.flags(rev) & REVIDX_ISCENSORED
2701 return self.flags(rev) & REVIDX_ISCENSORED
2690
2702
2691 def _peek_iscensored(self, baserev, delta, flush):
2703 def _peek_iscensored(self, baserev, delta, flush):
2692 """Quickly check if a delta produces a censored revision."""
2704 """Quickly check if a delta produces a censored revision."""
2693 if not self._censorable:
2705 if not self._censorable:
2694 return False
2706 return False
2695
2707
2696 # Fragile heuristic: unless new file meta keys are added alphabetically
2708 # Fragile heuristic: unless new file meta keys are added alphabetically
2697 # preceding "censored", all censored revisions are prefixed by
2709 # preceding "censored", all censored revisions are prefixed by
2698 # "\1\ncensored:". A delta producing such a censored revision must be a
2710 # "\1\ncensored:". A delta producing such a censored revision must be a
2699 # full-replacement delta, so we inspect the first and only patch in the
2711 # full-replacement delta, so we inspect the first and only patch in the
2700 # delta for this prefix.
2712 # delta for this prefix.
2701 hlen = struct.calcsize(">lll")
2713 hlen = struct.calcsize(">lll")
2702 if len(delta) <= hlen:
2714 if len(delta) <= hlen:
2703 return False
2715 return False
2704
2716
2705 oldlen = self.rawsize(baserev)
2717 oldlen = self.rawsize(baserev)
2706 newlen = len(delta) - hlen
2718 newlen = len(delta) - hlen
2707 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2719 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2708 return False
2720 return False
2709
2721
2710 add = "\1\ncensored:"
2722 add = "\1\ncensored:"
2711 addlen = len(add)
2723 addlen = len(add)
2712 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2724 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2713
2725
2714 def getstrippoint(self, minlink):
2726 def getstrippoint(self, minlink):
2715 """find the minimum rev that must be stripped to strip the linkrev
2727 """find the minimum rev that must be stripped to strip the linkrev
2716
2728
2717 Returns a tuple containing the minimum rev and a set of all revs that
2729 Returns a tuple containing the minimum rev and a set of all revs that
2718 have linkrevs that will be broken by this strip.
2730 have linkrevs that will be broken by this strip.
2719 """
2731 """
2720 brokenrevs = set()
2732 brokenrevs = set()
2721 strippoint = len(self)
2733 strippoint = len(self)
2722
2734
2723 heads = {}
2735 heads = {}
2724 futurelargelinkrevs = set()
2736 futurelargelinkrevs = set()
2725 for head in self.headrevs():
2737 for head in self.headrevs():
2726 headlinkrev = self.linkrev(head)
2738 headlinkrev = self.linkrev(head)
2727 heads[head] = headlinkrev
2739 heads[head] = headlinkrev
2728 if headlinkrev >= minlink:
2740 if headlinkrev >= minlink:
2729 futurelargelinkrevs.add(headlinkrev)
2741 futurelargelinkrevs.add(headlinkrev)
2730
2742
2731 # This algorithm involves walking down the rev graph, starting at the
2743 # This algorithm involves walking down the rev graph, starting at the
2732 # heads. Since the revs are topologically sorted according to linkrev,
2744 # heads. Since the revs are topologically sorted according to linkrev,
2733 # once all head linkrevs are below the minlink, we know there are
2745 # once all head linkrevs are below the minlink, we know there are
2734 # no more revs that could have a linkrev greater than minlink.
2746 # no more revs that could have a linkrev greater than minlink.
2735 # So we can stop walking.
2747 # So we can stop walking.
2736 while futurelargelinkrevs:
2748 while futurelargelinkrevs:
2737 strippoint -= 1
2749 strippoint -= 1
2738 linkrev = heads.pop(strippoint)
2750 linkrev = heads.pop(strippoint)
2739
2751
2740 if linkrev < minlink:
2752 if linkrev < minlink:
2741 brokenrevs.add(strippoint)
2753 brokenrevs.add(strippoint)
2742 else:
2754 else:
2743 futurelargelinkrevs.remove(linkrev)
2755 futurelargelinkrevs.remove(linkrev)
2744
2756
2745 for p in self.parentrevs(strippoint):
2757 for p in self.parentrevs(strippoint):
2746 if p != nullrev:
2758 if p != nullrev:
2747 plinkrev = self.linkrev(p)
2759 plinkrev = self.linkrev(p)
2748 heads[p] = plinkrev
2760 heads[p] = plinkrev
2749 if plinkrev >= minlink:
2761 if plinkrev >= minlink:
2750 futurelargelinkrevs.add(plinkrev)
2762 futurelargelinkrevs.add(plinkrev)
2751
2763
2752 return strippoint, brokenrevs
2764 return strippoint, brokenrevs
2753
2765
2754 def strip(self, minlink, transaction):
2766 def strip(self, minlink, transaction):
2755 """truncate the revlog on the first revision with a linkrev >= minlink
2767 """truncate the revlog on the first revision with a linkrev >= minlink
2756
2768
2757 This function is called when we're stripping revision minlink and
2769 This function is called when we're stripping revision minlink and
2758 its descendants from the repository.
2770 its descendants from the repository.
2759
2771
2760 We have to remove all revisions with linkrev >= minlink, because
2772 We have to remove all revisions with linkrev >= minlink, because
2761 the equivalent changelog revisions will be renumbered after the
2773 the equivalent changelog revisions will be renumbered after the
2762 strip.
2774 strip.
2763
2775
2764 So we truncate the revlog on the first of these revisions, and
2776 So we truncate the revlog on the first of these revisions, and
2765 trust that the caller has saved the revisions that shouldn't be
2777 trust that the caller has saved the revisions that shouldn't be
2766 removed and that it'll re-add them after this truncation.
2778 removed and that it'll re-add them after this truncation.
2767 """
2779 """
2768 if len(self) == 0:
2780 if len(self) == 0:
2769 return
2781 return
2770
2782
2771 rev, _ = self.getstrippoint(minlink)
2783 rev, _ = self.getstrippoint(minlink)
2772 if rev == len(self):
2784 if rev == len(self):
2773 return
2785 return
2774
2786
2775 # first truncate the files on disk
2787 # first truncate the files on disk
2776 end = self.start(rev)
2788 end = self.start(rev)
2777 if not self._inline:
2789 if not self._inline:
2778 transaction.add(self.datafile, end)
2790 transaction.add(self.datafile, end)
2779 end = rev * self._io.size
2791 end = rev * self._io.size
2780 else:
2792 else:
2781 end += rev * self._io.size
2793 end += rev * self._io.size
2782
2794
2783 transaction.add(self.indexfile, end)
2795 transaction.add(self.indexfile, end)
2784
2796
2785 # then reset internal state in memory to forget those revisions
2797 # then reset internal state in memory to forget those revisions
2786 self._cache = None
2798 self._cache = None
2787 self._chaininfocache = {}
2799 self._chaininfocache = {}
2788 self._chunkclear()
2800 self._chunkclear()
2789 for x in xrange(rev, len(self)):
2801 for x in xrange(rev, len(self)):
2790 del self.nodemap[self.node(x)]
2802 del self.nodemap[self.node(x)]
2791
2803
2792 del self.index[rev:-1]
2804 del self.index[rev:-1]
2793 self._nodepos = None
2805 self._nodepos = None
2794
2806
2795 def checksize(self):
2807 def checksize(self):
2796 expected = 0
2808 expected = 0
2797 if len(self):
2809 if len(self):
2798 expected = max(0, self.end(len(self) - 1))
2810 expected = max(0, self.end(len(self) - 1))
2799
2811
2800 try:
2812 try:
2801 with self._datafp() as f:
2813 with self._datafp() as f:
2802 f.seek(0, 2)
2814 f.seek(0, 2)
2803 actual = f.tell()
2815 actual = f.tell()
2804 dd = actual - expected
2816 dd = actual - expected
2805 except IOError as inst:
2817 except IOError as inst:
2806 if inst.errno != errno.ENOENT:
2818 if inst.errno != errno.ENOENT:
2807 raise
2819 raise
2808 dd = 0
2820 dd = 0
2809
2821
2810 try:
2822 try:
2811 f = self.opener(self.indexfile)
2823 f = self.opener(self.indexfile)
2812 f.seek(0, 2)
2824 f.seek(0, 2)
2813 actual = f.tell()
2825 actual = f.tell()
2814 f.close()
2826 f.close()
2815 s = self._io.size
2827 s = self._io.size
2816 i = max(0, actual // s)
2828 i = max(0, actual // s)
2817 di = actual - (i * s)
2829 di = actual - (i * s)
2818 if self._inline:
2830 if self._inline:
2819 databytes = 0
2831 databytes = 0
2820 for r in self:
2832 for r in self:
2821 databytes += max(0, self.length(r))
2833 databytes += max(0, self.length(r))
2822 dd = 0
2834 dd = 0
2823 di = actual - len(self) * s - databytes
2835 di = actual - len(self) * s - databytes
2824 except IOError as inst:
2836 except IOError as inst:
2825 if inst.errno != errno.ENOENT:
2837 if inst.errno != errno.ENOENT:
2826 raise
2838 raise
2827 di = 0
2839 di = 0
2828
2840
2829 return (dd, di)
2841 return (dd, di)
2830
2842
2831 def files(self):
2843 def files(self):
2832 res = [self.indexfile]
2844 res = [self.indexfile]
2833 if not self._inline:
2845 if not self._inline:
2834 res.append(self.datafile)
2846 res.append(self.datafile)
2835 return res
2847 return res
2836
2848
2837 DELTAREUSEALWAYS = 'always'
2849 DELTAREUSEALWAYS = 'always'
2838 DELTAREUSESAMEREVS = 'samerevs'
2850 DELTAREUSESAMEREVS = 'samerevs'
2839 DELTAREUSENEVER = 'never'
2851 DELTAREUSENEVER = 'never'
2840
2852
2841 DELTAREUSEFULLADD = 'fulladd'
2853 DELTAREUSEFULLADD = 'fulladd'
2842
2854
2843 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2855 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2844
2856
2845 def clone(self, tr, destrevlog, addrevisioncb=None,
2857 def clone(self, tr, destrevlog, addrevisioncb=None,
2846 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2858 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2847 """Copy this revlog to another, possibly with format changes.
2859 """Copy this revlog to another, possibly with format changes.
2848
2860
2849 The destination revlog will contain the same revisions and nodes.
2861 The destination revlog will contain the same revisions and nodes.
2850 However, it may not be bit-for-bit identical due to e.g. delta encoding
2862 However, it may not be bit-for-bit identical due to e.g. delta encoding
2851 differences.
2863 differences.
2852
2864
2853 The ``deltareuse`` argument control how deltas from the existing revlog
2865 The ``deltareuse`` argument control how deltas from the existing revlog
2854 are preserved in the destination revlog. The argument can have the
2866 are preserved in the destination revlog. The argument can have the
2855 following values:
2867 following values:
2856
2868
2857 DELTAREUSEALWAYS
2869 DELTAREUSEALWAYS
2858 Deltas will always be reused (if possible), even if the destination
2870 Deltas will always be reused (if possible), even if the destination
2859 revlog would not select the same revisions for the delta. This is the
2871 revlog would not select the same revisions for the delta. This is the
2860 fastest mode of operation.
2872 fastest mode of operation.
2861 DELTAREUSESAMEREVS
2873 DELTAREUSESAMEREVS
2862 Deltas will be reused if the destination revlog would pick the same
2874 Deltas will be reused if the destination revlog would pick the same
2863 revisions for the delta. This mode strikes a balance between speed
2875 revisions for the delta. This mode strikes a balance between speed
2864 and optimization.
2876 and optimization.
2865 DELTAREUSENEVER
2877 DELTAREUSENEVER
2866 Deltas will never be reused. This is the slowest mode of execution.
2878 Deltas will never be reused. This is the slowest mode of execution.
2867 This mode can be used to recompute deltas (e.g. if the diff/delta
2879 This mode can be used to recompute deltas (e.g. if the diff/delta
2868 algorithm changes).
2880 algorithm changes).
2869
2881
2870 Delta computation can be slow, so the choice of delta reuse policy can
2882 Delta computation can be slow, so the choice of delta reuse policy can
2871 significantly affect run time.
2883 significantly affect run time.
2872
2884
2873 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2885 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2874 two extremes. Deltas will be reused if they are appropriate. But if the
2886 two extremes. Deltas will be reused if they are appropriate. But if the
2875 delta could choose a better revision, it will do so. This means if you
2887 delta could choose a better revision, it will do so. This means if you
2876 are converting a non-generaldelta revlog to a generaldelta revlog,
2888 are converting a non-generaldelta revlog to a generaldelta revlog,
2877 deltas will be recomputed if the delta's parent isn't a parent of the
2889 deltas will be recomputed if the delta's parent isn't a parent of the
2878 revision.
2890 revision.
2879
2891
2880 In addition to the delta policy, the ``deltabothparents`` argument
2892 In addition to the delta policy, the ``deltabothparents`` argument
2881 controls whether to compute deltas against both parents for merges.
2893 controls whether to compute deltas against both parents for merges.
2882 By default, the current default is used.
2894 By default, the current default is used.
2883 """
2895 """
2884 if deltareuse not in self.DELTAREUSEALL:
2896 if deltareuse not in self.DELTAREUSEALL:
2885 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2897 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2886
2898
2887 if len(destrevlog):
2899 if len(destrevlog):
2888 raise ValueError(_('destination revlog is not empty'))
2900 raise ValueError(_('destination revlog is not empty'))
2889
2901
2890 if getattr(self, 'filteredrevs', None):
2902 if getattr(self, 'filteredrevs', None):
2891 raise ValueError(_('source revlog has filtered revisions'))
2903 raise ValueError(_('source revlog has filtered revisions'))
2892 if getattr(destrevlog, 'filteredrevs', None):
2904 if getattr(destrevlog, 'filteredrevs', None):
2893 raise ValueError(_('destination revlog has filtered revisions'))
2905 raise ValueError(_('destination revlog has filtered revisions'))
2894
2906
2895 # lazydeltabase controls whether to reuse a cached delta, if possible.
2907 # lazydeltabase controls whether to reuse a cached delta, if possible.
2896 oldlazydeltabase = destrevlog._lazydeltabase
2908 oldlazydeltabase = destrevlog._lazydeltabase
2897 oldamd = destrevlog._deltabothparents
2909 oldamd = destrevlog._deltabothparents
2898
2910
2899 try:
2911 try:
2900 if deltareuse == self.DELTAREUSEALWAYS:
2912 if deltareuse == self.DELTAREUSEALWAYS:
2901 destrevlog._lazydeltabase = True
2913 destrevlog._lazydeltabase = True
2902 elif deltareuse == self.DELTAREUSESAMEREVS:
2914 elif deltareuse == self.DELTAREUSESAMEREVS:
2903 destrevlog._lazydeltabase = False
2915 destrevlog._lazydeltabase = False
2904
2916
2905 destrevlog._deltabothparents = deltabothparents or oldamd
2917 destrevlog._deltabothparents = deltabothparents or oldamd
2906
2918
2907 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2919 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2908 self.DELTAREUSESAMEREVS)
2920 self.DELTAREUSESAMEREVS)
2909
2921
2910 deltacomputer = _deltacomputer(destrevlog)
2922 deltacomputer = _deltacomputer(destrevlog)
2911 index = self.index
2923 index = self.index
2912 for rev in self:
2924 for rev in self:
2913 entry = index[rev]
2925 entry = index[rev]
2914
2926
2915 # Some classes override linkrev to take filtered revs into
2927 # Some classes override linkrev to take filtered revs into
2916 # account. Use raw entry from index.
2928 # account. Use raw entry from index.
2917 flags = entry[0] & 0xffff
2929 flags = entry[0] & 0xffff
2918 linkrev = entry[4]
2930 linkrev = entry[4]
2919 p1 = index[entry[5]][7]
2931 p1 = index[entry[5]][7]
2920 p2 = index[entry[6]][7]
2932 p2 = index[entry[6]][7]
2921 node = entry[7]
2933 node = entry[7]
2922
2934
2923 # (Possibly) reuse the delta from the revlog if allowed and
2935 # (Possibly) reuse the delta from the revlog if allowed and
2924 # the revlog chunk is a delta.
2936 # the revlog chunk is a delta.
2925 cachedelta = None
2937 cachedelta = None
2926 rawtext = None
2938 rawtext = None
2927 if populatecachedelta:
2939 if populatecachedelta:
2928 dp = self.deltaparent(rev)
2940 dp = self.deltaparent(rev)
2929 if dp != nullrev:
2941 if dp != nullrev:
2930 cachedelta = (dp, bytes(self._chunk(rev)))
2942 cachedelta = (dp, bytes(self._chunk(rev)))
2931
2943
2932 if not cachedelta:
2944 if not cachedelta:
2933 rawtext = self.revision(rev, raw=True)
2945 rawtext = self.revision(rev, raw=True)
2934
2946
2935
2947
2936 if deltareuse == self.DELTAREUSEFULLADD:
2948 if deltareuse == self.DELTAREUSEFULLADD:
2937 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2949 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2938 cachedelta=cachedelta,
2950 cachedelta=cachedelta,
2939 node=node, flags=flags,
2951 node=node, flags=flags,
2940 deltacomputer=deltacomputer)
2952 deltacomputer=deltacomputer)
2941 else:
2953 else:
2942 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2954 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2943 checkambig=False)
2955 checkambig=False)
2944 dfh = None
2956 dfh = None
2945 if not destrevlog._inline:
2957 if not destrevlog._inline:
2946 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2958 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2947 try:
2959 try:
2948 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2960 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2949 p2, flags, cachedelta, ifh, dfh,
2961 p2, flags, cachedelta, ifh, dfh,
2950 deltacomputer=deltacomputer)
2962 deltacomputer=deltacomputer)
2951 finally:
2963 finally:
2952 if dfh:
2964 if dfh:
2953 dfh.close()
2965 dfh.close()
2954 ifh.close()
2966 ifh.close()
2955
2967
2956 if addrevisioncb:
2968 if addrevisioncb:
2957 addrevisioncb(self, rev, node)
2969 addrevisioncb(self, rev, node)
2958 finally:
2970 finally:
2959 destrevlog._lazydeltabase = oldlazydeltabase
2971 destrevlog._lazydeltabase = oldlazydeltabase
2960 destrevlog._deltabothparents = oldamd
2972 destrevlog._deltabothparents = oldamd
General Comments 0
You need to be logged in to leave comments. Login now