revlog: add a method to tell whether a rev is stored as a snapshot...
Paul Morelle
r39185:f8db4586 default
@@ -1,3020 +1,3030 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import heapq
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .utils import (
    stringutil,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
# bitmask for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)

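# Illustrative sketch (not from the original file): for string keys and
# values, packmeta and parsemeta round-trip a metadata dict through the
# "\1\n"-framed representation defined above. All names below are local to
# this example.
#
#   meta = {'copy': 'a', 'copyrev': '0' * 40}
#   packed = packmeta(meta, 'file content')
#   parsed, offset = parsemeta(packed)
#   assert parsed == meta
#   assert packed[offset:] == 'file content'
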
def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
      - (read) f(self, rawtext) -> text, bool
      - (write) f(self, text) -> rawtext, bool
      - (raw) f(self, rawtext) -> bool
    "text" is presented to the user. "rawtext" is stored in revlog data, not
    directly visible to the user.
    The boolean returned by these transforms is used to determine whether
    the returned text can be used for hash integrity checking. For example,
    if "write" returns False, then "text" is used to generate hash. If
    "write" returns True, that basically means "rawtext" returned by "write"
    should be used to generate hash. Usually, "write" and "read" return
    different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor

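# Illustrative sketch (not from the original file): registering a processor
# on REVIDX_EXTSTORED. The three hypothetical functions below follow the
# (read, write, raw) protocol documented in addflagprocessor; a real
# processor would transform the text instead of passing it through.
#
#   def _readext(self, rawtext):
#       return rawtext, True   # True: returned text is usable for hashing
#   def _writeext(self, text):
#       return text, True      # True: returned rawtext is usable for hashing
#   def _rawext(self, rawtext):
#       return True            # rawtext may be used for hash integrity checks
#
#   addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))
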
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

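# Illustrative sketch (not from the original file): the first field of an
# index entry packs the data offset into the upper bits and the 16-bit flag
# field into the lower bits; getoffset/gettype recover the two halves.
#
#   packed = offset_type(32, REVIDX_EXTSTORED)
#   assert packed == (32 << 16) | REVIDX_EXTSTORED
#   assert getoffset(packed) == 32
#   assert gettype(packed) == REVIDX_EXTSTORED
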
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()

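# Illustrative sketch (not from the original file): the node hash is plain
# SHA-1 over the two parent hashes (sorted, so parent order does not matter)
# followed by the text. Since nullid is the smallest possible 20-byte value,
# the fast path above computes the same digest as the generic branch.
#
#   p1 = b'\xaa' * 20  # hypothetical parent node
#   assert hash(b'text', p1, nullid) == \
#       hashlib.sha1(nullid + p1 + b'text').digest()
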
class _testrevlog(object):
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0):
204 """data is an list of revision payload boundaries"""
204 """data is an list of revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap

    def start(self, rev):
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # If we have a non-empty delta candidate, there is nothing to trim
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, except the very first revision of a chain
        while (endidx > 1
               and endidx > startidx
               and length(revs[endidx - 1]) == 0):
            endidx -= 1

    return revs[startidx:endidx]

def _segmentspan(revlog, revs, deltainfo=None):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> _segmentspan(revlog, [0, 4])
    17
    >>> _segmentspan(revlog, [3, 4])
    5
    >>> _segmentspan(revlog, [1, 2, 3,])
    7
    >>> _segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    if deltainfo is not None and len(revlog) <= revs[-1]:
        if len(revs) == 1:
            return deltainfo.deltalen
        offset = revlog.end(len(revlog) - 1)
        end = deltainfo.deltalen + offset
    else:
        end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])

def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revision chunks are larger than this limit, they will still
    be yielded individually.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # * deltainfo is used to pick a good delta chain when writing.
    if not (deltainfo is None or targetsize is None):
        msg = 'cannot use `targetsize` with a `deltainfo`'
        raise error.ProgrammingError(msg)
    for chunk in _slicechunktodensity(revlog, revs,
                                      deltainfo,
                                      revlog._srdensitythreshold,
                                      revlog._srmingapsize):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk

def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected, but
    that are still too large compared to the read guarantee of revlogs. This
    might happen when the "minimal gap size" interrupted the slicing or when
    chains are built in a way that creates large blocks next to each other.

    >>> revlog = _testrevlog([
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    startrevidx = 0
    startdata = revlog.start(revs[0])
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        if span <= targetsize:
            endrevidx = idx
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx
    yield _trimchunk(revlog, revs, startrevidx)

def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    ``deltainfo`` is a _deltainfo instance of a revision that we would append
    to the top of the revlog.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    nextrev = len(revlog)
    nextoffset = revlog.end(nextrev - 1)

    if deltainfo is None:
        deltachainspan = _segmentspan(revlog, revs)
        chainpayload = sum(length(r) for r in revs)
    else:
        deltachainspan = deltainfo.distance
        chainpayload = deltainfo.compresseddeltalen

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    if deltainfo is not None and deltainfo.deltalen:
        revs = list(revs)
        revs.append(nextrev)

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        if rev < nextrev:
            revstart = start(rev)
            revlen = length(rev)
        else:
            revstart = nextoffset
            revlen = deltainfo.deltalen

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < targetdensity:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()

class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides revisions that are interesting to diff against,
        grouped by level of easiness.
618 """
618 """
619 revlog = self.revlog
619 revlog = self.revlog
620 gdelta = revlog._generaldelta
620 gdelta = revlog._generaldelta
621 curr = len(revlog)
621 curr = len(revlog)
622 prev = curr - 1
622 prev = curr - 1
623 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
623 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
624
624
625 # should we try to build a delta?
625 # should we try to build a delta?
626 if prev != nullrev and revlog.storedeltachains:
626 if prev != nullrev and revlog.storedeltachains:
627 tested = set()
627 tested = set()
628 # This condition is true most of the time when processing
628 # This condition is true most of the time when processing
629 # changegroup data into a generaldelta repo. The only time it
629 # changegroup data into a generaldelta repo. The only time it
630 # isn't true is if this is the first revision in a delta chain
630 # isn't true is if this is the first revision in a delta chain
631 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
631 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
632 if cachedelta and gdelta and revlog._lazydeltabase:
632 if cachedelta and gdelta and revlog._lazydeltabase:
633 # Assume what we received from the server is a good choice
633 # Assume what we received from the server is a good choice
634 # build delta will reuse the cache
634 # build delta will reuse the cache
635 yield (cachedelta[0],)
635 yield (cachedelta[0],)
636 tested.add(cachedelta[0])
636 tested.add(cachedelta[0])
637
637
638 if gdelta:
638 if gdelta:
                # exclude bases already tested lazily above, if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]

                if not revlog._deltabothparents and len(parents) == 2:
                    parents.sort()
                    # To minimize the chance of having to build a fulltext,
                    # pick first whichever parent is closest to us (max rev)
                    yield (parents[1],)
                    # then the other one (min rev) if the first did not fit
                    yield (parents[0],)
                    tested.update(parents)
                elif len(parents) > 0:
                    # Test all parents (1 or 2), and keep the best candidate
                    yield parents
                    tested.update(parents)

            if prev not in tested:
                # other approaches failed; try against prev to hopefully
                # save us a fulltext.
                yield (prev,)
                tested.add(prev)

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        if not revinfo.textlen:
            return None # empty files do not need deltas

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltalength = self.revlog.length
        deltaparent = self.revlog.deltaparent

        deltainfo = None
        deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            # filter out delta bases that will never produce a good delta
            candidaterevs = [r for r in candidaterevs
                             if self.revlog.length(r) <= deltas_limit]
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # skip over empty deltas (no need to include them in a chain)
                while candidaterev != nullrev and not deltalength(candidaterev):
                    candidaterev = deltaparent(candidaterev)
                # no need to try a delta against nullid, this will be handled
                # by fulltext later.
                if candidaterev == nullrev:
                    continue
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

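# Illustrative sketch (not from the original file): a v0 index entry is 76
# bytes: four big-endian 32-bit integers followed by three 20-byte nodeids.
#
#   assert indexformatv0.size == 4 * 4 + 3 * 20  # 76 bytes
#   entry = indexformatv0_pack(0, 10, 0, 0, nullid, nullid, b'\xaa' * 20)
#   assert indexformatv0_unpack(entry)[1] == 10  # compressed length
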
class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

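# Illustrative note (not from the original file): index[-1] is reserved for
# the null revision, so callers may index with nullrev without bounds checks.
#
#   assert revlogoldindex([])[-1] == (0, 0, 0, -1, -1, -1, -1, nullid)
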
# maximum <delta-chain-data>/<revision-text-length> ratio
LIMIT_DELTA2TEXT = 2

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

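# Illustrative sketch (not from the original file): a v1 ("ng") index entry
# is 64 bytes, with 12 trailing pad bytes leaving room for 32-byte hashes.
# For the first entry, revlogio.packentry below overwrites the top 4 bytes
# of the offset/flags field with the 4-byte version header.
#
#   assert indexformatng.size == 8 + 6 * 4 + 20 + 12  # 64 bytes
#   assert versionformat.size == 4
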
876 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
876 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
877 # signed integer)
877 # signed integer)
878 _maxentrysize = 0x7fffffff
878 _maxentrysize = 0x7fffffff
879
879
880 class revlogio(object):
880 class revlogio(object):
881 def __init__(self):
881 def __init__(self):
882 self.size = indexformatng.size
882 self.size = indexformatng.size
883
883
884 def parseindex(self, data, inline):
884 def parseindex(self, data, inline):
885 # call the C implementation to parse the index data
885 # call the C implementation to parse the index data
886 index, cache = parsers.parse_index2(data, inline)
886 index, cache = parsers.parse_index2(data, inline)
887 return index, getattr(index, 'nodemap', None), cache
887 return index, getattr(index, 'nodemap', None), cache
888
888
889 def packentry(self, entry, node, version, rev):
889 def packentry(self, entry, node, version, rev):
890 p = indexformatng_pack(*entry)
890 p = indexformatng_pack(*entry)
891 if rev == 0:
891 if rev == 0:
892 p = versionformat_pack(version) + p[4:]
892 p = versionformat_pack(version) + p[4:]
893 return p
893 return p
894
894
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        length = len(self)
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
            if stop > length:
                stop = length
        else:
            stop = length
        return pycompat.xrange(start, stop, step)

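    # Usage sketch (``rl`` stands for a hypothetical revlog instance with
    # at least six revisions; not part of the original file). ``stop`` is
    # inclusive, and iteration runs backwards when start > stop:
    #
    #   >>> list(rl.revs(start=2, stop=4))
    #   [2, 3, 4]
    #   >>> list(rl.revs(start=5, stop=2))
    #   [5, 4, 3, 2]
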
    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7] # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and
        # two clients could have the same revlog node with different flags
        # (i.e. different rawtext contents), making the delta incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

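    # Sketch of the offset/flags packing used by the accessors above
    # (illustration only): a hypothetical entry at byte offset 100 with no
    # flags stores (100 << 16) | 0 in index[rev][0], so:
    #
    #   >>> offset_flags = (100 << 16) | 0
    #   >>> offset_flags >> 16, offset_flags & 0xFFFF
    #   (100, 0)
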
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

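    # Sketch (hypothetical numbers, not from the original file): if rev 4
    # deltas against rev 2, which deltas against full-text rev 0, then
    # _chaininfo(4) walks that chain and returns a 2-tuple of the chain
    # length (2 deltas here) and the total compressed bytes that must be
    # read to rebuild rev 4 (both deltas plus the base text).
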
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

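    # Usage sketch for the same hypothetical chain as above (rev 4 deltas
    # against rev 2, which deltas against full-text rev 0); ``rl`` is a
    # hypothetical revlog instance:
    #
    #   >>> rl._deltachain(4)
    #   ([0, 2, 4], False)
    #   >>> rl._deltachain(4, stoprev=2) # stop before applying rev 2
    #   ([4], True)
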
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

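    # Sketch of the fallback above on a hypothetical history (not part of
    # the original file): every rev starts as a candidate head, then each
    # rev clears its parents. With revs 0-1-2 linear plus a rev 3 also
    # branching off rev 1, revs 0 and 1 get cleared because they are
    # parents, leaving [2, 3].
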
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

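    # Sketch of why the membership test above works (illustration only):
    # a revision is an ancestor of itself, so if ``a`` is an ancestor of
    # ``b``, the common ancestors of ``a`` and ``b`` are exactly the
    # ancestors of ``a``, whose unique head is ``a`` itself, and ``a``
    # must therefore appear in _commonancestorsheads(a, b).
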
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

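    # Note (illustrative, not from the original file): when the index
    # reports several "best" common ancestors, min() over their binary
    # nodeids is an arbitrary but deterministic tie-breaker, so repeated
    # runs always pick the same ancestor.
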
1772 def _match(self, id):
1772 def _match(self, id):
1773 if isinstance(id, int):
1773 if isinstance(id, int):
1774 # rev
1774 # rev
1775 return self.node(id)
1775 return self.node(id)
1776 if len(id) == 20:
1776 if len(id) == 20:
1777 # possibly a binary node
1777 # possibly a binary node
1778 # odds of a binary node being all hex in ASCII are 1 in 10**25
1778 # odds of a binary node being all hex in ASCII are 1 in 10**25
1779 try:
1779 try:
1780 node = id
1780 node = id
1781 self.rev(node) # quick search the index
1781 self.rev(node) # quick search the index
1782 return node
1782 return node
1783 except LookupError:
1783 except LookupError:
1784 pass # may be partial hex id
1784 pass # may be partial hex id
1785 try:
1785 try:
1786 # str(rev)
1786 # str(rev)
1787 rev = int(id)
1787 rev = int(id)
1788 if "%d" % rev != id:
1788 if "%d" % rev != id:
1789 raise ValueError
1789 raise ValueError
1790 if rev < 0:
1790 if rev < 0:
1791 rev = len(self) + rev
1791 rev = len(self) + rev
1792 if rev < 0 or rev >= len(self):
1792 if rev < 0 or rev >= len(self):
1793 raise ValueError
1793 raise ValueError
1794 return self.node(rev)
1794 return self.node(rev)
1795 except (ValueError, OverflowError):
1795 except (ValueError, OverflowError):
1796 pass
1796 pass
1797 if len(id) == 40:
1797 if len(id) == 40:
1798 try:
1798 try:
1799 # a full hex nodeid?
1799 # a full hex nodeid?
1800 node = bin(id)
1800 node = bin(id)
1801 self.rev(node)
1801 self.rev(node)
1802 return node
1802 return node
1803 except (TypeError, LookupError):
1803 except (TypeError, LookupError):
1804 pass
1804 pass
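    # for illustration, with a hypothetical revlog `rl` (names not from this
    # file): rl._match(5) and rl._match(b'5') both resolve to rl.node(5),
    # rl._match(b'-1') counts from the end, a 20-byte id is tried as a binary
    # nodeid and a 40-char id as a full hex nodeid; anything that fails to
    # resolve falls through and returns None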

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                 _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                     _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)
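            # the working directory pseudo-node is forty 'f' characters, so
            # any all-'f' prefix could also refer to it and must be extended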

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data
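        # e.g. a cache of (0, <1000 bytes>) extended by a read landing at
        # offset 1000 grows contiguously; a non-contiguous or oversized read
        # simply replaces the cache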

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
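        # worked example, assuming the default 64KiB window (cachesize =
        # 65536): a request at offset=70000, length=100 is widened to
        # realoffset=65536 and reallength=65536, i.e. the aligned window
        # [65536, 131072) is read and cached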
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
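            # an inline revlog interleaves index entries and data chunks in
            # one file, so a revision's data is preceded by one index entry
            # (self._io.size bytes) per revision up to and including itself,
            # hence the (rev + 1) adjustment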
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs, targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1
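    # with generaldelta, each index entry records its own delta base; the
    # legacy layout always deltas against the previous revision; in both
    # layouts "base == rev" marks a revision stored in full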

    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if rev == nullrev:
            return True
        deltap = self.deltaparent(rev)
        if deltap == nullrev:
            return True
        return False
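        # illustrative sketch (assuming a revlog instance `rl`): the first
        # element of a delta chain is always stored in full, so
        #     assert rl.issnapshot(rl._deltachain(r)[0][0])
        # holds for any valid rev r; later chain members are deltas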

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))
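        # fast-path note: when rev2 is stored as a delta against rev1, the
        # on-disk chunk already *is* the binary diff, so neither revision
        # needs to be reconstructed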

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize
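            # heuristic: let sliced reads span up to roughly four times the
            # size of the text being rebuilt; the same 4x factor appears as
            # defaultmax in _isgooddeltainfo below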

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable because the transforms are not
        commutative.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation ") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)
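        # e.g. with REVIDX_FLAGS_ORDER = [A, B] and both flags set, a write
        # applies B's transform and then A's, while a read undoes them as A
        # then B, so non-commutative transforms still round-trip cleanly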

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply
            # the related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise RevlogError(_("integrity check failed on %s:%s")
                                  % (self.indexfile, pycompat.bytestr(revornode)))
        except RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)
2369
2379
2370 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2380 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2371 cachedelta=None, deltacomputer=None):
2381 cachedelta=None, deltacomputer=None):
2372 """add a raw revision with known flags, node and parents
2382 """add a raw revision with known flags, node and parents
2373 useful when reusing a revision not stored in this revlog (ex: received
2383 useful when reusing a revision not stored in this revlog (ex: received
2374 over wire, or read from an external bundle).
2384 over wire, or read from an external bundle).
2375 """
2385 """
2376 dfh = None
2386 dfh = None
2377 if not self._inline:
2387 if not self._inline:
2378 dfh = self._datafp("a+")
2388 dfh = self._datafp("a+")
2379 ifh = self._indexfp("a+")
2389 ifh = self._indexfp("a+")
2380 try:
2390 try:
2381 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2391 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2382 flags, cachedelta, ifh, dfh,
2392 flags, cachedelta, ifh, dfh,
2383 deltacomputer=deltacomputer)
2393 deltacomputer=deltacomputer)
2384 finally:
2394 finally:
2385 if dfh:
2395 if dfh:
2386 dfh.close()
2396 dfh.close()
2387 ifh.close()
2397 ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data
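        # the first tuple element is a header byte to write before the data:
        # empty when the compressed stream carries its own marker (e.g. 'x'
        # for zlib), empty again for raw data already starting with '\0'
        # (which decompress treats as uncompressed), and 'u' otherwise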

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') %
                                  stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

    def _isgooddeltainfo(self, deltainfo, revinfo):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if deltainfo is None:
            return False

        # - 'deltainfo.distance' is the distance from the base revision --
        #   bounding it limits the amount of I/O we need to do.
        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
        #   deltas we need to apply -- bounding it limits the amount of CPU
        #   we consume.

        if self._sparserevlog:
            # As sparse-read will be used, we can consider that the distance,
            # instead of being the span of the whole chunk,
            # is the span of the largest read chunk
            base = deltainfo.base

            if base != nullrev:
                deltachain = self._deltachain(base)[0]
            else:
                deltachain = []

            chunks = _slicechunk(self, deltachain, deltainfo)
            all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
            distance = max(all_span)
        else:
            distance = deltainfo.distance

        textlen = revinfo.textlen
        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = distance # ensure the conditional below passes
        maxdist = max(maxdist, defaultmax)
        if self._sparserevlog and maxdist < self._srmingapsize:
            # In multiple places, we ignore irrelevant data ranges below a
            # certain size. We also apply this tradeoff here and relax the
            # span constraint for small enough content.
            maxdist = self._srmingapsize

        # Bad delta from read span:
        #
        # If the span of data read is larger than the maximum allowed.
        if maxdist < distance:
            return False

        # Bad delta from new delta size:
        #
        # If the delta size is larger than the target text, storing the
        # delta will be inefficient.
        if textlen < deltainfo.deltalen:
            return False

        # Bad delta from cumulated payload size:
        #
        # If the sum of the deltas gets larger than K * the target text length.
        if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
            return False

        # Bad delta from chain length:
        #
        # If the number of deltas in the chain gets too high.
        if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
            return False

        return True
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid or node in wdirfilenodeids:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs testing, also to
        # avoid overhead fetching flags again.
        if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = None
        else:
            deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr
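            # storing the full text: the revision becomes its own delta base
            # (base == curr), which is exactly the "snapshot" case that
            # issnapshot() detects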

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer._buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)
2645
2655
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The
        first delta is against its parent, which should already be in our
        log; the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """
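        # Each item of ``deltas`` is a 7-tuple, unpacked in the loop below:
        #
        #   (node, p1, p2, linknode, deltabase, delta, flags)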

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

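    # Hedged usage sketch (names hypothetical): addgroup() is typically
    # driven by changegroup application, with linknodes resolved through
    # the changelog, e.g.:
    #
    #   added = fl.addgroup(deltas, cl.rev, tr)  # fl: filelog, cl: changelog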
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
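        # For reference, a full-replacement delta is a single patch hunk:
        # a ">lll" header (hunk start, hunk end, length of new text)
        # followed by the new text itself. A censored revision's delta is
        # therefore expected to look roughly like this (illustrative
        # sketch, assuming the prefix heuristic above holds):
        #
        #   struct.pack(">lll", 0, oldlen, newlen) + "\1\ncensored:" + ...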
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

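    # Hedged usage sketch: strip() below consumes the first element of the
    # returned tuple, e.g.:
    #
    #   rev, brokenrevs = rl.getstrippoint(minlink)   # rl: a revlog
    #   # revisions rev..len(rl)-1 will be truncated; ``brokenrevs`` lists
    #   # those among them whose own linkrev is still below ``minlink``.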
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

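        # the index keeps a sentinel entry for the null revision at
        # position -1; slicing to -1 below preserves it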
        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
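        # Returns a (dd, di) pair used for consistency checking (e.g. by
        # ``hg verify``): ``dd`` is how many bytes the data file length is
        # off from what the index expects, ``di`` the number of
        # unaccounted-for bytes in the index file. Both are 0 for a
        # consistent revlog.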
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        When unspecified, the destination revlog's existing setting is used.
        """
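        # Hedged usage sketch (names hypothetical): force all deltas to be
        # recomputed while copying into an empty destination revlog, e.g.
        # as part of a repository upgrade:
        #
        #   with repo.transaction('clone') as tr:
        #       srcrevlog.clone(tr, destrevlog,
        #                       deltareuse=srcrevlog.DELTAREUSENEVER)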
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd