revlog: also detect intermediate snapshots...
Paul Morelle
r39186:f39efa88 default
@@ -1,3030 +1,3033 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import heapq
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .utils import (
    stringutil,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
# bitmask for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)
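
# Illustrative sketch (not part of the upstream module): round trip between
# packmeta() and parsemeta() on the "\1\n"-delimited metadata block that is
# stored in front of a filelog text. The helper is for demonstration only.
def _example_metaroundtrip():
    packed = packmeta({'copy': 'a', 'copyrev': 'deadbeef'}, 'file content\n')
    meta, size = parsemeta(packed)
    assert meta == {'copy': 'a', 'copyrev': 'deadbeef'}
    assert packed[size:] == 'file content\n'  # payload follows the metadata
    return meta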

def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate the hash. If
      "write" returns True, then the "rawtext" returned by "write" should be
      used to generate the hash. Usually, "write" and "read" return different
      booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor
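
# Illustrative sketch (hypothetical, not part of the upstream module): a
# minimal (read, write, raw) processor tuple following the contract described
# in addflagprocessor() above. Calling this helper would mutate the global
# _flagprocessors table, so it is shown for demonstration only.
def _example_registerprocessor():
    def _read(self, rawtext):
        return rawtext, True   # rawtext unchanged; safe for hash checking
    def _write(self, text):
        return text, True      # stored rawtext is the user-visible text
    def _raw(self, rawtext):
        return True            # same boolean as _write
    addflagprocessor(REVIDX_EXTSTORED, (_read, _write, _raw))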

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
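
# Illustrative sketch (not part of the upstream module): the index packs a
# revision's data offset and its 16-bit flag field into one integer, offset
# in the high bits and flags in the low 16 bits.
def _example_offsettype():
    packed = offset_type(1024, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1024
    assert gettype(packed) == REVIDX_ISCENSORED
    return packed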

_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()
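
# Illustrative sketch (not part of the upstream module): for a root revision
# both parents are nullid, so the node ends up being
# sha1(nullid + nullid + text); parents are always hashed before the text.
def _example_rootnode():
    text = 'hello\n'
    node = hash(text, nullid, nullid)
    assert node == hashlib.sha1(nullid + nullid + text).digest()
    return node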

class _testrevlog(object):
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0):
        """data is a list of revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap

    def start(self, rev):
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # If we have a non-empty delta candidate, there is nothing to trim
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, except the very first revision of a chain
        while (endidx > 1
                and endidx > startidx
                and length(revs[endidx - 1]) == 0):
            endidx -= 1

    return revs[startidx:endidx]

def _segmentspan(revlog, revs, deltainfo=None):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> _segmentspan(revlog, [0, 4])
    17
    >>> _segmentspan(revlog, [3, 4])
    5
    >>> _segmentspan(revlog, [1, 2, 3,])
    7
    >>> _segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    if deltainfo is not None and len(revlog) <= revs[-1]:
        if len(revs) == 1:
            return deltainfo.deltalen
        offset = revlog.end(len(revlog) - 1)
        end = deltainfo.deltalen + offset
    else:
        end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])

def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read at once.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If an individual revision chunk is larger than this limit, it will still
    be yielded individually.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # * deltainfo is used to pick a good delta chain when writing.
    if not (deltainfo is None or targetsize is None):
        msg = 'cannot use `targetsize` with a `deltainfo`'
        raise error.ProgrammingError(msg)
    for chunk in _slicechunktodensity(revlog, revs,
                                      deltainfo,
                                      revlog._srdensitythreshold,
                                      revlog._srmingapsize):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk

def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected but
    that are still too large compared to the read guarantee of revlog. This
    might happen when the "minimal gap size" interrupted the slicing or when
    chains are built in a way that creates large blocks next to each other.

    >>> revlog = _testrevlog([
    ...  3, #0 (3)
    ...  5, #1 (2)
    ...  6, #2 (1)
    ...  8, #3 (2)
    ...  8, #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    startrevidx = 0
    startdata = revlog.start(revs[0])
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        if span <= targetsize:
            endrevidx = idx
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx
    yield _trimchunk(revlog, revs, startrevidx)

def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read at once.
    Assume that revs are sorted.

    ``deltainfo`` is a _deltainfo instance of a revision that we would append
    to the top of the revlog.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    nextrev = len(revlog)
    nextoffset = revlog.end(nextrev - 1)

    if deltainfo is None:
        deltachainspan = _segmentspan(revlog, revs)
        chainpayload = sum(length(r) for r in revs)
    else:
        deltachainspan = deltainfo.distance
        chainpayload = deltainfo.compresseddeltalen

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    if deltainfo is not None and deltainfo.deltalen:
        revs = list(revs)
        revs.append(nextrev)

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        if rev < nextrev:
            revstart = start(rev)
            revlen = length(rev)
        else:
            revstart = nextoffset
            revlen = deltainfo.deltalen

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < targetdensity:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()

class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides revisions that are worth being diffed against,
        grouped by level of easiness.
        """
        revlog = self.revlog
        gdelta = revlog._generaldelta
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and gdelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if gdelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]

                if not revlog._deltabothparents and len(parents) == 2:
                    parents.sort()
                    # To minimize the chance of having to build a fulltext,
                    # pick first whichever parent is closest to us (max rev)
                    yield (parents[1],)
                    # then the other one (min rev) if the first did not fit
                    yield (parents[0],)
                    tested.update(parents)
                elif len(parents) > 0:
                    # Test all parents (1 or 2), and keep the best candidate
                    yield parents
                    tested.update(parents)

            if prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                yield (prev,)
                tested.add(prev)

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        if not revinfo.textlen:
            return None # empty files do not need deltas

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltalength = self.revlog.length
        deltaparent = self.revlog.deltaparent

        deltainfo = None
        deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            # filter out delta bases that will never produce a good delta
            candidaterevs = [r for r in candidaterevs
                             if self.revlog.length(r) <= deltas_limit]
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # skip over empty deltas (no need to include them in a chain)
                while candidaterev != nullrev and not deltalength(candidaterev):
                    candidaterev = deltaparent(candidaterev)
                # no need to try a delta against nullid, this will be handled
                # by fulltext later.
                if candidaterev == nullrev:
                    continue
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
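
# Illustrative sketch (not part of the upstream module): a v0 index entry is
# a fixed 76-byte record (four signed 32-bit integers followed by three
# 20-byte nodeids), so entries can be sliced out of the index by offset.
def _example_v0entry():
    assert indexformatv0.size == 4 * 4 + 3 * 20  # 76 bytes per record
    record = indexformatv0_pack(0, 0, 0, 0, nullid, nullid, nullid)
    offset, clen, baserev, linkrev, p1, p2, node = indexformatv0_unpack(record)
    return (offset, clen, baserev, linkrev, p1, p2, node)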

class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

# maximum <delta-chain-data>/<revision-text-length> ratio
LIMIT_DELTA2TEXT = 2

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p
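
# Illustrative sketch (an interpretation, not part of the upstream module):
# the revlog version header is not stored separately. packentry() overwrites
# the first 4 bytes of entry 0 (the high half of its offset field, which is
# always zero for the first revision) with the version/flags word.
def _example_versionheader():
    header = versionformat_pack(REVLOG_DEFAULT_VERSION)
    version = versionformat_unpack(header)[0]
    assert version & 0xFFFF == REVLOG_DEFAULT_FORMAT
    assert version & FLAG_INLINE_DATA
    return version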

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)
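        # Illustrative note (not part of the original module): the check
        # above relies on the standard bit trick that a positive integer x
        # is a power of two exactly when x & (x - 1) == 0, e.g.:
        #
        #   65536 & 65535 == 0      # power of two, accepted
        #   65537 & 65536 == 65536  # not a power of two, rejected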

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                        self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        length = len(self)
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
            if stop > length:
                stop = length
        else:
            stop = length
        return pycompat.xrange(start, stop, step)
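    # Illustrative sketch (not part of the original module; rl is a
    # hypothetical revlog instance): revs() walks forward by default and
    # flips direction when start > stop, e.g. for a revlog with 5 revisions:
    #
    #   list(rl.revs())                 # [0, 1, 2, 3, 4]
    #   list(rl.revs(start=1, stop=3))  # [1, 2, 3] (stop is inclusive)
    #   list(rl.revs(start=3, stop=1))  # [3, 2, 1] (reverse iteration)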

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
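    # Illustrative sketch (not part of the original module): start() and
    # flags() split the packed first tuple entry described above, e.g.:
    #
    #   packed = (4096 << 16) | REVIDX_ISCENSORED
    #   packed >> 16      # 4096, the data offset returned by start()
    #   packed & 0xFFFF   # the flag bits returned by flags()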

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base
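    # Illustrative sketch (not part of the original module): index[rev][3]
    # stores the base of each delta, so the loop above chases base pointers
    # until it reaches a revision that is its own base (a full snapshot).
    # With hypothetical bases 5 -> 3 -> 2 -> 2, chainbase(5) walks
    #
    #   base(5) == 3, base(3) == 2, base(2) == 2   ->  returns 2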

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
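    # Illustrative sketch (not part of the original module; rl is a
    # hypothetical generaldelta revlog whose base pointers index[r][3]
    # give 7 -> 4 -> 2 -> 2):
    #
    #   rl._deltachain(7)             # ([2, 4, 7], False): full chain
    #   rl._deltachain(7, stoprev=4)  # ([7], True): stopped before rev 4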

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]
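    # Illustrative sketch (not part of the original module; rl is a
    # hypothetical revlog): for a graph where rev 0 is the parent of revs 1
    # and 3, and rev 1 is the parent of rev 2, every rev is first marked as
    # a candidate head and then knocked out once it is seen as a parent.
    # The extra slot at the end of ishead absorbs nullrev (-1) parents.
    #
    #   rl._headrevs()   # [2, 3]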

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)
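    # Illustrative note (not part of the original module): the a > b early
    # return above is safe because revision numbers are topologically
    # ordered, so an ancestor always has a smaller rev than its descendant.
    # The remaining case reduces to a membership test; if a is an ancestor
    # of b, the common ancestors of a and b are exactly the ancestors of a,
    # whose single head is a itself, e.g.:
    #
    #   2 in rl._commonancestorsheads(2, 7)  # True iff 2 is an ancestor of 7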
1758
1758
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                 _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                     _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

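    # Usage sketch (illustrative values, not from the original file): for a
    # revlog ``rl``, each of the following resolves to a full binary nodeid:
    #
    #   rl.lookup(0)            # integer revision number
    #   rl.lookup(b'0')         # str(revision number)
    #   rl.lookup(somenode)     # 20-byte binary nodeid
    #   rl.lookup(b'1e4e1b')    # hypothetical unambiguous hex prefix
    #
    # An ambiguous prefix raises AmbiguousPrefixLookupError; an unknown
    # identifier raises LookupError.
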
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

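    # Illustration (assumed values, not original code): if the cache holds
    # (offset=0, data=b'abcd') and a segment arrives at offset 4, the two
    # are contiguous and the cache grows to (0, b'abcd' + data), provided
    # the combined size stays under _chunksize; otherwise the new segment
    # simply replaces the cached one.
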
    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
            self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

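    # Worked example (illustrative, assuming _chunkcachesize = 65536):
    # for offset = 70000 and length = 100, realoffset = 70000 & ~65535
    # = 65536 and reallength = ((70100 + 65536) & ~65535) - 65536 = 65536,
    # so a full 64 KiB window around the request is read and cached, and
    # util.buffer() slices the 100 requested bytes back out of it.
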
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

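    # Illustration (not from the original source): in an inline revlog the
    # index entries and revision data are interleaved in one file, so the
    # data of revision r sits (r + 1) * self._io.size bytes past its logical
    # offset. With 64-byte index entries, revision 2's data is shifted by
    # 3 * 64 = 192 bytes; the adjustment above accounts for exactly that.
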
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs, targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if rev == nullrev:
            return True
        deltap = self.deltaparent(rev)
        if deltap == nullrev:
            return True
        p1, p2 = self.parentrevs(rev)
        if deltap in (p1, p2):
            return False
        return self.issnapshot(deltap)

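    # Sketch of the snapshot cases handled above (editorial, not original
    # code): a revision stored as full text (delta parent == nullrev) is a
    # root snapshot; a revision whose delta is taken against one of its
    # parents is an ordinary delta; and a revision whose delta base is some
    # other revision is an intermediate snapshot exactly when that base is
    # itself a snapshot, hence the recursion on the delta parent.
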
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written
        to revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should
        be applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags
        defined in REVIDX_FLAGS_ORDER needs to be stable to allow
        non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply
            # the related operation transform and update the result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                validatehash = validatehash and vhash

        return text, validatehash

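    # Contract sketch (inferred from the code above, not original prose):
    # each _flagprocessors entry maps a flag bit to a tuple
    # (readtransform, writetransform, rawtransform). The read and write
    # transforms return (newtext, validatehash); the raw transform returns
    # only the validatehash boolean, since raw data must pass through
    # unmodified (e.g. for changegroup generation).
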
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise RevlogError(_("integrity check failed on %s:%s")
                        % (self.indexfile, pycompat.bytestr(revornode)))
        except RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover
            # data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

        # the temp file replaces the real index when we exit the context
        # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses
            might use a different hashing method (and override checkhash() in
            such a case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any
        # provided cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

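    # Illustration (editorial note, not original code): compress() returns
    # a (header, data) pair. An empty header means the compressed payload is
    # self-identifying (zlib output starts with 'x', for instance); 'u'
    # marks data stored uncompressed; and text already beginning with '\0'
    # is stored verbatim, since decompress() treats that byte as "no
    # compression".
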
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and
        # many chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported
        #    compression engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest
        # reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') %
                                  stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

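    # Dispatch summary (editorial, restating the code above): the first byte
    # routes the chunk --
    #   'x'  -> zlib-compressed, handed to zlib.decompress
    #   '\0' -> uncompressed data that happens to start with NUL
    #   'u'  -> explicitly uncompressed; the marker byte is stripped
    #   other -> resolved as a registered compression-engine header, with
    #            the decompressor cached in self._decompressors afterwards.
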
    def _isgooddeltainfo(self, deltainfo, revinfo):
        """Returns True if the given delta is good. Good means that it is
        within the disk span, disk size, and chain length bounds that we know
        to be performant."""
        if deltainfo is None:
            return False

        # - 'deltainfo.distance' is the distance from the base revision --
        #   bounding it limits the amount of I/O we need to do.
        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
        #   deltas we need to apply -- bounding it limits the amount of CPU
        #   we consume.

        if self._sparserevlog:
            # As sparse-read will be used, we can consider that the distance,
            # instead of being the span of the whole chunk,
            # is the span of the largest read chunk
            base = deltainfo.base

            if base != nullrev:
                deltachain = self._deltachain(base)[0]
            else:
                deltachain = []

            chunks = _slicechunk(self, deltachain, deltainfo)
            all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
            distance = max(all_span)
        else:
            distance = deltainfo.distance

        textlen = revinfo.textlen
        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = distance # ensure the conditional passes
        maxdist = max(maxdist, defaultmax)
        if self._sparserevlog and maxdist < self._srmingapsize:
            # In multiple places, we ignore irrelevant data ranges below a
            # certain size. We also apply that tradeoff here and relax the
            # span constraint for small enough content.
            maxdist = self._srmingapsize

        # Bad delta from read span:
        #
        # If the span of data read is larger than the maximum allowed.
        if maxdist < distance:
            return False

        # Bad delta from new delta size:
        #
        # If the delta size is larger than the target text, storing the
        # delta will be inefficient.
        if textlen < deltainfo.deltalen:
            return False

        # Bad delta from cumulated payload size:
        #
        # If the sum of the deltas gets larger than K * target text length.
        if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
            return False

        # Bad delta from chain length:
        #
        # If the number of deltas in the chain gets too high.
        if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
            return False

        return True

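    # Worked example (illustrative numbers only): with textlen = 1000 and
    # default settings, a candidate delta is rejected when the read span
    # exceeds max(self._maxdeltachainspan, 4 * 1000) bytes, when the delta
    # itself is larger than 1000 bytes, when the cumulated deltas to apply
    # exceed LIMIT_DELTA2TEXT * 1000 bytes, or when the chain would grow
    # past _maxchainlen (if one is configured).
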
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer
        will be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid or node in wdirfilenodeids:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = None
        else:
            deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer._buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

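    # Index entry reminder (editorial sketch): the 8-tuple appended above is
    # (packed offset+flags, compressed length, full text length, delta base
    # rev, linkrev, p1 rev, p2 rev, nodeid) -- matching the on-disk order of
    # the REVLOGV1 index format.
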
2625 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2628 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2626 # Files opened in a+ mode have inconsistent behavior on various
2629 # Files opened in a+ mode have inconsistent behavior on various
2627 # platforms. Windows requires that a file positioning call be made
2630 # platforms. Windows requires that a file positioning call be made
2628 # when the file handle transitions between reads and writes. See
2631 # when the file handle transitions between reads and writes. See
2629 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2632 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2630 # platforms, Python or the platform itself can be buggy. Some versions
2633 # platforms, Python or the platform itself can be buggy. Some versions
2631 # of Solaris have been observed to not append at the end of the file
2634 # of Solaris have been observed to not append at the end of the file
2632 # if the file was seeked to before the end. See issue4943 for more.
2635 # if the file was seeked to before the end. See issue4943 for more.
2633 #
2636 #
2634 # We work around this issue by inserting a seek() before writing.
2637 # We work around this issue by inserting a seek() before writing.
2635 # Note: This is likely not necessary on Python 3.
2638 # Note: This is likely not necessary on Python 3.
2636 ifh.seek(0, os.SEEK_END)
2639 ifh.seek(0, os.SEEK_END)
2637 if dfh:
2640 if dfh:
2638 dfh.seek(0, os.SEEK_END)
2641 dfh.seek(0, os.SEEK_END)
2639
2642
2640 curr = len(self) - 1
2643 curr = len(self) - 1
2641 if not self._inline:
2644 if not self._inline:
2642 transaction.add(self.datafile, offset)
2645 transaction.add(self.datafile, offset)
2643 transaction.add(self.indexfile, curr * len(entry))
2646 transaction.add(self.indexfile, curr * len(entry))
2644 if data[0]:
2647 if data[0]:
2645 dfh.write(data[0])
2648 dfh.write(data[0])
2646 dfh.write(data[1])
2649 dfh.write(data[1])
2647 ifh.write(entry)
2650 ifh.write(entry)
2648 else:
2651 else:
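            # Inline revlogs interleave each index entry with its data
            # chunk, so the data offset is shifted by the accumulated size
            # of the preceding index entries to get an absolute position.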
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first
        delta is against its parent, which should already be in our log;
        the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
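        # (mdiff.replacediffheader() packs a single ">lll" hunk header of
        # (start=0, end=oldlen, newlen), i.e. one patch replacing the whole
        # base text.)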
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip revisions
        with a linkrev >= minlink

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than or equal to
        # minlink. So we can stop walking.
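        # For example, in a linear revlog whose linkrevs are [0, 2, 2, 3],
        # getstrippoint(2) pops revs 3, 2 and 1, finds each of their
        # linkrevs to be >= 2, and returns (1, set()): revs 1-3 must be
        # stripped and no surviving rev has a broken linkrev.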
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
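            # inline revlogs interleave index entries with data, so the
            # truncation point must also include the index entries of the
            # surviving revisions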
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
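        """check the sizes of the index and data files

        Returns a (dd, di) tuple: dd is the difference between the actual
        and the expected size of the data file, and di the number of
        unexpected bytes in the index file. Both are 0 when the on-disk
        sizes are consistent with the index contents.
        """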
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).
        DELTAREUSEFULLADD
           Revisions will be re-added through the full ``addrevision()`` path
           rather than the lower-level ``_addrevision()``, recomputing deltas
           as if the content were new.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        When ``None``, the destination revlog's current setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
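                        # cachedelta is (delta base rev, raw delta bytes);
                        # _chunk() returns the revision's on-disk chunk,
                        # which is a delta whenever deltaparent() is not
                        # nullrev.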
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd