revlog: compute snapshot depth on delta info...
Boris Feld
r39190:e0da43e2 default
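
The change below adds a ``snapshotdepth`` field to ``_deltainfo`` and fills it
in from ``_builddeltainfo``: depth 0 when the delta base is ``nullrev`` (a full
snapshot), the length of the base's delta chain when the base is itself a
snapshot of a sparse revlog, and ``None`` for ordinary deltas. A minimal
standalone sketch of that idea follows; ``deltaparent`` and ``issnapshot`` here
are toy stand-ins for illustration, not Mercurial's actual API:

    NULLREV = -1

    def _chain(rev, deltaparent):
        # collect the delta chain leading to ``rev``, base first; this plays
        # the role of revlog._deltachain(rev)[0] in the real code
        revs = []
        while rev != NULLREV:
            revs.append(rev)
            rev = deltaparent(rev)
        revs.reverse()
        return revs

    def snapshotdepth(rev, deltaparent, issnapshot):
        # depth of ``rev`` as a snapshot, or None for a plain delta
        base = deltaparent(rev)
        if base == NULLREV:
            return 0                               # full snapshot
        if issnapshot(rev) and issnapshot(base):
            return len(_chain(base, deltaparent))  # intermediate snapshot
        return None                                # ordinary delta

    # toy chain: rev 0 is a full snapshot, rev 1 an intermediate snapshot
    # built on it, rev 2 a plain delta against rev 1
    parent = {0: NULLREV, 1: 0, 2: 1}.get
    snap = {0, 1}.__contains__
    assert snapshotdepth(0, parent, snap) == 0
    assert snapshotdepth(1, parent, snap) == 1
    assert snapshotdepth(2, parent, snap) is None

Note that nothing in this diff consumes the new field yet; the commit only
records the depth, presumably so that later delta-selection logic can use it.
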
@@ -1,3039 +1,3053 @@
 # revlog.py - storage back-end for mercurial
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 """Storage back-end for Mercurial.

 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """

 from __future__ import absolute_import

 import collections
 import contextlib
 import errno
 import hashlib
 import heapq
 import os
 import re
 import struct
 import zlib

 # import stuff from node for others to import from revlog
 from .node import (
     bin,
     hex,
     nullid,
     nullrev,
     wdirfilenodeids,
     wdirhex,
     wdirid,
     wdirrev,
 )
 from .i18n import _
 from .thirdparty import (
     attr,
 )
 from . import (
     ancestor,
     error,
     mdiff,
     policy,
     pycompat,
     templatefilters,
     util,
 )
 from .utils import (
     stringutil,
 )

 parsers = policy.importmod(r'parsers')

 # Aliased for performance.
 _zlibdecompress = zlib.decompress

 # revlog header flags
 REVLOGV0 = 0
 REVLOGV1 = 1
 # Dummy value until file format is finalized.
 # Reminder: change the bounds check in revlog.__init__ when this is changed.
 REVLOGV2 = 0xDEAD
 FLAG_INLINE_DATA = (1 << 16)
 FLAG_GENERALDELTA = (1 << 17)
 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
 REVLOG_DEFAULT_FORMAT = REVLOGV1
 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
 REVLOGV2_FLAGS = REVLOGV1_FLAGS

 # revlog index flags
 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
 REVIDX_DEFAULT_FLAGS = 0
 # stable order in which flags need to be processed and their processors applied
 REVIDX_FLAGS_ORDER = [
     REVIDX_ISCENSORED,
     REVIDX_ELLIPSIS,
     REVIDX_EXTSTORED,
 ]
 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
 # bitmark for flags that could cause rawdata content change
 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

 # max size of revlog with inline data
 _maxinline = 131072
 _chunksize = 1048576

 RevlogError = error.RevlogError
 LookupError = error.LookupError
 AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
 CensoredNodeError = error.CensoredNodeError
 ProgrammingError = error.ProgrammingError

 # Store flag processors (cf. 'addflagprocessor()' to register)
 _flagprocessors = {
     REVIDX_ISCENSORED: None,
 }

 _mdre = re.compile('\1\n')
 def parsemeta(text):
     """return (metadatadict, metadatasize)"""
     # text can be buffer, so we can't use .startswith or .index
     if text[:2] != '\1\n':
         return None, None
     s = _mdre.search(text, 2).start()
     mtext = text[2:s]
     meta = {}
     for l in mtext.splitlines():
         k, v = l.split(": ", 1)
         meta[k] = v
     return meta, (s + 2)

 def packmeta(meta, text):
     keys = sorted(meta)
     metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
     return "\1\n%s\1\n%s" % (metatext, text)

 def _censoredtext(text):
     m, offs = parsemeta(text)
     return m and "censored" in m

 def addflagprocessor(flag, processor):
     """Register a flag processor on a revision data flag.

     Invariant:
     - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
       and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
     - Only one flag processor can be registered on a specific flag.
     - flagprocessors must be 3-tuples of functions (read, write, raw) with the
       following signatures:
           - (read)  f(self, rawtext) -> text, bool
           - (write) f(self, text) -> rawtext, bool
           - (raw)   f(self, rawtext) -> bool
       "text" is presented to the user. "rawtext" is stored in revlog data, not
       directly visible to the user.
       The boolean returned by these transforms is used to determine whether
       the returned text can be used for hash integrity checking. For example,
       if "write" returns False, then "text" is used to generate hash. If
       "write" returns True, that basically means "rawtext" returned by "write"
       should be used to generate hash. Usually, "write" and "read" return
       different booleans. And "raw" returns the same boolean as "write".

     Note: The 'raw' transform is used for changegroup generation and in some
     debug commands. In this case the transform only indicates whether the
     contents can be used for hash integrity checks.
     """
     if not flag & REVIDX_KNOWN_FLAGS:
         msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
         raise ProgrammingError(msg)
     if flag not in REVIDX_FLAGS_ORDER:
         msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
         raise ProgrammingError(msg)
     if flag in _flagprocessors:
         msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
         raise error.Abort(msg)
     _flagprocessors[flag] = processor

 def getoffset(q):
     return int(q >> 16)

 def gettype(q):
     return int(q & 0xFFFF)

 def offset_type(offset, type):
     if (type & ~REVIDX_KNOWN_FLAGS) != 0:
         raise ValueError('unknown revlog index flags')
     return int(int(offset) << 16 | type)

 _nullhash = hashlib.sha1(nullid)

 def hash(text, p1, p2):
     """generate a hash from the given text and its parent hashes

     This hash combines both the current file contents and its history
     in a manner that makes it easy to distinguish nodes with the same
     content in the revision graph.
     """
     # As of now, if one of the parent nodes is null, p2 is null
     if p2 == nullid:
         # deep copy of a hash is faster than creating one
         s = _nullhash.copy()
         s.update(p1)
     else:
         # none of the parent nodes are nullid
         if p1 < p2:
             a = p1
             b = p2
         else:
             a = p2
             b = p1
         s = hashlib.sha1(a)
         s.update(b)
     s.update(text)
     return s.digest()

 class _testrevlog(object):
     """minimalist fake revlog to use in doctests"""

     def __init__(self, data, density=0.5, mingap=0):
         """data is a list of revision payload boundaries"""
         self._data = data
         self._srdensitythreshold = density
         self._srmingapsize = mingap

     def start(self, rev):
         if rev == 0:
             return 0
         return self._data[rev - 1]

     def end(self, rev):
         return self._data[rev]

     def length(self, rev):
         return self.end(rev) - self.start(rev)

     def __len__(self):
         return len(self._data)

 def _trimchunk(revlog, revs, startidx, endidx=None):
     """returns revs[startidx:endidx] without empty trailing revs

     Doctest Setup
     >>> revlog = _testrevlog([
     ... 5, #0
     ... 10, #1
     ... 12, #2
     ... 12, #3 (empty)
     ... 17, #4
     ... 21, #5
     ... 21, #6 (empty)
     ... ])

     Contiguous cases:
     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
     [0, 1, 2, 3, 4, 5]
     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
     [0, 1, 2, 3, 4]
     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
     [0, 1, 2]
     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
     [2]
     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
     [3, 4, 5]
     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
     [3, 4]

     Discontiguous cases:
     >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
     [1, 3, 5]
     >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
     [1]
     >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
     [3, 5]
     >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
     [3, 5]
     """
     length = revlog.length

     if endidx is None:
         endidx = len(revs)

     # If we have a non-empty delta candidate, there is nothing to trim
     if revs[endidx - 1] < len(revlog):
         # Trim empty revs at the end, except the very first revision of a chain
         while (endidx > 1
                and endidx > startidx
                and length(revs[endidx - 1]) == 0):
             endidx -= 1

     return revs[startidx:endidx]

 def _segmentspan(revlog, revs, deltainfo=None):
     """Get the byte span of a segment of revisions

     revs is a sorted array of revision numbers

     >>> revlog = _testrevlog([
     ... 5, #0
     ... 10, #1
     ... 12, #2
     ... 12, #3 (empty)
     ... 17, #4
     ... ])

     >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
     17
     >>> _segmentspan(revlog, [0, 4])
     17
     >>> _segmentspan(revlog, [3, 4])
     5
     >>> _segmentspan(revlog, [1, 2, 3,])
     7
     >>> _segmentspan(revlog, [1, 3])
     7
     """
     if not revs:
         return 0
     if deltainfo is not None and len(revlog) <= revs[-1]:
         if len(revs) == 1:
             return deltainfo.deltalen
         offset = revlog.end(len(revlog) - 1)
         end = deltainfo.deltalen + offset
     else:
         end = revlog.end(revs[-1])
     return end - revlog.start(revs[0])

 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
     """slice revs to reduce the amount of unrelated data to be read from disk.

     ``revs`` is sliced into groups that should be read at once.
     Assume that revs are sorted.

     The initial chunk is sliced until the overall density (payload/chunks-span
     ratio) is above `revlog._srdensitythreshold`. No gap smaller than
     `revlog._srmingapsize` is skipped.

     If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
     For consistency with other slicing choices, this limit won't go lower than
     `revlog._srmingapsize`.

     If individual revision chunks are larger than this limit, they will still
     be yielded individually.

     >>> revlog = _testrevlog([
     ... 5, #00 (5)
     ... 10, #01 (5)
     ... 12, #02 (2)
     ... 12, #03 (empty)
     ... 27, #04 (15)
     ... 31, #05 (4)
     ... 31, #06 (empty)
     ... 42, #07 (11)
     ... 47, #08 (5)
     ... 47, #09 (empty)
     ... 48, #10 (1)
     ... 51, #11 (3)
     ... 74, #12 (23)
     ... 85, #13 (11)
     ... 86, #14 (1)
     ... 91, #15 (5)
     ... ])

     >>> list(_slicechunk(revlog, list(range(16))))
     [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
     >>> list(_slicechunk(revlog, [0, 15]))
     [[0], [15]]
     >>> list(_slicechunk(revlog, [0, 11, 15]))
     [[0], [11], [15]]
     >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
     [[0], [11, 13, 15]]
     >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
     [[1, 2], [5, 8, 10, 11], [14]]

     Slicing with a maximum chunk size
     >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
     [[0], [11], [13], [15]]
     >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
     [[0], [11], [13, 15]]
     """
     if targetsize is not None:
         targetsize = max(targetsize, revlog._srmingapsize)
     # targetsize should not be specified when evaluating delta candidates:
     # * targetsize is used to ensure we stay within specification when reading,
     # * deltainfo is used to pick a good delta chain when writing.
     if not (deltainfo is None or targetsize is None):
         msg = 'cannot use `targetsize` with a `deltainfo`'
         raise error.ProgrammingError(msg)
     for chunk in _slicechunktodensity(revlog, revs,
                                       deltainfo,
                                       revlog._srdensitythreshold,
                                       revlog._srmingapsize):
         for subchunk in _slicechunktosize(revlog, chunk, targetsize):
             yield subchunk

 def _slicechunktosize(revlog, revs, targetsize=None):
     """slice revs to match the target size

     This is intended to be used on chunks that density slicing selected, but
     that are still too large compared to the read guarantee of revlogs. This
     might happen when the "minimal gap size" interrupted the slicing, or when
     chains are built in a way that creates large blocks next to each other.

     >>> revlog = _testrevlog([
     ... 3, #0 (3)
     ... 5, #1 (2)
     ... 6, #2 (1)
     ... 8, #3 (2)
     ... 8, #4 (empty)
     ... 11, #5 (3)
     ... 12, #6 (1)
     ... 13, #7 (1)
     ... 14, #8 (1)
     ... ])

     Cases where chunk is already small enough
     >>> list(_slicechunktosize(revlog, [0], 3))
     [[0]]
     >>> list(_slicechunktosize(revlog, [6, 7], 3))
     [[6, 7]]
     >>> list(_slicechunktosize(revlog, [0], None))
     [[0]]
     >>> list(_slicechunktosize(revlog, [6, 7], None))
     [[6, 7]]

     cases where we need actual slicing
     >>> list(_slicechunktosize(revlog, [0, 1], 3))
     [[0], [1]]
     >>> list(_slicechunktosize(revlog, [1, 3], 3))
     [[1], [3]]
     >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
     [[1, 2], [3]]
     >>> list(_slicechunktosize(revlog, [3, 5], 3))
     [[3], [5]]
     >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
     [[3], [5]]
     >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
     [[5], [6, 7, 8]]
     >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
     [[0], [1, 2], [3], [5], [6, 7, 8]]

     Case with too large individual chunk (must return valid chunk)
     >>> list(_slicechunktosize(revlog, [0, 1], 2))
     [[0], [1]]
     >>> list(_slicechunktosize(revlog, [1, 3], 1))
     [[1], [3]]
     >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
     [[3], [5]]
     """
     assert targetsize is None or 0 <= targetsize
     if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
         yield revs
         return

     startrevidx = 0
     startdata = revlog.start(revs[0])
     endrevidx = 0
     iterrevs = enumerate(revs)
     next(iterrevs) # skip first rev.
     for idx, r in iterrevs:
         span = revlog.end(r) - startdata
         if span <= targetsize:
             endrevidx = idx
         else:
             chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
             if chunk:
                 yield chunk
             startrevidx = idx
             startdata = revlog.start(r)
             endrevidx = idx
     yield _trimchunk(revlog, revs, startrevidx)

 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                          mingapsize=0):
     """slice revs to reduce the amount of unrelated data to be read from disk.

     ``revs`` is sliced into groups that should be read at once.
     Assume that revs are sorted.

     ``deltainfo`` is a _deltainfo instance of a revision that we would append
     to the top of the revlog.

     The initial chunk is sliced until the overall density (payload/chunks-span
     ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
     skipped.

     >>> revlog = _testrevlog([
     ... 5, #00 (5)
     ... 10, #01 (5)
     ... 12, #02 (2)
     ... 12, #03 (empty)
     ... 27, #04 (15)
     ... 31, #05 (4)
     ... 31, #06 (empty)
     ... 42, #07 (11)
     ... 47, #08 (5)
     ... 47, #09 (empty)
     ... 48, #10 (1)
     ... 51, #11 (3)
     ... 74, #12 (23)
     ... 85, #13 (11)
     ... 86, #14 (1)
     ... 91, #15 (5)
     ... ])

     >>> list(_slicechunktodensity(revlog, list(range(16))))
     [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
     >>> list(_slicechunktodensity(revlog, [0, 15]))
     [[0], [15]]
     >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
     [[0], [11], [15]]
     >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
     [[0], [11, 13, 15]]
     >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
     [[1, 2], [5, 8, 10, 11], [14]]
     >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
     ...                           mingapsize=20))
     [[1, 2, 3, 5, 8, 10, 11], [14]]
     >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
     ...                           targetdensity=0.95))
     [[1, 2], [5], [8, 10, 11], [14]]
     >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
     ...                           targetdensity=0.95, mingapsize=12))
     [[1, 2], [5, 8, 10, 11], [14]]
     """
     start = revlog.start
     length = revlog.length

     if len(revs) <= 1:
         yield revs
         return

     nextrev = len(revlog)
     nextoffset = revlog.end(nextrev - 1)

     if deltainfo is None:
         deltachainspan = _segmentspan(revlog, revs)
         chainpayload = sum(length(r) for r in revs)
     else:
         deltachainspan = deltainfo.distance
         chainpayload = deltainfo.compresseddeltalen

     if deltachainspan < mingapsize:
         yield revs
         return

     readdata = deltachainspan

     if deltachainspan:
         density = chainpayload / float(deltachainspan)
     else:
         density = 1.0

     if density >= targetdensity:
         yield revs
         return

     if deltainfo is not None and deltainfo.deltalen:
         revs = list(revs)
         revs.append(nextrev)

     # Store the gaps in a heap to have them sorted by decreasing size
     gapsheap = []
     heapq.heapify(gapsheap)
     prevend = None
     for i, rev in enumerate(revs):
         if rev < nextrev:
             revstart = start(rev)
             revlen = length(rev)
         else:
             revstart = nextoffset
             revlen = deltainfo.deltalen

         # Skip empty revisions to form larger holes
         if revlen == 0:
             continue

         if prevend is not None:
             gapsize = revstart - prevend
             # only consider holes that are large enough
             if gapsize > mingapsize:
                 heapq.heappush(gapsheap, (-gapsize, i))

         prevend = revstart + revlen

     # Collect the indices of the largest holes until the density is acceptable
     indicesheap = []
     heapq.heapify(indicesheap)
     while gapsheap and density < targetdensity:
         oppgapsize, gapidx = heapq.heappop(gapsheap)

         heapq.heappush(indicesheap, gapidx)

         # the gap sizes are stored as negatives to be sorted decreasingly
         # by the heap
         readdata -= (-oppgapsize)
         if readdata > 0:
             density = chainpayload / float(readdata)
         else:
             density = 1.0

     # Cut the revs at collected indices
     previdx = 0
     while indicesheap:
         idx = heapq.heappop(indicesheap)

         chunk = _trimchunk(revlog, revs, previdx, idx)
         if chunk:
             yield chunk

         previdx = idx

     chunk = _trimchunk(revlog, revs, previdx)
     if chunk:
         yield chunk

 @attr.s(slots=True, frozen=True)
 class _deltainfo(object):
     distance = attr.ib()
     deltalen = attr.ib()
     data = attr.ib()
     base = attr.ib()
     chainbase = attr.ib()
     chainlen = attr.ib()
     compresseddeltalen = attr.ib()
+    snapshotdepth = attr.ib()

 class _deltacomputer(object):
     def __init__(self, revlog):
         self.revlog = revlog

     def _getcandidaterevs(self, p1, p2, cachedelta):
         """
         Provides revisions that are worth diffing against, grouped by level
         of ease.
         """
         revlog = self.revlog
         gdelta = revlog._generaldelta
         curr = len(revlog)
         prev = curr - 1
         p1r, p2r = revlog.rev(p1), revlog.rev(p2)

         # should we try to build a delta?
         if prev != nullrev and revlog.storedeltachains:
             tested = set()
             # This condition is true most of the time when processing
             # changegroup data into a generaldelta repo. The only time it
             # isn't true is if this is the first revision in a delta chain
             # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
             if cachedelta and gdelta and revlog._lazydeltabase:
                 # Assume what we received from the server is a good choice;
                 # build delta will reuse the cache
                 yield (cachedelta[0],)
                 tested.add(cachedelta[0])

             if gdelta:
                 # exclude already lazy tested base if any
                 parents = [p for p in (p1r, p2r)
                            if p != nullrev and p not in tested]

                 if not revlog._deltabothparents and len(parents) == 2:
                     parents.sort()
                     # To minimize the chance of having to build a fulltext,
                     # pick first whichever parent is closest to us (max rev)
                     yield (parents[1],)
                     # then the other one (min rev) if the first did not fit
                     yield (parents[0],)
                     tested.update(parents)
                 elif len(parents) > 0:
                     # Test all parents (1 or 2), and keep the best candidate
                     yield parents
                     tested.update(parents)

             if prev not in tested:
                 # other approaches failed; try against prev to hopefully save
                 # us a fulltext.
                 yield (prev,)
                 tested.add(prev)

     def buildtext(self, revinfo, fh):
         """Builds a fulltext version of a revision

         revinfo: _revisioninfo instance that contains all needed info
         fh:      file handle to either the .i or the .d revlog file,
                  depending on whether it is inlined or not
         """
         btext = revinfo.btext
         if btext[0] is not None:
             return btext[0]

         revlog = self.revlog
         cachedelta = revinfo.cachedelta
         flags = revinfo.flags
         node = revinfo.node

         baserev = cachedelta[0]
         delta = cachedelta[1]
         # special case deltas which replace entire base; no need to decode
         # base revision. this neatly avoids censored bases, which throw when
         # they're decoded.
         hlen = struct.calcsize(">lll")
         if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                    len(delta) - hlen):
             btext[0] = delta[hlen:]
         else:
             # deltabase is rawtext before changed by flag processors, which is
             # equivalent to non-raw text
             basetext = revlog.revision(baserev, _df=fh, raw=False)
             btext[0] = mdiff.patch(basetext, delta)

         try:
             res = revlog._processflags(btext[0], flags, 'read', raw=True)
             btext[0], validatehash = res
             if validatehash:
                 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
             if flags & REVIDX_ISCENSORED:
                 raise RevlogError(_('node %s is not censored') % node)
         except CensoredNodeError:
             # must pass the censored index flag to add censored revisions
             if not flags & REVIDX_ISCENSORED:
                 raise
         return btext[0]

     def _builddeltadiff(self, base, revinfo, fh):
         revlog = self.revlog
         t = self.buildtext(revinfo, fh)
         if revlog.iscensored(base):
             # deltas based on a censored revision must replace the
             # full content in one patch, so delta works everywhere
             header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
             delta = header + t
         else:
             ptext = revlog.revision(base, _df=fh, raw=True)
             delta = mdiff.textdiff(ptext, t)

         return delta

     def _builddeltainfo(self, revinfo, base, fh):
         # can we use the cached delta?
         if revinfo.cachedelta and revinfo.cachedelta[0] == base:
             delta = revinfo.cachedelta[1]
         else:
             delta = self._builddeltadiff(base, revinfo, fh)
         revlog = self.revlog
         header, data = revlog.compress(delta)
         deltalen = len(header) + len(data)
         chainbase = revlog.chainbase(base)
         offset = revlog.end(len(revlog) - 1)
         dist = deltalen + offset - revlog.start(chainbase)
         if revlog._generaldelta:
             deltabase = base
         else:
             deltabase = chainbase
         chainlen, compresseddeltalen = revlog._chaininfo(base)
         chainlen += 1
         compresseddeltalen += deltalen
+
+        revlog = self.revlog
+        snapshotdepth = None
+        if deltabase == nullrev:
+            snapshotdepth = 0
+        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
+            # A delta chain should always be one full snapshot,
+            # zero or more semi-snapshots, and zero or more deltas
+            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
+            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
+                snapshotdepth = len(revlog._deltachain(deltabase)[0])
+
         return _deltainfo(dist, deltalen, (header, data), deltabase,
-                          chainbase, chainlen, compresseddeltalen)
+                          chainbase, chainlen, compresseddeltalen,
+                          snapshotdepth)

     def finddeltainfo(self, revinfo, fh):
         """Find an acceptable delta against a candidate revision

         revinfo: information about the revision (instance of _revisioninfo)
         fh:      file handle to either the .i or the .d revlog file,
                  depending on whether it is inlined or not

         Returns the first acceptable candidate revision, as ordered by
         _getcandidaterevs
         """
         if not revinfo.textlen:
             return None # empty files do not need a delta

         cachedelta = revinfo.cachedelta
         p1 = revinfo.p1
         p2 = revinfo.p2
         revlog = self.revlog

         deltalength = self.revlog.length
         deltaparent = self.revlog.deltaparent

         deltainfo = None
         deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
         for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
             # filter out delta bases that will never produce a good delta
             candidaterevs = [r for r in candidaterevs
                              if self.revlog.length(r) <= deltas_limit]
             nominateddeltas = []
             for candidaterev in candidaterevs:
                 # skip over empty deltas (no need to include them in a chain)
                 while candidaterev != nullrev and not deltalength(candidaterev):
                     candidaterev = deltaparent(candidaterev)
                 # no need to try a delta against nullid, this will be handled
                 # by fulltext later.
                 if candidaterev == nullrev:
                     continue
                 # no delta for rawtext-changing revs (see "candelta" for why)
                 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                     continue
                 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                 if revlog._isgooddeltainfo(candidatedelta, revinfo):
                     nominateddeltas.append(candidatedelta)
             if nominateddeltas:
                 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                 break

         return deltainfo

 @attr.s(slots=True, frozen=True)
 class _revisioninfo(object):
     """Information about a revision that allows building its fulltext
     node:       expected hash of the revision
     p1, p2:     parent revs of the revision
     btext:      built text cache consisting of a one-element list
     cachedelta: (baserev, uncompressed_delta) or None
     flags:      flags associated with the revision storage

     One of btext[0] or cachedelta must be set.
     """
     node = attr.ib()
     p1 = attr.ib()
     p2 = attr.ib()
     btext = attr.ib()
     textlen = attr.ib()
     cachedelta = attr.ib()
     flags = attr.ib()

 # index v0:
 #  4 bytes: offset
 #  4 bytes: compressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 # 20 bytes: parent 1 nodeid
 # 20 bytes: parent 2 nodeid
 # 20 bytes: nodeid
 indexformatv0 = struct.Struct(">4l20s20s20s")
 indexformatv0_pack = indexformatv0.pack
 indexformatv0_unpack = indexformatv0.unpack

 class revlogoldindex(list):
     def __getitem__(self, i):
         if i == -1:
             return (0, 0, 0, -1, -1, -1, -1, nullid)
         return list.__getitem__(self, i)

 # maximum <delta-chain-data>/<revision-text-length> ratio
 LIMIT_DELTA2TEXT = 2

 class revlogoldio(object):
     def __init__(self):
         self.size = indexformatv0.size

     def parseindex(self, data, inline):
         s = self.size
         index = []
         nodemap = {nullid: nullrev}
         n = off = 0
         l = len(data)
         while off + s <= l:
             cur = data[off:off + s]
             off += s
             e = indexformatv0_unpack(cur)
             # transform to revlogv1 format
             e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                   nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
             index.append(e2)
             nodemap[e[6]] = n
             n += 1

         return revlogoldindex(index), nodemap, None

     def packentry(self, entry, node, version, rev):
         if gettype(entry[0]):
             raise RevlogError(_('index entry flags need revlog version 1'))
         e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
               node(entry[5]), node(entry[6]), entry[7])
         return indexformatv0_pack(*e2)

 # index ng:
 #  6 bytes: offset
 #  2 bytes: flags
 #  4 bytes: compressed length
 #  4 bytes: uncompressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 #  4 bytes: parent 1 rev
 #  4 bytes: parent 2 rev
 # 32 bytes: nodeid
 indexformatng = struct.Struct(">Qiiiiii20s12x")
 indexformatng_pack = indexformatng.pack
 versionformat = struct.Struct(">I")
 versionformat_pack = versionformat.pack
 versionformat_unpack = versionformat.unpack

 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
 # signed integer)
 _maxentrysize = 0x7fffffff

 class revlogio(object):
     def __init__(self):
         self.size = indexformatng.size

     def parseindex(self, data, inline):
         # call the C implementation to parse the index data
         index, cache = parsers.parse_index2(data, inline)
         return index, getattr(index, 'nodemap', None), cache

     def packentry(self, entry, node, version, rev):
         p = indexformatng_pack(*entry)
         if rev == 0:
             p = versionformat_pack(version) + p[4:]
         return p

 class revlog(object):
     """
     the underlying revision storage object

     A revlog consists of two parts, an index and the revision data.

     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.

     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.

     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.

     If checkambig, indexfile is opened with checkambig=True at
     writing, to avoid file stat ambiguity.

     If mmaplargeindex is True, and an mmapindexthreshold is set, the
     index will be mmapped rather than read if it is larger than the
     configured threshold.

     If censorable is True, the revlog can have censored revisions.
     """
     def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                  mmaplargeindex=False, censorable=False):
         """
         create a revlog object

         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
         """
         self.indexfile = indexfile
         self.datafile = datafile or (indexfile[:-2] + ".d")
         self.opener = opener
         # When True, indexfile is opened with checkambig=True at writing, to
         # avoid file stat ambiguity.
         self._checkambig = checkambig
         self._censorable = censorable
         # 3-tuple of (node, rev, text) for a raw revision.
         self._cache = None
         # Maps rev to chain base rev.
         self._chainbasecache = util.lrucachedict(100)
948 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
962 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
949 self._chunkcache = (0, '')
963 self._chunkcache = (0, '')
950 # How much data to read and cache into the raw revlog data cache.
964 # How much data to read and cache into the raw revlog data cache.
951 self._chunkcachesize = 65536
965 self._chunkcachesize = 65536
952 self._maxchainlen = None
966 self._maxchainlen = None
953 self._deltabothparents = True
967 self._deltabothparents = True
954 self.index = []
968 self.index = []
955 # Mapping of partial identifiers to full nodes.
969 # Mapping of partial identifiers to full nodes.
956 self._pcache = {}
970 self._pcache = {}
957 # Mapping of revision integer to full node.
971 # Mapping of revision integer to full node.
958 self._nodecache = {nullid: nullrev}
972 self._nodecache = {nullid: nullrev}
959 self._nodepos = None
973 self._nodepos = None
960 self._compengine = 'zlib'
974 self._compengine = 'zlib'
961 self._maxdeltachainspan = -1
975 self._maxdeltachainspan = -1
962 self._withsparseread = False
976 self._withsparseread = False
963 self._sparserevlog = False
977 self._sparserevlog = False
964 self._srdensitythreshold = 0.50
978 self._srdensitythreshold = 0.50
965 self._srmingapsize = 262144
979 self._srmingapsize = 262144
966
980
967 mmapindexthreshold = None
981 mmapindexthreshold = None
968 v = REVLOG_DEFAULT_VERSION
982 v = REVLOG_DEFAULT_VERSION
969 opts = getattr(opener, 'options', None)
983 opts = getattr(opener, 'options', None)
970 if opts is not None:
984 if opts is not None:
971 if 'revlogv2' in opts:
985 if 'revlogv2' in opts:
972 # version 2 revlogs always use generaldelta.
986 # version 2 revlogs always use generaldelta.
973 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
987 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
974 elif 'revlogv1' in opts:
988 elif 'revlogv1' in opts:
975 if 'generaldelta' in opts:
989 if 'generaldelta' in opts:
976 v |= FLAG_GENERALDELTA
990 v |= FLAG_GENERALDELTA
977 else:
991 else:
978 v = 0
992 v = 0
979 if 'chunkcachesize' in opts:
993 if 'chunkcachesize' in opts:
980 self._chunkcachesize = opts['chunkcachesize']
994 self._chunkcachesize = opts['chunkcachesize']
981 if 'maxchainlen' in opts:
995 if 'maxchainlen' in opts:
982 self._maxchainlen = opts['maxchainlen']
996 self._maxchainlen = opts['maxchainlen']
983 if 'deltabothparents' in opts:
997 if 'deltabothparents' in opts:
984 self._deltabothparents = opts['deltabothparents']
998 self._deltabothparents = opts['deltabothparents']
985 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
999 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
986 if 'compengine' in opts:
1000 if 'compengine' in opts:
987 self._compengine = opts['compengine']
1001 self._compengine = opts['compengine']
988 if 'maxdeltachainspan' in opts:
1002 if 'maxdeltachainspan' in opts:
989 self._maxdeltachainspan = opts['maxdeltachainspan']
1003 self._maxdeltachainspan = opts['maxdeltachainspan']
990 if mmaplargeindex and 'mmapindexthreshold' in opts:
1004 if mmaplargeindex and 'mmapindexthreshold' in opts:
991 mmapindexthreshold = opts['mmapindexthreshold']
1005 mmapindexthreshold = opts['mmapindexthreshold']
992 self._sparserevlog = bool(opts.get('sparse-revlog', False))
1006 self._sparserevlog = bool(opts.get('sparse-revlog', False))
993 withsparseread = bool(opts.get('with-sparse-read', False))
1007 withsparseread = bool(opts.get('with-sparse-read', False))
994 # sparse-revlog forces sparse-read
1008 # sparse-revlog forces sparse-read
995 self._withsparseread = self._sparserevlog or withsparseread
1009 self._withsparseread = self._sparserevlog or withsparseread
996 if 'sparse-read-density-threshold' in opts:
1010 if 'sparse-read-density-threshold' in opts:
997 self._srdensitythreshold = opts['sparse-read-density-threshold']
1011 self._srdensitythreshold = opts['sparse-read-density-threshold']
998 if 'sparse-read-min-gap-size' in opts:
1012 if 'sparse-read-min-gap-size' in opts:
999 self._srmingapsize = opts['sparse-read-min-gap-size']
1013 self._srmingapsize = opts['sparse-read-min-gap-size']
1000
1014
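        # note: for a positive integer, x & (x - 1) clears the lowest set
        # bit, so the expression below is zero exactly when x is a power
        # of two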
        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                        self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        length = len(self)
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
            if stop > length:
                stop = length
        else:
            stop = length
        return pycompat.xrange(start, stop, step)
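    # revs() bounds sketch (hypothetical revlog with 5 revisions):
    #   revs()     -> 0, 1, 2, 3, 4
    #   revs(1, 3) -> 1, 2, 3        (stop is inclusive)
    #   revs(3, 1) -> 3, 2, 1        (start > stop iterates backwards)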

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7] # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and
        # two clients could have the same revlog node with different flags
        # (i.e. different rawtext contents) and the delta could be
        # incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
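    # Packing illustration (numbers invented): for offset 1234 and flag
    # 0x1 the stored field is (1234 << 16) | 0x0001, so start() recovers
    # 1234 via the right shift and flags() recovers 0x0001 via the mask.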

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
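        # the extra trailing slot absorbs writes for nullrev parents
        # (index -1 is the last element); it starts at 0 and is only
        # ever cleared, so it can never be reported as a head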
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
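        # a parent always has a lower revision number than its children,
        # so a higher-numbered revision can never be an ancestor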
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass
1805
1819
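# Hedged sketch of the str(rev) branch above: the "%d" round-trip
# rejects identifiers like "010" or "+1" that parse as an int but are
# not a canonical revision number, and negative values count from the
# tip. The helper name and the use of str (for brevity) are invented.

def _toy_match_strrev(id, nrevs):
    rev = int(id)
    if "%d" % rev != id:
        raise ValueError(id)        # e.g. "010": not canonical
    if rev < 0:
        rev = nrevs + rev           # "-1" addresses the tip revision
    if rev < 0 or rev >= nrevs:
        raise ValueError(id)
    return rev

assert _toy_match_strrev("3", 10) == 3
assert _toy_match_strrev("-1", 10) == 9
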
1806 def _partialmatch(self, id):
1820 def _partialmatch(self, id):
1807 # we don't care wdirfilenodeids as they should be always full hash
1821 # we don't care wdirfilenodeids as they should be always full hash
1808 maybewdir = wdirhex.startswith(id)
1822 maybewdir = wdirhex.startswith(id)
1809 try:
1823 try:
1810 partial = self.index.partialmatch(id)
1824 partial = self.index.partialmatch(id)
1811 if partial and self.hasnode(partial):
1825 if partial and self.hasnode(partial):
1812 if maybewdir:
1826 if maybewdir:
1813 # single 'ff...' match in radix tree, ambiguous with wdir
1827 # single 'ff...' match in radix tree, ambiguous with wdir
1814 raise RevlogError
1828 raise RevlogError
1815 return partial
1829 return partial
1816 if maybewdir:
1830 if maybewdir:
1817 # no 'ff...' match in radix tree, wdir identified
1831 # no 'ff...' match in radix tree, wdir identified
1818 raise error.WdirUnsupported
1832 raise error.WdirUnsupported
1819 return None
1833 return None
1820 except RevlogError:
1834 except RevlogError:
1821 # parsers.c radix tree lookup gave multiple matches
1835 # parsers.c radix tree lookup gave multiple matches
1822 # fast path: for unfiltered changelog, radix tree is accurate
1836 # fast path: for unfiltered changelog, radix tree is accurate
1823 if not getattr(self, 'filteredrevs', None):
1837 if not getattr(self, 'filteredrevs', None):
1824 raise AmbiguousPrefixLookupError(id, self.indexfile,
1838 raise AmbiguousPrefixLookupError(id, self.indexfile,
1825 _('ambiguous identifier'))
1839 _('ambiguous identifier'))
1826 # fall through to slow path that filters hidden revisions
1840 # fall through to slow path that filters hidden revisions
1827 except (AttributeError, ValueError):
1841 except (AttributeError, ValueError):
1828 # we are pure python, or key was too short to search radix tree
1842 # we are pure python, or key was too short to search radix tree
1829 pass
1843 pass
1830
1844
1831 if id in self._pcache:
1845 if id in self._pcache:
1832 return self._pcache[id]
1846 return self._pcache[id]
1833
1847
1834 if len(id) <= 40:
1848 if len(id) <= 40:
1835 try:
1849 try:
1836 # hex(node)[:...]
1850 # hex(node)[:...]
1837 l = len(id) // 2 # grab an even number of digits
1851 l = len(id) // 2 # grab an even number of digits
1838 prefix = bin(id[:l * 2])
1852 prefix = bin(id[:l * 2])
1839 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1853 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1840 nl = [n for n in nl if hex(n).startswith(id) and
1854 nl = [n for n in nl if hex(n).startswith(id) and
1841 self.hasnode(n)]
1855 self.hasnode(n)]
1842 if len(nl) > 0:
1856 if len(nl) > 0:
1843 if len(nl) == 1 and not maybewdir:
1857 if len(nl) == 1 and not maybewdir:
1844 self._pcache[id] = nl[0]
1858 self._pcache[id] = nl[0]
1845 return nl[0]
1859 return nl[0]
1846 raise AmbiguousPrefixLookupError(id, self.indexfile,
1860 raise AmbiguousPrefixLookupError(id, self.indexfile,
1847 _('ambiguous identifier'))
1861 _('ambiguous identifier'))
1848 if maybewdir:
1862 if maybewdir:
1849 raise error.WdirUnsupported
1863 raise error.WdirUnsupported
1850 return None
1864 return None
1851 except TypeError:
1865 except TypeError:
1852 pass
1866 pass
1853
1867
1854 def lookup(self, id):
1868 def lookup(self, id):
1855 """locate a node based on:
1869 """locate a node based on:
1856 - revision number or str(revision number)
1870 - revision number or str(revision number)
1857 - nodeid or subset of hex nodeid
1871 - nodeid or subset of hex nodeid
1858 """
1872 """
1859 n = self._match(id)
1873 n = self._match(id)
1860 if n is not None:
1874 if n is not None:
1861 return n
1875 return n
1862 n = self._partialmatch(id)
1876 n = self._partialmatch(id)
1863 if n:
1877 if n:
1864 return n
1878 return n
1865
1879
1866 raise LookupError(id, self.indexfile, _('no match found'))
1880 raise LookupError(id, self.indexfile, _('no match found'))
1867
1881
1868 def shortest(self, node, minlength=1):
1882 def shortest(self, node, minlength=1):
1869 """Find the shortest unambiguous prefix that matches node."""
1883 """Find the shortest unambiguous prefix that matches node."""
1870 def isvalid(prefix):
1884 def isvalid(prefix):
1871 try:
1885 try:
1872 node = self._partialmatch(prefix)
1886 node = self._partialmatch(prefix)
1873 except error.RevlogError:
1887 except error.RevlogError:
1874 return False
1888 return False
1875 except error.WdirUnsupported:
1889 except error.WdirUnsupported:
1876 # single 'ff...' match
1890 # single 'ff...' match
1877 return True
1891 return True
1878 if node is None:
1892 if node is None:
1879 raise LookupError(node, self.indexfile, _('no node'))
1893 raise LookupError(node, self.indexfile, _('no node'))
1880 return True
1894 return True
1881
1895
1882 def maybewdir(prefix):
1896 def maybewdir(prefix):
1883 return all(c == 'f' for c in prefix)
1897 return all(c == 'f' for c in prefix)
1884
1898
1885 hexnode = hex(node)
1899 hexnode = hex(node)
1886
1900
1887 def disambiguate(hexnode, minlength):
1901 def disambiguate(hexnode, minlength):
1888 """Disambiguate against wdirid."""
1902 """Disambiguate against wdirid."""
1889 for length in range(minlength, 41):
1903 for length in range(minlength, 41):
1890 prefix = hexnode[:length]
1904 prefix = hexnode[:length]
1891 if not maybewdir(prefix):
1905 if not maybewdir(prefix):
1892 return prefix
1906 return prefix
1893
1907
1894 if not getattr(self, 'filteredrevs', None):
1908 if not getattr(self, 'filteredrevs', None):
1895 try:
1909 try:
1896 length = max(self.index.shortest(node), minlength)
1910 length = max(self.index.shortest(node), minlength)
1897 return disambiguate(hexnode, length)
1911 return disambiguate(hexnode, length)
1898 except RevlogError:
1912 except RevlogError:
1899 if node != wdirid:
1913 if node != wdirid:
1900 raise LookupError(node, self.indexfile, _('no node'))
1914 raise LookupError(node, self.indexfile, _('no node'))
1901 except AttributeError:
1915 except AttributeError:
1902 # Fall through to pure code
1916 # Fall through to pure code
1903 pass
1917 pass
1904
1918
1905 if node == wdirid:
1919 if node == wdirid:
1906 for length in range(minlength, 41):
1920 for length in range(minlength, 41):
1907 prefix = hexnode[:length]
1921 prefix = hexnode[:length]
1908 if isvalid(prefix):
1922 if isvalid(prefix):
1909 return prefix
1923 return prefix
1910
1924
1911 for length in range(minlength, 41):
1925 for length in range(minlength, 41):
1912 prefix = hexnode[:length]
1926 prefix = hexnode[:length]
1913 if isvalid(prefix):
1927 if isvalid(prefix):
1914 return disambiguate(hexnode, length)
1928 return disambiguate(hexnode, length)
1915
1929
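# Minimal standalone model of the prefix search performed above, over a
# plain list of hex nodeids instead of the index radix tree; it ignores
# the wdir special case and hidden-revision filtering. Everything here
# is illustrative.

def _toy_shortest(hexnode, allnodes, minlength=1):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        if [n for n in allnodes if n.startswith(prefix)] == [hexnode]:
            return prefix
    return hexnode

_nodes = ['ab12ef', 'ab99cd', 'f0f0f0']   # abbreviated fake nodeids
assert _toy_shortest('ab12ef', _nodes) == 'ab1'
assert _toy_shortest('f0f0f0', _nodes) == 'f'
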
1916 def cmp(self, node, text):
1930 def cmp(self, node, text):
1917 """compare text with a given file revision
1931 """compare text with a given file revision
1918
1932
1919 returns True if text is different from what is stored.
1933 returns True if text is different from what is stored.
1920 """
1934 """
1921 p1, p2 = self.parents(node)
1935 p1, p2 = self.parents(node)
1922 return hash(text, p1, p2) != node
1936 return hash(text, p1, p2) != node
1923
1937
1924 def _cachesegment(self, offset, data):
1938 def _cachesegment(self, offset, data):
1925 """Add a segment to the revlog cache.
1939 """Add a segment to the revlog cache.
1926
1940
1927 Accepts an absolute offset and the data that is at that location.
1941 Accepts an absolute offset and the data that is at that location.
1928 """
1942 """
1929 o, d = self._chunkcache
1943 o, d = self._chunkcache
1930 # try to add to existing cache
1944 # try to add to existing cache
1931 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1945 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1932 self._chunkcache = o, d + data
1946 self._chunkcache = o, d + data
1933 else:
1947 else:
1934 self._chunkcache = offset, data
1948 self._chunkcache = offset, data
1935
1949
1936 def _readsegment(self, offset, length, df=None):
1950 def _readsegment(self, offset, length, df=None):
1937 """Load a segment of raw data from the revlog.
1951 """Load a segment of raw data from the revlog.
1938
1952
1939 Accepts an absolute offset, length to read, and an optional existing
1953 Accepts an absolute offset, length to read, and an optional existing
1940 file handle to read from.
1954 file handle to read from.
1941
1955
1942 If an existing file handle is passed, it will be seeked and the
1956 If an existing file handle is passed, it will be seeked and the
1943 original seek position will NOT be restored.
1957 original seek position will NOT be restored.
1944
1958
1945 Returns a str or buffer of raw byte data.
1959 Returns a str or buffer of raw byte data.
1946 """
1960 """
1947 # Cache data both forward and backward around the requested
1961 # Cache data both forward and backward around the requested
1948 # data, in a fixed size window. This helps speed up operations
1962 # data, in a fixed size window. This helps speed up operations
1949 # involving reading the revlog backwards.
1963 # involving reading the revlog backwards.
1950 cachesize = self._chunkcachesize
1964 cachesize = self._chunkcachesize
1951 realoffset = offset & ~(cachesize - 1)
1965 realoffset = offset & ~(cachesize - 1)
1952 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1966 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1953 - realoffset)
1967 - realoffset)
1954 with self._datareadfp(df) as df:
1968 with self._datareadfp(df) as df:
1955 df.seek(realoffset)
1969 df.seek(realoffset)
1956 d = df.read(reallength)
1970 d = df.read(reallength)
1957 self._cachesegment(realoffset, d)
1971 self._cachesegment(realoffset, d)
1958 if offset != realoffset or reallength != length:
1972 if offset != realoffset or reallength != length:
1959 return util.buffer(d, offset - realoffset, length)
1973 return util.buffer(d, offset - realoffset, length)
1960 return d
1974 return d
1961
1975
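# Worked example of the window alignment above, assuming a chunk cache
# size of 64KiB (the values are made up; cachesize must be a power of
# two for the mask arithmetic to hold):

_cachesize = 65536
_offset, _length = 70000, 100
_realoffset = _offset & ~(_cachesize - 1)
_reallength = (((_offset + _length + _cachesize) & ~(_cachesize - 1))
               - _realoffset)
assert _realoffset == 65536 and _reallength == 65536
# the aligned window fully covers the requested [offset, offset+length)
assert _realoffset <= _offset
assert _offset + _length <= _realoffset + _reallength
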
1962 def _getsegment(self, offset, length, df=None):
1976 def _getsegment(self, offset, length, df=None):
1963 """Obtain a segment of raw data from the revlog.
1977 """Obtain a segment of raw data from the revlog.
1964
1978
1965 Accepts an absolute offset, length of bytes to obtain, and an
1979 Accepts an absolute offset, length of bytes to obtain, and an
1966 optional file handle to the already-opened revlog. If the file
1980 optional file handle to the already-opened revlog. If the file
1967 handle is used, its original seek position will not be preserved.
1981 handle is used, its original seek position will not be preserved.
1968
1982
1969 Requests for data may be returned from a cache.
1983 Requests for data may be returned from a cache.
1970
1984
1971 Returns a str or a buffer instance of raw byte data.
1985 Returns a str or a buffer instance of raw byte data.
1972 """
1986 """
1973 o, d = self._chunkcache
1987 o, d = self._chunkcache
1974 l = len(d)
1988 l = len(d)
1975
1989
1976 # is it in the cache?
1990 # is it in the cache?
1977 cachestart = offset - o
1991 cachestart = offset - o
1978 cacheend = cachestart + length
1992 cacheend = cachestart + length
1979 if cachestart >= 0 and cacheend <= l:
1993 if cachestart >= 0 and cacheend <= l:
1980 if cachestart == 0 and cacheend == l:
1994 if cachestart == 0 and cacheend == l:
1981 return d # avoid a copy
1995 return d # avoid a copy
1982 return util.buffer(d, cachestart, cacheend - cachestart)
1996 return util.buffer(d, cachestart, cacheend - cachestart)
1983
1997
1984 return self._readsegment(offset, length, df=df)
1998 return self._readsegment(offset, length, df=df)
1985
1999
1986 def _getsegmentforrevs(self, startrev, endrev, df=None):
2000 def _getsegmentforrevs(self, startrev, endrev, df=None):
1987 """Obtain a segment of raw data corresponding to a range of revisions.
2001 """Obtain a segment of raw data corresponding to a range of revisions.
1988
2002
1989 Accepts the start and end revisions and an optional already-open
2003 Accepts the start and end revisions and an optional already-open
1990 file handle to be used for reading. If the file handle is read, its
2004 file handle to be used for reading. If the file handle is read, its
1991 seek position will not be preserved.
2005 seek position will not be preserved.
1992
2006
1993 Requests for data may be satisfied by a cache.
2007 Requests for data may be satisfied by a cache.
1994
2008
1995 Returns a 2-tuple of (offset, data) for the requested range of
2009 Returns a 2-tuple of (offset, data) for the requested range of
1996 revisions. Offset is the integer offset from the beginning of the
2010 revisions. Offset is the integer offset from the beginning of the
1997 revlog and data is a str or buffer of the raw byte data.
2011 revlog and data is a str or buffer of the raw byte data.
1998
2012
1999 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
2013 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
2000 to determine where each revision's data begins and ends.
2014 to determine where each revision's data begins and ends.
2001 """
2015 """
2002 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
2016 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
2003 # (functions are expensive).
2017 # (functions are expensive).
2004 index = self.index
2018 index = self.index
2005 istart = index[startrev]
2019 istart = index[startrev]
2006 start = int(istart[0] >> 16)
2020 start = int(istart[0] >> 16)
2007 if startrev == endrev:
2021 if startrev == endrev:
2008 end = start + istart[1]
2022 end = start + istart[1]
2009 else:
2023 else:
2010 iend = index[endrev]
2024 iend = index[endrev]
2011 end = int(iend[0] >> 16) + iend[1]
2025 end = int(iend[0] >> 16) + iend[1]
2012
2026
2013 if self._inline:
2027 if self._inline:
2014 start += (startrev + 1) * self._io.size
2028 start += (startrev + 1) * self._io.size
2015 end += (endrev + 1) * self._io.size
2029 end += (endrev + 1) * self._io.size
2016 length = end - start
2030 length = end - start
2017
2031
2018 return start, self._getsegment(start, length, df=df)
2032 return start, self._getsegment(start, length, df=df)
2019
2033
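# Quick arithmetic sketch of the inline adjustment above: in an inline
# revlog, each revision's index entry (self._io.size bytes) is
# interleaved with its data, so a raw data offset must skip the index
# entries of revisions 0..rev. The numbers below are invented.

_iosize = 64                     # one v1 index entry
_startrev, _datastart = 2, 300   # raw offset as recorded in the index
_fileoffset = _datastart + (_startrev + 1) * _iosize
assert _fileoffset == 492        # 300 + 3 * 64
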
2020 def _chunk(self, rev, df=None):
2034 def _chunk(self, rev, df=None):
2021 """Obtain a single decompressed chunk for a revision.
2035 """Obtain a single decompressed chunk for a revision.
2022
2036
2023 Accepts an integer revision and an optional already-open file handle
2037 Accepts an integer revision and an optional already-open file handle
2024 to be used for reading. If used, the seek position of the file will not
2038 to be used for reading. If used, the seek position of the file will not
2025 be preserved.
2039 be preserved.
2026
2040
2027 Returns a str holding uncompressed data for the requested revision.
2041 Returns a str holding uncompressed data for the requested revision.
2028 """
2042 """
2029 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
2043 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
2030
2044
2031 def _chunks(self, revs, df=None, targetsize=None):
2045 def _chunks(self, revs, df=None, targetsize=None):
2032 """Obtain decompressed chunks for the specified revisions.
2046 """Obtain decompressed chunks for the specified revisions.
2033
2047
2034 Accepts an iterable of numeric revisions that are assumed to be in
2048 Accepts an iterable of numeric revisions that are assumed to be in
2035 ascending order. Also accepts an optional already-open file handle
2049 ascending order. Also accepts an optional already-open file handle
2036 to be used for reading. If used, the seek position of the file will
2050 to be used for reading. If used, the seek position of the file will
2037 not be preserved.
2051 not be preserved.
2038
2052
2039 This function is similar to calling ``self._chunk()`` multiple times,
2053 This function is similar to calling ``self._chunk()`` multiple times,
2040 but is faster.
2054 but is faster.
2041
2055
2042 Returns a list with decompressed data for each requested revision.
2056 Returns a list with decompressed data for each requested revision.
2043 """
2057 """
2044 if not revs:
2058 if not revs:
2045 return []
2059 return []
2046 start = self.start
2060 start = self.start
2047 length = self.length
2061 length = self.length
2048 inline = self._inline
2062 inline = self._inline
2049 iosize = self._io.size
2063 iosize = self._io.size
2050 buffer = util.buffer
2064 buffer = util.buffer
2051
2065
2052 l = []
2066 l = []
2053 ladd = l.append
2067 ladd = l.append
2054
2068
2055 if not self._withsparseread:
2069 if not self._withsparseread:
2056 slicedchunks = (revs,)
2070 slicedchunks = (revs,)
2057 else:
2071 else:
2058 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
2072 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
2059
2073
2060 for revschunk in slicedchunks:
2074 for revschunk in slicedchunks:
2061 firstrev = revschunk[0]
2075 firstrev = revschunk[0]
2062 # Skip trailing revisions with empty diff
2076 # Skip trailing revisions with empty diff
2063 for lastrev in revschunk[::-1]:
2077 for lastrev in revschunk[::-1]:
2064 if length(lastrev) != 0:
2078 if length(lastrev) != 0:
2065 break
2079 break
2066
2080
2067 try:
2081 try:
2068 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2082 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2069 except OverflowError:
2083 except OverflowError:
2070 # issue4215 - we can't cache a run of chunks greater than
2084 # issue4215 - we can't cache a run of chunks greater than
2071 # 2G on Windows
2085 # 2G on Windows
2072 return [self._chunk(rev, df=df) for rev in revschunk]
2086 return [self._chunk(rev, df=df) for rev in revschunk]
2073
2087
2074 decomp = self.decompress
2088 decomp = self.decompress
2075 for rev in revschunk:
2089 for rev in revschunk:
2076 chunkstart = start(rev)
2090 chunkstart = start(rev)
2077 if inline:
2091 if inline:
2078 chunkstart += (rev + 1) * iosize
2092 chunkstart += (rev + 1) * iosize
2079 chunklength = length(rev)
2093 chunklength = length(rev)
2080 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2094 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2081
2095
2082 return l
2096 return l
2083
2097
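# Sketch of the per-revision slicing in the loop above: one contiguous
# segment is read at `offset`, and each revision's chunk is taken as a
# zero-copy view at (chunkstart - offset). All values are invented.

_segment = b'AAABBBBCC'              # pretend it starts at file offset 100
_offset = 100
_start = {7: 100, 8: 103, 9: 107}    # rev -> absolute data start
_len = {7: 3, 8: 4, 9: 2}            # rev -> chunk length

_view = memoryview(_segment)
_chunks = [bytes(_view[_start[r] - _offset:_start[r] - _offset + _len[r]])
           for r in (7, 8, 9)]
assert _chunks == [b'AAA', b'BBBB', b'CC']
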
2084 def _chunkclear(self):
2098 def _chunkclear(self):
2085 """Clear the raw chunk cache."""
2099 """Clear the raw chunk cache."""
2086 self._chunkcache = (0, '')
2100 self._chunkcache = (0, '')
2087
2101
2088 def deltaparent(self, rev):
2102 def deltaparent(self, rev):
2089 """return deltaparent of the given revision"""
2103 """return deltaparent of the given revision"""
2090 base = self.index[rev][3]
2104 base = self.index[rev][3]
2091 if base == rev:
2105 if base == rev:
2092 return nullrev
2106 return nullrev
2093 elif self._generaldelta:
2107 elif self._generaldelta:
2094 return base
2108 return base
2095 else:
2109 else:
2096 return rev - 1
2110 return rev - 1
2097
2111
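# Condensed model of the two storage modes handled above: with
# generaldelta the index records the actual delta base, while the older
# format implicitly deltas against the previous revision. The helper
# and its inputs are illustrative only (-1 stands for nullrev).

def _toy_deltaparent(rev, base, generaldelta):
    if base == rev:
        return -1                       # full version, no delta parent
    return base if generaldelta else rev - 1

assert _toy_deltaparent(5, 5, True) == -1
assert _toy_deltaparent(5, 2, True) == 2    # generaldelta: real base
assert _toy_deltaparent(5, 2, False) == 4   # legacy: previous revision
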
2098 def issnapshot(self, rev):
2112 def issnapshot(self, rev):
2099 """tells whether rev is a snapshot
2113 """tells whether rev is a snapshot
2100 """
2114 """
2101 if rev == nullrev:
2115 if rev == nullrev:
2102 return True
2116 return True
2103 deltap = self.deltaparent(rev)
2117 deltap = self.deltaparent(rev)
2104 if deltap == nullrev:
2118 if deltap == nullrev:
2105 return True
2119 return True
2106 p1, p2 = self.parentrevs(rev)
2120 p1, p2 = self.parentrevs(rev)
2107 if deltap in (p1, p2):
2121 if deltap in (p1, p2):
2108 return False
2122 return False
2109 return self.issnapshot(deltap)
2123 return self.issnapshot(deltap)
2110
2124
2111 def snapshotdepth(self, rev):
2125 def snapshotdepth(self, rev):
2112 """number of snapshot in the chain before this one"""
2126 """number of snapshot in the chain before this one"""
2113 if not self.issnapshot(rev):
2127 if not self.issnapshot(rev):
2114 raise ProgrammingError('revision %d not a snapshot' % rev)
2128 raise ProgrammingError('revision %d not a snapshot' % rev)
2115 return len(self._deltachain(rev)[0]) - 1
2129 return len(self._deltachain(rev)[0]) - 1
2116
2130
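# Standalone sketch of the snapshot logic above: a revision is a
# snapshot when following delta parents that are *not* graph parents
# eventually reaches a full version (delta parent == nullrev). The toy
# tables are invented; -1 stands for nullrev.

_dp = {0: -1, 1: 0, 2: 1}                      # rev -> delta parent
_gp = {0: (-1, -1), 1: (-1, -1), 2: (1, -1)}   # rev -> graph parents

def _toy_issnapshot(rev):
    if rev == -1 or _dp[rev] == -1:
        return True                    # full text: a level-0 snapshot
    if _dp[rev] in _gp[rev]:
        return False                   # ordinary delta against a parent
    return _toy_issnapshot(_dp[rev])   # intermediate snapshot candidate

assert _toy_issnapshot(0)       # stored as a full version
assert _toy_issnapshot(1)       # deltas against rev 0, not its parent
assert not _toy_issnapshot(2)   # deltas against its own parent, rev 1
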
2117 def revdiff(self, rev1, rev2):
2131 def revdiff(self, rev1, rev2):
2118 """return or calculate a delta between two revisions
2132 """return or calculate a delta between two revisions
2119
2133
2120 The delta calculated is in binary form and is intended to be written to
2134 The delta calculated is in binary form and is intended to be written to
2121 revlog data directly. So this function needs raw revision data.
2135 revlog data directly. So this function needs raw revision data.
2122 """
2136 """
2123 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2137 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2124 return bytes(self._chunk(rev2))
2138 return bytes(self._chunk(rev2))
2125
2139
2126 return mdiff.textdiff(self.revision(rev1, raw=True),
2140 return mdiff.textdiff(self.revision(rev1, raw=True),
2127 self.revision(rev2, raw=True))
2141 self.revision(rev2, raw=True))
2128
2142
2129 def revision(self, nodeorrev, _df=None, raw=False):
2143 def revision(self, nodeorrev, _df=None, raw=False):
2130 """return an uncompressed revision of a given node or revision
2144 """return an uncompressed revision of a given node or revision
2131 number.
2145 number.
2132
2146
2133 _df - an existing file handle to read from. (internal-only)
2147 _df - an existing file handle to read from. (internal-only)
2134 raw - an optional argument specifying if the revision data is to be
2148 raw - an optional argument specifying if the revision data is to be
2135 treated as raw data when applying flag transforms. 'raw' should be set
2149 treated as raw data when applying flag transforms. 'raw' should be set
2136 to True when generating changegroups or in debug commands.
2150 to True when generating changegroups or in debug commands.
2137 """
2151 """
2138 if isinstance(nodeorrev, int):
2152 if isinstance(nodeorrev, int):
2139 rev = nodeorrev
2153 rev = nodeorrev
2140 node = self.node(rev)
2154 node = self.node(rev)
2141 else:
2155 else:
2142 node = nodeorrev
2156 node = nodeorrev
2143 rev = None
2157 rev = None
2144
2158
2145 cachedrev = None
2159 cachedrev = None
2146 flags = None
2160 flags = None
2147 rawtext = None
2161 rawtext = None
2148 if node == nullid:
2162 if node == nullid:
2149 return ""
2163 return ""
2150 if self._cache:
2164 if self._cache:
2151 if self._cache[0] == node:
2165 if self._cache[0] == node:
2152 # _cache only stores rawtext
2166 # _cache only stores rawtext
2153 if raw:
2167 if raw:
2154 return self._cache[2]
2168 return self._cache[2]
2155 # duplicated, but good for perf
2169 # duplicated, but good for perf
2156 if rev is None:
2170 if rev is None:
2157 rev = self.rev(node)
2171 rev = self.rev(node)
2158 if flags is None:
2172 if flags is None:
2159 flags = self.flags(rev)
2173 flags = self.flags(rev)
2160 # no extra flags set, no flag processor runs, text = rawtext
2174 # no extra flags set, no flag processor runs, text = rawtext
2161 if flags == REVIDX_DEFAULT_FLAGS:
2175 if flags == REVIDX_DEFAULT_FLAGS:
2162 return self._cache[2]
2176 return self._cache[2]
2163 # rawtext is reusable. need to run flag processor
2177 # rawtext is reusable. need to run flag processor
2164 rawtext = self._cache[2]
2178 rawtext = self._cache[2]
2165
2179
2166 cachedrev = self._cache[1]
2180 cachedrev = self._cache[1]
2167
2181
2168 # look up what we need to read
2182 # look up what we need to read
2169 if rawtext is None:
2183 if rawtext is None:
2170 if rev is None:
2184 if rev is None:
2171 rev = self.rev(node)
2185 rev = self.rev(node)
2172
2186
2173 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2187 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2174 if stopped:
2188 if stopped:
2175 rawtext = self._cache[2]
2189 rawtext = self._cache[2]
2176
2190
2177 # drop cache to save memory
2191 # drop cache to save memory
2178 self._cache = None
2192 self._cache = None
2179
2193
2180 targetsize = None
2194 targetsize = None
2181 rawsize = self.index[rev][2]
2195 rawsize = self.index[rev][2]
2182 if 0 <= rawsize:
2196 if 0 <= rawsize:
2183 targetsize = 4 * rawsize
2197 targetsize = 4 * rawsize
2184
2198
2185 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2199 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2186 if rawtext is None:
2200 if rawtext is None:
2187 rawtext = bytes(bins[0])
2201 rawtext = bytes(bins[0])
2188 bins = bins[1:]
2202 bins = bins[1:]
2189
2203
2190 rawtext = mdiff.patches(rawtext, bins)
2204 rawtext = mdiff.patches(rawtext, bins)
2191 self._cache = (node, rev, rawtext)
2205 self._cache = (node, rev, rawtext)
2192
2206
2193 if flags is None:
2207 if flags is None:
2194 if rev is None:
2208 if rev is None:
2195 rev = self.rev(node)
2209 rev = self.rev(node)
2196 flags = self.flags(rev)
2210 flags = self.flags(rev)
2197
2211
2198 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2212 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2199 if validatehash:
2213 if validatehash:
2200 self.checkhash(text, node, rev=rev)
2214 self.checkhash(text, node, rev=rev)
2201
2215
2202 return text
2216 return text
2203
2217
2204 def hash(self, text, p1, p2):
2218 def hash(self, text, p1, p2):
2205 """Compute a node hash.
2219 """Compute a node hash.
2206
2220
2207 Available as a function so that subclasses can replace the hash
2221 Available as a function so that subclasses can replace the hash
2208 as needed.
2222 as needed.
2209 """
2223 """
2210 return hash(text, p1, p2)
2224 return hash(text, p1, p2)
2211
2225
2212 def _processflags(self, text, flags, operation, raw=False):
2226 def _processflags(self, text, flags, operation, raw=False):
2213 """Inspect revision data flags and applies transforms defined by
2227 """Inspect revision data flags and applies transforms defined by
2214 registered flag processors.
2228 registered flag processors.
2215
2229
2216 ``text`` - the revision data to process
2230 ``text`` - the revision data to process
2217 ``flags`` - the revision flags
2231 ``flags`` - the revision flags
2218 ``operation`` - the operation being performed (read or write)
2232 ``operation`` - the operation being performed (read or write)
2219 ``raw`` - an optional argument describing if the raw transform should be
2233 ``raw`` - an optional argument describing if the raw transform should be
2220 applied.
2234 applied.
2221
2235
2222 This method processes the flags in the order (or reverse order if
2236 This method processes the flags in the order (or reverse order if
2223 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2237 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2224 flag processors registered for present flags. The order of flags defined
2238 flag processors registered for present flags. The order of flags defined
2225 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
2239 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
2226
2240
2227 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2241 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2228 processed text and ``validatehash`` is a bool indicating whether the
2242 processed text and ``validatehash`` is a bool indicating whether the
2229 returned text should be checked for hash integrity.
2243 returned text should be checked for hash integrity.
2230
2244
2231 Note: If the ``raw`` argument is set, it has precedence over the
2245 Note: If the ``raw`` argument is set, it has precedence over the
2232 operation and will only update the value of ``validatehash``.
2246 operation and will only update the value of ``validatehash``.
2233 """
2247 """
2234 # fast path: no flag processors will run
2248 # fast path: no flag processors will run
2235 if flags == 0:
2249 if flags == 0:
2236 return text, True
2250 return text, True
2237 if operation not in ('read', 'write'):
2251 if operation not in ('read', 'write'):
2238 raise ProgrammingError(_("invalid '%s' operation") % operation)
2252 raise ProgrammingError(_("invalid '%s' operation") % operation)
2239 # Check all flags are known.
2253 # Check all flags are known.
2240 if flags & ~REVIDX_KNOWN_FLAGS:
2254 if flags & ~REVIDX_KNOWN_FLAGS:
2241 raise RevlogError(_("incompatible revision flag '%#x'") %
2255 raise RevlogError(_("incompatible revision flag '%#x'") %
2242 (flags & ~REVIDX_KNOWN_FLAGS))
2256 (flags & ~REVIDX_KNOWN_FLAGS))
2243 validatehash = True
2257 validatehash = True
2244 # Depending on the operation (read or write), the order might be
2258 # Depending on the operation (read or write), the order might be
2245 # reversed due to non-commutative transforms.
2259 # reversed due to non-commutative transforms.
2246 orderedflags = REVIDX_FLAGS_ORDER
2260 orderedflags = REVIDX_FLAGS_ORDER
2247 if operation == 'write':
2261 if operation == 'write':
2248 orderedflags = reversed(orderedflags)
2262 orderedflags = reversed(orderedflags)
2249
2263
2250 for flag in orderedflags:
2264 for flag in orderedflags:
2251 # If a flagprocessor has been registered for a known flag, apply the
2265 # If a flagprocessor has been registered for a known flag, apply the
2252 # related operation transform and update result tuple.
2266 # related operation transform and update result tuple.
2253 if flag & flags:
2267 if flag & flags:
2254 vhash = True
2268 vhash = True
2255
2269
2256 if flag not in _flagprocessors:
2270 if flag not in _flagprocessors:
2257 message = _("missing processor for flag '%#x'") % (flag)
2271 message = _("missing processor for flag '%#x'") % (flag)
2258 raise RevlogError(message)
2272 raise RevlogError(message)
2259
2273
2260 processor = _flagprocessors[flag]
2274 processor = _flagprocessors[flag]
2261 if processor is not None:
2275 if processor is not None:
2262 readtransform, writetransform, rawtransform = processor
2276 readtransform, writetransform, rawtransform = processor
2263
2277
2264 if raw:
2278 if raw:
2265 vhash = rawtransform(self, text)
2279 vhash = rawtransform(self, text)
2266 elif operation == 'read':
2280 elif operation == 'read':
2267 text, vhash = readtransform(self, text)
2281 text, vhash = readtransform(self, text)
2268 else: # write operation
2282 else: # write operation
2269 text, vhash = writetransform(self, text)
2283 text, vhash = writetransform(self, text)
2270 validatehash = validatehash and vhash
2284 validatehash = validatehash and vhash
2271
2285
2272 return text, validatehash
2286 return text, validatehash
2273
2287
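# Minimal model of the ordering rule documented above: transforms run
# in REVIDX_FLAGS_ORDER on read and in reverse on write, so a write
# followed by a read unwinds them like nested function calls. The flags
# and transforms here are invented.

_FLAG_A, _FLAG_B = 1 << 0, 1 << 1
_ORDER = [_FLAG_A, _FLAG_B]
_write = {_FLAG_A: lambda t: b'A[' + t + b']',
          _FLAG_B: lambda t: b'B[' + t + b']'}
_read = {_FLAG_A: lambda t: t[2:-1],
         _FLAG_B: lambda t: t[2:-1]}

def _toy_processflags(text, flags, operation):
    order = _ORDER if operation == 'read' else list(reversed(_ORDER))
    table = _read if operation == 'read' else _write
    for flag in order:
        if flag & flags:
            text = table[flag](text)
    return text

_stored = _toy_processflags(b'hello', _FLAG_A | _FLAG_B, 'write')
assert _stored == b'A[B[hello]]'                  # A wraps last
assert _toy_processflags(_stored, _FLAG_A | _FLAG_B, 'read') == b'hello'
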
2274 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2288 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2275 """Check node hash integrity.
2289 """Check node hash integrity.
2276
2290
2277 Available as a function so that subclasses can extend hash mismatch
2291 Available as a function so that subclasses can extend hash mismatch
2278 behaviors as needed.
2292 behaviors as needed.
2279 """
2293 """
2280 try:
2294 try:
2281 if p1 is None and p2 is None:
2295 if p1 is None and p2 is None:
2282 p1, p2 = self.parents(node)
2296 p1, p2 = self.parents(node)
2283 if node != self.hash(text, p1, p2):
2297 if node != self.hash(text, p1, p2):
2284 revornode = rev
2298 revornode = rev
2285 if revornode is None:
2299 if revornode is None:
2286 revornode = templatefilters.short(hex(node))
2300 revornode = templatefilters.short(hex(node))
2287 raise RevlogError(_("integrity check failed on %s:%s")
2301 raise RevlogError(_("integrity check failed on %s:%s")
2288 % (self.indexfile, pycompat.bytestr(revornode)))
2302 % (self.indexfile, pycompat.bytestr(revornode)))
2289 except RevlogError:
2303 except RevlogError:
2290 if self._censorable and _censoredtext(text):
2304 if self._censorable and _censoredtext(text):
2291 raise error.CensoredNodeError(self.indexfile, node, text)
2305 raise error.CensoredNodeError(self.indexfile, node, text)
2292 raise
2306 raise
2293
2307
2294 def _enforceinlinesize(self, tr, fp=None):
2308 def _enforceinlinesize(self, tr, fp=None):
2295 """Check if the revlog is too big for inline and convert if so.
2309 """Check if the revlog is too big for inline and convert if so.
2296
2310
2297 This should be called after revisions are added to the revlog. If the
2311 This should be called after revisions are added to the revlog. If the
2298 revlog has grown too large to be an inline revlog, it will convert it
2312 revlog has grown too large to be an inline revlog, it will convert it
2299 to use multiple index and data files.
2313 to use multiple index and data files.
2300 """
2314 """
2301 tiprev = len(self) - 1
2315 tiprev = len(self) - 1
2302 if (not self._inline or
2316 if (not self._inline or
2303 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
2317 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
2304 return
2318 return
2305
2319
2306 trinfo = tr.find(self.indexfile)
2320 trinfo = tr.find(self.indexfile)
2307 if trinfo is None:
2321 if trinfo is None:
2308 raise RevlogError(_("%s not found in the transaction")
2322 raise RevlogError(_("%s not found in the transaction")
2309 % self.indexfile)
2323 % self.indexfile)
2310
2324
2311 trindex = trinfo[2]
2325 trindex = trinfo[2]
2312 if trindex is not None:
2326 if trindex is not None:
2313 dataoff = self.start(trindex)
2327 dataoff = self.start(trindex)
2314 else:
2328 else:
2315 # revlog was stripped at start of transaction, use all leftover data
2329 # revlog was stripped at start of transaction, use all leftover data
2316 trindex = len(self) - 1
2330 trindex = len(self) - 1
2317 dataoff = self.end(tiprev)
2331 dataoff = self.end(tiprev)
2318
2332
2319 tr.add(self.datafile, dataoff)
2333 tr.add(self.datafile, dataoff)
2320
2334
2321 if fp:
2335 if fp:
2322 fp.flush()
2336 fp.flush()
2323 fp.close()
2337 fp.close()
2324
2338
2325 with self._datafp('w') as df:
2339 with self._datafp('w') as df:
2326 for r in self:
2340 for r in self:
2327 df.write(self._getsegmentforrevs(r, r)[1])
2341 df.write(self._getsegmentforrevs(r, r)[1])
2328
2342
2329 with self._indexfp('w') as fp:
2343 with self._indexfp('w') as fp:
2330 self.version &= ~FLAG_INLINE_DATA
2344 self.version &= ~FLAG_INLINE_DATA
2331 self._inline = False
2345 self._inline = False
2332 io = self._io
2346 io = self._io
2333 for i in self:
2347 for i in self:
2334 e = io.packentry(self.index[i], self.node, self.version, i)
2348 e = io.packentry(self.index[i], self.node, self.version, i)
2335 fp.write(e)
2349 fp.write(e)
2336
2350
2337 # the temp file replaces the real index when we exit the context
2351 # the temp file replaces the real index when we exit the context
2338 # manager
2352 # manager
2339
2353
2340 tr.replace(self.indexfile, trindex * self._io.size)
2354 tr.replace(self.indexfile, trindex * self._io.size)
2341 self._chunkclear()
2355 self._chunkclear()
2342
2356
2343 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2357 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2344 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2358 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2345 """add a revision to the log
2359 """add a revision to the log
2346
2360
2347 text - the revision data to add
2361 text - the revision data to add
2348 transaction - the transaction object used for rollback
2362 transaction - the transaction object used for rollback
2349 link - the linkrev data to add
2363 link - the linkrev data to add
2350 p1, p2 - the parent nodeids of the revision
2364 p1, p2 - the parent nodeids of the revision
2351 cachedelta - an optional precomputed delta
2365 cachedelta - an optional precomputed delta
2352 node - nodeid of revision; typically node is not specified, and it is
2366 node - nodeid of revision; typically node is not specified, and it is
2353 computed by default as hash(text, p1, p2), however subclasses might
2367 computed by default as hash(text, p1, p2), however subclasses might
2354 use a different hashing method (and override checkhash() in such a case)
2368 use a different hashing method (and override checkhash() in such a case)
2355 flags - the known flags to set on the revision
2369 flags - the known flags to set on the revision
2356 deltacomputer - an optional _deltacomputer instance shared between
2370 deltacomputer - an optional _deltacomputer instance shared between
2357 multiple calls
2371 multiple calls
2358 """
2372 """
2359 if link == nullrev:
2373 if link == nullrev:
2360 raise RevlogError(_("attempted to add linkrev -1 to %s")
2374 raise RevlogError(_("attempted to add linkrev -1 to %s")
2361 % self.indexfile)
2375 % self.indexfile)
2362
2376
2363 if flags:
2377 if flags:
2364 node = node or self.hash(text, p1, p2)
2378 node = node or self.hash(text, p1, p2)
2365
2379
2366 rawtext, validatehash = self._processflags(text, flags, 'write')
2380 rawtext, validatehash = self._processflags(text, flags, 'write')
2367
2381
2368 # If the flag processor modifies the revision data, ignore any provided
2382 # If the flag processor modifies the revision data, ignore any provided
2369 # cachedelta.
2383 # cachedelta.
2370 if rawtext != text:
2384 if rawtext != text:
2371 cachedelta = None
2385 cachedelta = None
2372
2386
2373 if len(rawtext) > _maxentrysize:
2387 if len(rawtext) > _maxentrysize:
2374 raise RevlogError(
2388 raise RevlogError(
2375 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2389 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2376 % (self.indexfile, len(rawtext)))
2390 % (self.indexfile, len(rawtext)))
2377
2391
2378 node = node or self.hash(rawtext, p1, p2)
2392 node = node or self.hash(rawtext, p1, p2)
2379 if node in self.nodemap:
2393 if node in self.nodemap:
2380 return node
2394 return node
2381
2395
2382 if validatehash:
2396 if validatehash:
2383 self.checkhash(rawtext, node, p1=p1, p2=p2)
2397 self.checkhash(rawtext, node, p1=p1, p2=p2)
2384
2398
2385 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2399 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2386 flags, cachedelta=cachedelta,
2400 flags, cachedelta=cachedelta,
2387 deltacomputer=deltacomputer)
2401 deltacomputer=deltacomputer)
2388
2402
2389 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2403 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2390 cachedelta=None, deltacomputer=None):
2404 cachedelta=None, deltacomputer=None):
2391 """add a raw revision with known flags, node and parents
2405 """add a raw revision with known flags, node and parents
2392 useful when reusing a revision not stored in this revlog (e.g. received
2406 useful when reusing a revision not stored in this revlog (e.g. received
2393 over the wire, or read from an external bundle).
2407 over the wire, or read from an external bundle).
2394 """
2408 """
2395 dfh = None
2409 dfh = None
2396 if not self._inline:
2410 if not self._inline:
2397 dfh = self._datafp("a+")
2411 dfh = self._datafp("a+")
2398 ifh = self._indexfp("a+")
2412 ifh = self._indexfp("a+")
2399 try:
2413 try:
2400 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2414 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2401 flags, cachedelta, ifh, dfh,
2415 flags, cachedelta, ifh, dfh,
2402 deltacomputer=deltacomputer)
2416 deltacomputer=deltacomputer)
2403 finally:
2417 finally:
2404 if dfh:
2418 if dfh:
2405 dfh.close()
2419 dfh.close()
2406 ifh.close()
2420 ifh.close()
2407
2421
2408 def compress(self, data):
2422 def compress(self, data):
2409 """Generate a possibly-compressed representation of data."""
2423 """Generate a possibly-compressed representation of data."""
2410 if not data:
2424 if not data:
2411 return '', data
2425 return '', data
2412
2426
2413 compressed = self._compressor.compress(data)
2427 compressed = self._compressor.compress(data)
2414
2428
2415 if compressed:
2429 if compressed:
2416 # The revlog compressor added the header in the returned data.
2430 # The revlog compressor added the header in the returned data.
2417 return '', compressed
2431 return '', compressed
2418
2432
2419 if data[0:1] == '\0':
2433 if data[0:1] == '\0':
2420 return '', data
2434 return '', data
2421 return 'u', data
2435 return 'u', data
2422
2436
2423 def decompress(self, data):
2437 def decompress(self, data):
2424 """Decompress a revlog chunk.
2438 """Decompress a revlog chunk.
2425
2439
2426 The chunk is expected to begin with a header identifying the
2440 The chunk is expected to begin with a header identifying the
2427 format type so it can be routed to an appropriate decompressor.
2441 format type so it can be routed to an appropriate decompressor.
2428 """
2442 """
2429 if not data:
2443 if not data:
2430 return data
2444 return data
2431
2445
2432 # Revlogs are read much more frequently than they are written and many
2446 # Revlogs are read much more frequently than they are written and many
2433 # chunks only take microseconds to decompress, so performance is
2447 # chunks only take microseconds to decompress, so performance is
2434 # important here.
2448 # important here.
2435 #
2449 #
2436 # We can make a few assumptions about revlogs:
2450 # We can make a few assumptions about revlogs:
2437 #
2451 #
2438 # 1) the majority of chunks will be compressed (as opposed to inline
2452 # 1) the majority of chunks will be compressed (as opposed to inline
2439 # raw data).
2453 # raw data).
2440 # 2) decompressing *any* data will likely be at least 10x slower than
2454 # 2) decompressing *any* data will likely be at least 10x slower than
2441 # returning raw inline data.
2455 # returning raw inline data.
2442 # 3) we want to prioritize common and officially supported compression
2456 # 3) we want to prioritize common and officially supported compression
2443 # engines
2457 # engines
2444 #
2458 #
2445 # It follows that we want to optimize for "decompress compressed data
2459 # It follows that we want to optimize for "decompress compressed data
2446 # when encoded with common and officially supported compression engines"
2460 # when encoded with common and officially supported compression engines"
2447 # case over "raw data" and "data encoded by less common or non-official
2461 # case over "raw data" and "data encoded by less common or non-official
2448 # compression engines." That is why we have the inline lookup first
2462 # compression engines." That is why we have the inline lookup first
2449 # followed by the compengines lookup.
2463 # followed by the compengines lookup.
2450 #
2464 #
2451 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2465 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2452 # compressed chunks. And this matters for changelog and manifest reads.
2466 # compressed chunks. And this matters for changelog and manifest reads.
2453 t = data[0:1]
2467 t = data[0:1]
2454
2468
2455 if t == 'x':
2469 if t == 'x':
2456 try:
2470 try:
2457 return _zlibdecompress(data)
2471 return _zlibdecompress(data)
2458 except zlib.error as e:
2472 except zlib.error as e:
2459 raise RevlogError(_('revlog decompress error: %s') %
2473 raise RevlogError(_('revlog decompress error: %s') %
2460 stringutil.forcebytestr(e))
2474 stringutil.forcebytestr(e))
2461 # '\0' is more common than 'u' so it goes first.
2475 # '\0' is more common than 'u' so it goes first.
2462 elif t == '\0':
2476 elif t == '\0':
2463 return data
2477 return data
2464 elif t == 'u':
2478 elif t == 'u':
2465 return util.buffer(data, 1)
2479 return util.buffer(data, 1)
2466
2480
2467 try:
2481 try:
2468 compressor = self._decompressors[t]
2482 compressor = self._decompressors[t]
2469 except KeyError:
2483 except KeyError:
2470 try:
2484 try:
2471 engine = util.compengines.forrevlogheader(t)
2485 engine = util.compengines.forrevlogheader(t)
2472 compressor = engine.revlogcompressor()
2486 compressor = engine.revlogcompressor()
2473 self._decompressors[t] = compressor
2487 self._decompressors[t] = compressor
2474 except KeyError:
2488 except KeyError:
2475 raise RevlogError(_('unknown compression type %r') % t)
2489 raise RevlogError(_('unknown compression type %r') % t)
2476
2490
2477 return compressor.decompress(data)
2491 return compressor.decompress(data)
2478
2492
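# Hedged roundtrip sketch of the header scheme above, folded into a
# single buffer for brevity (the real compress() returns a separate
# (header, data) 2-tuple): zlib output starts with 'x' and tags itself,
# 'u' marks stored-uncompressed data, and NUL-prefixed raw data needs
# no tag at all.

import zlib

def _toy_compress(data):
    comp = zlib.compress(data)
    if len(comp) < len(data):
        return comp                     # self-tagging: begins with 'x'
    if data[0:1] == b'\0':
        return data                     # raw data, recognizable by NUL
    return b'u' + data                  # tag incompressible data

def _toy_decompress(data):
    t = data[0:1]
    if t == b'x':
        return zlib.decompress(data)
    if t == b'u':
        return data[1:]
    return data                         # '\0' and friends: raw

_blob = b'a revision body ' * 64
assert _toy_decompress(_toy_compress(_blob)) == _blob
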
2479 def _isgooddeltainfo(self, deltainfo, revinfo):
2493 def _isgooddeltainfo(self, deltainfo, revinfo):
2480 """Returns True if the given delta is good. Good means that it is within
2494 """Returns True if the given delta is good. Good means that it is within
2481 the disk span, disk size, and chain length bounds that we know to be
2495 the disk span, disk size, and chain length bounds that we know to be
2482 performant."""
2496 performant."""
2483 if deltainfo is None:
2497 if deltainfo is None:
2484 return False
2498 return False
2485
2499
2486 # - 'deltainfo.distance' is the distance from the base revision --
2500 # - 'deltainfo.distance' is the distance from the base revision --
2487 # bounding it limits the amount of I/O we need to do.
2501 # bounding it limits the amount of I/O we need to do.
2488 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2502 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2489 # deltas we need to apply -- bounding it limits the amount of CPU
2503 # deltas we need to apply -- bounding it limits the amount of CPU
2490 # we consume.
2504 # we consume.
2491
2505
2492 if self._sparserevlog:
2506 if self._sparserevlog:
2493 # As sparse-read will be used, we can consider that the distance,
2507 # As sparse-read will be used, we can consider that the distance,
2494 # instead of being the span of the whole chunk,
2508 # instead of being the span of the whole chunk,
2495 # is the span of the largest read chunk
2509 # is the span of the largest read chunk
2496 base = deltainfo.base
2510 base = deltainfo.base
2497
2511
2498 if base != nullrev:
2512 if base != nullrev:
2499 deltachain = self._deltachain(base)[0]
2513 deltachain = self._deltachain(base)[0]
2500 else:
2514 else:
2501 deltachain = []
2515 deltachain = []
2502
2516
2503 chunks = _slicechunk(self, deltachain, deltainfo)
2517 chunks = _slicechunk(self, deltachain, deltainfo)
2504 all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
2518 all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
2505 distance = max(all_span)
2519 distance = max(all_span)
2506 else:
2520 else:
2507 distance = deltainfo.distance
2521 distance = deltainfo.distance
2508
2522
2509 textlen = revinfo.textlen
2523 textlen = revinfo.textlen
2510 defaultmax = textlen * 4
2524 defaultmax = textlen * 4
2511 maxdist = self._maxdeltachainspan
2525 maxdist = self._maxdeltachainspan
2512 if not maxdist:
2526 if not maxdist:
2513 maxdist = distance # ensure the conditional passes
2527 maxdist = distance # ensure the conditional passes
2514 maxdist = max(maxdist, defaultmax)
2528 maxdist = max(maxdist, defaultmax)
2515 if self._sparserevlog and maxdist < self._srmingapsize:
2529 if self._sparserevlog and maxdist < self._srmingapsize:
2516 # In multiple places, we ignore irrelevant data ranges below a
2530 # In multiple places, we ignore irrelevant data ranges below a
2517 # certain size. We also apply this tradeoff here and relax the span
2531 # certain size. We also apply this tradeoff here and relax the span
2518 # constraint for small enough content.
2532 # constraint for small enough content.
2519 maxdist = self._srmingapsize
2533 maxdist = self._srmingapsize
2520
2534
2521 # Bad delta from read span:
2535 # Bad delta from read span:
2522 #
2536 #
2523 # If the span of data read is larger than the maximum allowed.
2537 # If the span of data read is larger than the maximum allowed.
2524 if maxdist < distance:
2538 if maxdist < distance:
2525 return False
2539 return False
2526
2540
2527 # Bad delta from new delta size:
2541 # Bad delta from new delta size:
2528 #
2542 #
2529 # If the delta size is larger than the target text, storing the
2543 # If the delta size is larger than the target text, storing the
2530 # delta will be inefficient.
2544 # delta will be inefficient.
2531 if textlen < deltainfo.deltalen:
2545 if textlen < deltainfo.deltalen:
2532 return False
2546 return False
2533
2547
2534 # Bad delta from cumulated payload size:
2548 # Bad delta from cumulated payload size:
2535 #
2549 #
2536 # If the sum of deltas gets larger than K * the target text length.
2550 # If the sum of deltas gets larger than K * the target text length.
2537 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
2551 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
2538 return False
2552 return False
2539
2553
2540 # Bad delta from chain length:
2554 # Bad delta from chain length:
2541 #
2555 #
2542 # If the number of deltas in the chain gets too high.
2556 # If the number of deltas in the chain gets too high.
2543 if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
2557 if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
2544 return False
2558 return False
2545
2559
2546 return True
2560 return True
2547
2561
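# Condensed standalone model of the four rejection tests above, with
# invented numbers; the real method derives `distance` from the sliced
# read chunks when sparse-revlog is enabled. LIMIT_DELTA2TEXT is
# assumed to match the module-level constant (2 at the time of writing).

_LIMIT_DELTA2TEXT = 2

def _toy_isgooddelta(distance, deltalen, compresseddeltalen, chainlen,
                     textlen, maxdeltachainspan=0, maxchainlen=0):
    maxdist = max(maxdeltachainspan or distance, textlen * 4)
    if maxdist < distance:
        return False                # read span too large
    if textlen < deltalen:
        return False                # delta bigger than the text itself
    if textlen * _LIMIT_DELTA2TEXT < compresseddeltalen:
        return False                # cumulated chain payload too large
    if maxchainlen and maxchainlen < chainlen:
        return False                # delta chain too long
    return True

assert _toy_isgooddelta(100, 10, 50, 3, textlen=40)
assert not _toy_isgooddelta(100, 50, 50, 3, textlen=40)   # delta > text
assert not _toy_isgooddelta(100, 10, 90, 3, textlen=40)   # payload > 2x
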
2548 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2562 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2549 cachedelta, ifh, dfh, alwayscache=False,
2563 cachedelta, ifh, dfh, alwayscache=False,
2550 deltacomputer=None):
2564 deltacomputer=None):
2551 """internal function to add revisions to the log
2565 """internal function to add revisions to the log
2552
2566
2553 see addrevision for argument descriptions.
2567 see addrevision for argument descriptions.
2554
2568
2555 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2569 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2556
2570
2557 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2571 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2558 be used.
2572 be used.
2559
2573
2560 invariants:
2574 invariants:
2561 - rawtext is optional (can be None); if not set, cachedelta must be set.
2575 - rawtext is optional (can be None); if not set, cachedelta must be set.
2562 if both are set, they must correspond to each other.
2576 if both are set, they must correspond to each other.
2563 """
2577 """
2564 if node == nullid:
2578 if node == nullid:
2565 raise RevlogError(_("%s: attempt to add null revision") %
2579 raise RevlogError(_("%s: attempt to add null revision") %
2566 (self.indexfile))
2580 (self.indexfile))
2567 if node == wdirid or node in wdirfilenodeids:
2581 if node == wdirid or node in wdirfilenodeids:
2568 raise RevlogError(_("%s: attempt to add wdir revision") %
2582 raise RevlogError(_("%s: attempt to add wdir revision") %
2569 (self.indexfile))
2583 (self.indexfile))
2570
2584
2571 if self._inline:
2585 if self._inline:
2572 fh = ifh
2586 fh = ifh
2573 else:
2587 else:
2574 fh = dfh
2588 fh = dfh
2575
2589
2576 btext = [rawtext]
2590 btext = [rawtext]
2577
2591
2578 curr = len(self)
2592 curr = len(self)
2579 prev = curr - 1
2593 prev = curr - 1
2580 offset = self.end(prev)
2594 offset = self.end(prev)
2581 p1r, p2r = self.rev(p1), self.rev(p2)
2595 p1r, p2r = self.rev(p1), self.rev(p2)
2582
2596
2583 # full versions are inserted when the needed deltas
2597 # full versions are inserted when the needed deltas
2584 # become comparable to the uncompressed text
2598 # become comparable to the uncompressed text
2585 if rawtext is None:
2599 if rawtext is None:
2586 # need rawtext size, before changed by flag processors, which is
2600 # need rawtext size, before changed by flag processors, which is
2587 # the non-raw size. use revlog explicitly to avoid filelog's extra
2601 # the non-raw size. use revlog explicitly to avoid filelog's extra
2588 # logic that might remove metadata size.
2602 # logic that might remove metadata size.
2589 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2603 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2590 cachedelta[1])
2604 cachedelta[1])
2591 else:
2605 else:
2592 textlen = len(rawtext)
2606 textlen = len(rawtext)
2593
2607
2594 if deltacomputer is None:
2608 if deltacomputer is None:
2595 deltacomputer = _deltacomputer(self)
2609 deltacomputer = _deltacomputer(self)
2596
2610
2597 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2611 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2598
2612
2599 # no delta for flag processor revision (see "candelta" for why)
2613 # no delta for flag processor revision (see "candelta" for why)
2600 # not calling candelta since only one revision needs test, also to
2614 # not calling candelta since only one revision needs test, also to
2601 # avoid overhead fetching flags again.
2615 # avoid overhead fetching flags again.
2602 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2616 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2603 deltainfo = None
2617 deltainfo = None
2604 else:
2618 else:
2605 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2619 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2606
2620
2607 if deltainfo is not None:
2621 if deltainfo is not None:
2608 base = deltainfo.base
2622 base = deltainfo.base
2609 chainbase = deltainfo.chainbase
2623 chainbase = deltainfo.chainbase
2610 data = deltainfo.data
2624 data = deltainfo.data
2611 l = deltainfo.deltalen
2625 l = deltainfo.deltalen
2612 else:
2626 else:
2613 rawtext = deltacomputer.buildtext(revinfo, fh)
2627 rawtext = deltacomputer.buildtext(revinfo, fh)
2614 data = self.compress(rawtext)
2628 data = self.compress(rawtext)
2615 l = len(data[1]) + len(data[0])
2629 l = len(data[1]) + len(data[0])
2616 base = chainbase = curr
2630 base = chainbase = curr
2617
2631
2618 e = (offset_type(offset, flags), l, textlen,
2632 e = (offset_type(offset, flags), l, textlen,
2619 base, link, p1r, p2r, node)
2633 base, link, p1r, p2r, node)
2620 self.index.append(e)
2634 self.index.append(e)
2621 self.nodemap[node] = curr
2635 self.nodemap[node] = curr
2622
2636
2623 entry = self._io.packentry(e, self.node, self.version, curr)
2637 entry = self._io.packentry(e, self.node, self.version, curr)
2624 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2638 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2625
2639
2626 if alwayscache and rawtext is None:
2640 if alwayscache and rawtext is None:
2627 rawtext = deltacomputer.buildtext(revinfo, fh)
2641 rawtext = deltacomputer.buildtext(revinfo, fh)
2628
2642
2629 if type(rawtext) == bytes: # only accept immutable objects
2643 if type(rawtext) == bytes: # only accept immutable objects
2630 self._cache = (node, curr, rawtext)
2644 self._cache = (node, curr, rawtext)
2631 self._chainbasecache[curr] = chainbase
2645 self._chainbasecache[curr] = chainbase
2632 return node
2646 return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to a position before the end. See issue4943
        # for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)
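
    # Illustrative sketch (not part of revlog): the a+ seek workaround above
    # in plain Python, with a hypothetical file ``path`` -- after any read on
    # an "a+b" handle, reposition to the end before appending:
    #
    #     with open(path, 'a+b') as fobj:
    #         fobj.seek(0)
    #         header = fobj.read(4)       # reading moves the file position
    #         fobj.seek(0, os.SEEK_END)   # required on some platforms before
    #         fobj.write(b'new entry')    # appending (see issue4943 above)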

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should already be in our log; the rest
        are each against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes
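
    # Illustrative usage sketch (not part of revlog), assuming a hypothetical
    # iterable ``incoming`` of 7-tuples shaped as described above and a
    # changelog ``cl`` whose ``rev()`` maps linknodes to linkrevs:
    #
    #     seen = []
    #     rl.addgroup(incoming, cl.rev, tr,
    #                 addrevisioncb=lambda rl, node: seen.append(node))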

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add
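
    # Illustrative sketch (not part of revlog): the delta shape the heuristic
    # above matches, with hypothetical content. A full-replacement delta is a
    # single patch hunk -- a 12-byte ">lll" header (start, end, new length)
    # covering the whole old text, followed by the replacement text:
    #
    #     import struct
    #     newtext = "\1\ncensored: for moral reasons\n"
    #     delta = struct.pack(">lll", 0, oldlen, len(newtext)) + newtext
    #     # after the header check, delta[12:23] == "\1\ncensored:"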

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
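        #
        # Worked example (hypothetical linear revlog, revs 0..3 with linkrevs
        # [0, 1, 3, 2]): getstrippoint(2) pops rev 3 (linkrev 2) and rev 2
        # (linkrev 3), both >= minlink; rev 1's linkrev is below minlink, so
        # the walk stops and (2, set()) is returned -- truncate at rev 2,
        # with no surviving revision left holding a broken linkrev.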
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
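        # Compares the sizes recorded in the index against the actual sizes
        # of the index and data files on disk. Returns (dd, di): dd is how
        # many bytes the data file deviates from the expected size, di the
        # same for the index file; (0, 0) means the revlog is consistent.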
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added through the regular ``addrevision()``
          code path rather than the lower-level one, rebuilding each revision
          as if it were newly added.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        When left as ``None``, the destination revlog keeps its current
        setting.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd