revlog: obtain the first node at the lowest layer while building pure nodemap...
Yuya Nishihara
r39177:5f924aa0 default
@@ -1,3007 +1,3008 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import heapq
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .utils import (
    stringutil,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
# bitmask for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be a buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)

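# A quick sketch of the metadata envelope handled by parsemeta()/packmeta()
# above (illustrative only, values derived from those helpers): metadata is
# framed by "\1\n" markers ahead of the payload, one "key: value" per line.
#
#     >>> packmeta({'copy': 'a'}, 'payload')
#     '\x01\ncopy: a\n\x01\npayload'
#     >>> parsemeta(packmeta({'copy': 'a'}, 'payload'))
#     ({'copy': 'a'}, 12)
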
def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read) f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw) f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate the hash. If
      "write" returns True, the "rawtext" returned by "write" should be used
      to generate the hash. Usually, "write" and "read" return different
      booleans, and "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor

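# A hypothetical registration (nothing here actually registers it) showing
# the (read, write, raw) tuple shape addflagprocessor() expects; a real
# processor would transform the text instead of passing it through:
#
#     def _noopread(self, rawtext):
#         return rawtext, True        # True: usable for hash checks
#     def _noopwrite(self, text):
#         return text, True
#     def _noopraw(self, rawtext):
#         return True
#     # addflagprocessor(REVIDX_EXTSTORED,
#     #                  (_noopread, _noopwrite, _noopraw))
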
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

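# Index entries pack the data offset and the 16-bit flag field into a single
# integer; a quick round-trip sketch of the three helpers above:
#
#     >>> v = offset_type(10, REVIDX_ISCENSORED)
#     >>> getoffset(v)
#     10
#     >>> gettype(v) == REVIDX_ISCENSORED
#     True
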
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()

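# Because the parents are sorted before hashing (and nullid sorts first),
# the result is independent of parent order; a small property sketch, where
# p1 and p2 stand for any 20-byte node ids:
#
#     >>> hash('text', p1, p2) == hash('text', p2, p1)
#     True
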
class _testrevlog(object):
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0):
        """data is a list of revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap

    def start(self, rev):
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # Trim empty revs at the end, but never the very first revision of a chain
    while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
        endidx -= 1

    return revs[startidx:endidx]

def _segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> _segmentspan(revlog, [0, 4])
    17
    >>> _segmentspan(revlog, [3, 4])
    5
    >>> _segmentspan(revlog, [1, 2, 3,])
    7
    >>> _segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    return revlog.end(revs[-1]) - revlog.start(revs[0])

def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revision chunks are larger than this limit, they will still
    be yielded individually.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # * deltainfo is used to pick a good delta chain when writing.
    if not (deltainfo is None or targetsize is None):
        msg = 'cannot use `targetsize` with a `deltainfo`'
        raise error.ProgrammingError(msg)
    for chunk in _slicechunktodensity(revlog, revs,
                                      deltainfo,
                                      revlog._srdensitythreshold,
                                      revlog._srmingapsize):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk

def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected but
    that are still too large compared to the read guarantee of revlog. This
    might happen when the "minimal gap size" interrupted the slicing or when
    chains are built in a way that creates large blocks next to each other.

    >>> revlog = _testrevlog([
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    Cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    startrevidx = 0
    startdata = revlog.start(revs[0])
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        if span <= targetsize:
            endrevidx = idx
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx
    yield _trimchunk(revlog, revs, startrevidx)

def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    ``deltainfo`` is a _deltainfo instance of a revision that we would append
    to the top of the revlog.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    nextrev = len(revlog)
    nextoffset = revlog.end(nextrev - 1)

    if deltainfo is None:
        deltachainspan = _segmentspan(revlog, revs)
        chainpayload = sum(length(r) for r in revs)
    else:
        deltachainspan = deltainfo.distance
        chainpayload = deltainfo.compresseddeltalen

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    if deltainfo is not None:
        revs = list(revs)
        revs.append(nextrev)

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        if rev < nextrev:
            revstart = start(rev)
            revlen = length(rev)
        else:
            revstart = nextoffset
            revlen = deltainfo.deltalen

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < targetdensity:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()

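# Reading _builddeltainfo() below suggests the following field meanings (a
# best-effort gloss, not authoritative documentation):
#   distance:           bytes from the start of the chain base to the end of
#                       this delta
#   deltalen:           compressed length of this delta (header + data)
#   data:               (header, data) tuple as returned by revlog.compress()
#   base:               delta parent revision
#   chainbase:          first revision of the delta chain
#   chainlen:           delta chain length including this delta
#   compresseddeltalen: cumulative compressed size of the chain's deltas
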
class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides candidate revisions that are of interest to be diffed
        against, grouped by level of easiness.
        """
        revlog = self.revlog
        gdelta = revlog._generaldelta
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and gdelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if gdelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]

                if not revlog._deltabothparents and len(parents) == 2:
                    parents.sort()
                    # To minimize the chance of having to build a fulltext,
                    # pick first whichever parent is closest to us (max rev)
                    yield (parents[1],)
                    # then the other one (min rev) if the first did not fit
                    yield (parents[0],)
                    tested.update(parents)
                elif len(parents) > 0:
                    # Test all parents (1 or 2), and keep the best candidate
                    yield parents
                    tested.update(parents)

            if prev not in tested:
                # Other approaches failed; try against prev to hopefully save
                # us a fulltext.
                yield (prev,)
                tested.add(prev)

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        if not revinfo.textlen:
            return None # empty files do not need deltas

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltalength = self.revlog.length
        deltaparent = self.revlog.deltaparent

        deltainfo = None
        deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            # filter out delta bases that will never produce a good delta
            candidaterevs = [r for r in candidaterevs
                             if self.revlog.length(r) <= deltas_limit]
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # skip over empty deltas (no need to include them in a chain)
                while candidaterev != nullrev and not deltalength(candidaterev):
                    candidaterev = deltaparent(candidaterev)
                # no need to try a delta against nullid, this will be handled
                # by fulltext later.
                if candidaterev == nullrev:
                    continue
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo

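# A rough usage sketch (hypothetical names; `rl`, `ri` and `fh` are not
# defined here): the write path builds a _deltacomputer over a revlog and
# falls back to storing a fulltext when no candidate delta is acceptable:
#
#     # dc = _deltacomputer(rl)
#     # info = dc.finddeltainfo(ri, fh)
#     # if info is None:
#     #     ...store the fulltext instead of a delta...
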
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated with the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
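
# Quick size check of the layout documented above: four 4-byte ints plus
# three 20-byte nodeids give a fixed 76-byte record per v0 entry:
#
#     >>> indexformatv0.size
#     76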

class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

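# The index pretends there is a "null" entry at rev -1, hence the special
# case above; a small sketch:
#
#     >>> revlogoldindex([])[-1] == (0, 0, 0, -1, -1, -1, -1, nullid)
#     True
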
# maximum <delta-chain-data>/<revision-text-length> ratio
LIMIT_DELTA2TEXT = 2

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
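
# The "ng" record packs the 6-byte offset and 2-byte flags into one ">Q"
# field (see offset_type above); with six 4-byte ints and a 20-byte nodeid
# padded to 32 bytes, each entry is a fixed 64 bytes:
#
#     >>> indexformatng.size
#     64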

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

884 class revlog(object):
884 class revlog(object):
885 """
885 """
886 the underlying revision storage object
886 the underlying revision storage object
887
887
888 A revlog consists of two parts, an index and the revision data.
888 A revlog consists of two parts, an index and the revision data.
889
889
890 The index is a file with a fixed record size containing
890 The index is a file with a fixed record size containing
891 information on each revision, including its nodeid (hash), the
891 information on each revision, including its nodeid (hash), the
892 nodeids of its parents, the position and offset of its data within
892 nodeids of its parents, the position and offset of its data within
893 the data file, and the revision it's based on. Finally, each entry
893 the data file, and the revision it's based on. Finally, each entry
894 contains a linkrev entry that can serve as a pointer to external
894 contains a linkrev entry that can serve as a pointer to external
895 data.
895 data.
896
896
897 The revision data itself is a linear collection of data chunks.
897 The revision data itself is a linear collection of data chunks.
898 Each chunk represents a revision and is usually represented as a
898 Each chunk represents a revision and is usually represented as a
899 delta against the previous chunk. To bound lookup time, runs of
899 delta against the previous chunk. To bound lookup time, runs of
900 deltas are limited to about 2 times the length of the original
900 deltas are limited to about 2 times the length of the original
901 version data. This makes retrieval of a version proportional to
901 version data. This makes retrieval of a version proportional to
902 its size, or O(1) relative to the number of revisions.
902 its size, or O(1) relative to the number of revisions.
903
903
904 Both pieces of the revlog are written to in an append-only
904 Both pieces of the revlog are written to in an append-only
905 fashion, which means we never need to rewrite a file to insert or
905 fashion, which means we never need to rewrite a file to insert or
906 remove data, and can use some simple techniques to avoid the need
906 remove data, and can use some simple techniques to avoid the need
907 for locking while reading.
907 for locking while reading.
908
908
909 If checkambig, indexfile is opened with checkambig=True at
909 If checkambig, indexfile is opened with checkambig=True at
910 writing, to avoid file stat ambiguity.
910 writing, to avoid file stat ambiguity.
911
911
912 If mmaplargeindex is True, and an mmapindexthreshold is set, the
912 If mmaplargeindex is True, and an mmapindexthreshold is set, the
913 index will be mmapped rather than read if it is larger than the
913 index will be mmapped rather than read if it is larger than the
914 configured threshold.
914 configured threshold.
915
915
916 If censorable is True, the revlog can have censored revisions.
916 If censorable is True, the revlog can have censored revisions.
917 """
917 """
918 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
918 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
919 mmaplargeindex=False, censorable=False):
919 mmaplargeindex=False, censorable=False):
920 """
920 """
921 create a revlog object
921 create a revlog object
922
922
923 opener is a function that abstracts the file opening operation
923 opener is a function that abstracts the file opening operation
924 and can be used to implement COW semantics or the like.
924 and can be used to implement COW semantics or the like.
925 """
925 """
926 self.indexfile = indexfile
926 self.indexfile = indexfile
927 self.datafile = datafile or (indexfile[:-2] + ".d")
927 self.datafile = datafile or (indexfile[:-2] + ".d")
928 self.opener = opener
928 self.opener = opener
929 # When True, indexfile is opened with checkambig=True at writing, to
929 # When True, indexfile is opened with checkambig=True at writing, to
930 # avoid file stat ambiguity.
930 # avoid file stat ambiguity.
931 self._checkambig = checkambig
931 self._checkambig = checkambig
932 self._censorable = censorable
932 self._censorable = censorable
933 # 3-tuple of (node, rev, text) for a raw revision.
933 # 3-tuple of (node, rev, text) for a raw revision.
934 self._cache = None
934 self._cache = None
935 # Maps rev to chain base rev.
935 # Maps rev to chain base rev.
936 self._chainbasecache = util.lrucachedict(100)
936 self._chainbasecache = util.lrucachedict(100)
937 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
937 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
938 self._chunkcache = (0, '')
938 self._chunkcache = (0, '')
939 # How much data to read and cache into the raw revlog data cache.
939 # How much data to read and cache into the raw revlog data cache.
940 self._chunkcachesize = 65536
940 self._chunkcachesize = 65536
941 self._maxchainlen = None
941 self._maxchainlen = None
942 self._deltabothparents = True
942 self._deltabothparents = True
943 self.index = []
943 self.index = []
944 # Mapping of partial identifiers to full nodes.
944 # Mapping of partial identifiers to full nodes.
945 self._pcache = {}
945 self._pcache = {}
946 # Mapping of revision integer to full node.
946 # Mapping of revision integer to full node.
947 self._nodecache = {nullid: nullrev}
947 self._nodecache = {nullid: nullrev}
948 self._nodepos = None
948 self._nodepos = None
949 self._compengine = 'zlib'
949 self._compengine = 'zlib'
950 self._maxdeltachainspan = -1
950 self._maxdeltachainspan = -1
951 self._withsparseread = False
951 self._withsparseread = False
952 self._sparserevlog = False
952 self._sparserevlog = False
953 self._srdensitythreshold = 0.50
953 self._srdensitythreshold = 0.50
954 self._srmingapsize = 262144
954 self._srmingapsize = 262144
955
955
956 mmapindexthreshold = None
956 mmapindexthreshold = None
957 v = REVLOG_DEFAULT_VERSION
957 v = REVLOG_DEFAULT_VERSION
958 opts = getattr(opener, 'options', None)
958 opts = getattr(opener, 'options', None)
959 if opts is not None:
959 if opts is not None:
960 if 'revlogv2' in opts:
960 if 'revlogv2' in opts:
961 # version 2 revlogs always use generaldelta.
961 # version 2 revlogs always use generaldelta.
962 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
962 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
963 elif 'revlogv1' in opts:
963 elif 'revlogv1' in opts:
964 if 'generaldelta' in opts:
964 if 'generaldelta' in opts:
965 v |= FLAG_GENERALDELTA
965 v |= FLAG_GENERALDELTA
966 else:
966 else:
967 v = 0
967 v = 0
968 if 'chunkcachesize' in opts:
968 if 'chunkcachesize' in opts:
969 self._chunkcachesize = opts['chunkcachesize']
969 self._chunkcachesize = opts['chunkcachesize']
970 if 'maxchainlen' in opts:
970 if 'maxchainlen' in opts:
971 self._maxchainlen = opts['maxchainlen']
971 self._maxchainlen = opts['maxchainlen']
972 if 'deltabothparents' in opts:
972 if 'deltabothparents' in opts:
973 self._deltabothparents = opts['deltabothparents']
973 self._deltabothparents = opts['deltabothparents']
974 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
974 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
975 if 'compengine' in opts:
975 if 'compengine' in opts:
976 self._compengine = opts['compengine']
976 self._compengine = opts['compengine']
977 if 'maxdeltachainspan' in opts:
977 if 'maxdeltachainspan' in opts:
978 self._maxdeltachainspan = opts['maxdeltachainspan']
978 self._maxdeltachainspan = opts['maxdeltachainspan']
979 if mmaplargeindex and 'mmapindexthreshold' in opts:
979 if mmaplargeindex and 'mmapindexthreshold' in opts:
980 mmapindexthreshold = opts['mmapindexthreshold']
980 mmapindexthreshold = opts['mmapindexthreshold']
981 self._sparserevlog = bool(opts.get('sparse-revlog', False))
981 self._sparserevlog = bool(opts.get('sparse-revlog', False))
982 withsparseread = bool(opts.get('with-sparse-read', False))
982 withsparseread = bool(opts.get('with-sparse-read', False))
983 # sparse-revlog forces sparse-read
983 # sparse-revlog forces sparse-read
984 self._withsparseread = self._sparserevlog or withsparseread
984 self._withsparseread = self._sparserevlog or withsparseread
985 if 'sparse-read-density-threshold' in opts:
985 if 'sparse-read-density-threshold' in opts:
986 self._srdensitythreshold = opts['sparse-read-density-threshold']
986 self._srdensitythreshold = opts['sparse-read-density-threshold']
987 if 'sparse-read-min-gap-size' in opts:
987 if 'sparse-read-min-gap-size' in opts:
988 self._srmingapsize = opts['sparse-read-min-gap-size']
988 self._srmingapsize = opts['sparse-read-min-gap-size']
989
989
990 if self._chunkcachesize <= 0:
990 if self._chunkcachesize <= 0:
991 raise RevlogError(_('revlog chunk cache size %r is not greater '
991 raise RevlogError(_('revlog chunk cache size %r is not greater '
992 'than 0') % self._chunkcachesize)
992 'than 0') % self._chunkcachesize)
993 elif self._chunkcachesize & (self._chunkcachesize - 1):
993 elif self._chunkcachesize & (self._chunkcachesize - 1):
994 raise RevlogError(_('revlog chunk cache size %r is not a power '
994 raise RevlogError(_('revlog chunk cache size %r is not a power '
995 'of 2') % self._chunkcachesize)
995 'of 2') % self._chunkcachesize)
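# the check above uses the standard bit trick: for x > 0, x & (x - 1) is
# zero exactly when x is a power of two. For example:
#
#   >>> 65536 & 65535   # 2**16 -> accepted
#   0
#   >>> 65537 & 65536   # not a power of two -> rejected
#   65536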
996
996
997 indexdata = ''
997 indexdata = ''
998 self._initempty = True
998 self._initempty = True
999 try:
999 try:
1000 with self._indexfp() as f:
1000 with self._indexfp() as f:
1001 if (mmapindexthreshold is not None and
1001 if (mmapindexthreshold is not None and
1002 self.opener.fstat(f).st_size >= mmapindexthreshold):
1002 self.opener.fstat(f).st_size >= mmapindexthreshold):
1003 indexdata = util.buffer(util.mmapread(f))
1003 indexdata = util.buffer(util.mmapread(f))
1004 else:
1004 else:
1005 indexdata = f.read()
1005 indexdata = f.read()
1006 if len(indexdata) > 0:
1006 if len(indexdata) > 0:
1007 v = versionformat_unpack(indexdata[:4])[0]
1007 v = versionformat_unpack(indexdata[:4])[0]
1008 self._initempty = False
1008 self._initempty = False
1009 except IOError as inst:
1009 except IOError as inst:
1010 if inst.errno != errno.ENOENT:
1010 if inst.errno != errno.ENOENT:
1011 raise
1011 raise
1012
1012
1013 self.version = v
1013 self.version = v
1014 self._inline = v & FLAG_INLINE_DATA
1014 self._inline = v & FLAG_INLINE_DATA
1015 self._generaldelta = v & FLAG_GENERALDELTA
1015 self._generaldelta = v & FLAG_GENERALDELTA
1016 flags = v & ~0xFFFF
1016 flags = v & ~0xFFFF
1017 fmt = v & 0xFFFF
1017 fmt = v & 0xFFFF
1018 if fmt == REVLOGV0:
1018 if fmt == REVLOGV0:
1019 if flags:
1019 if flags:
1020 raise RevlogError(_('unknown flags (%#04x) in version %d '
1020 raise RevlogError(_('unknown flags (%#04x) in version %d '
1021 'revlog %s') %
1021 'revlog %s') %
1022 (flags >> 16, fmt, self.indexfile))
1022 (flags >> 16, fmt, self.indexfile))
1023 elif fmt == REVLOGV1:
1023 elif fmt == REVLOGV1:
1024 if flags & ~REVLOGV1_FLAGS:
1024 if flags & ~REVLOGV1_FLAGS:
1025 raise RevlogError(_('unknown flags (%#04x) in version %d '
1025 raise RevlogError(_('unknown flags (%#04x) in version %d '
1026 'revlog %s') %
1026 'revlog %s') %
1027 (flags >> 16, fmt, self.indexfile))
1027 (flags >> 16, fmt, self.indexfile))
1028 elif fmt == REVLOGV2:
1028 elif fmt == REVLOGV2:
1029 if flags & ~REVLOGV2_FLAGS:
1029 if flags & ~REVLOGV2_FLAGS:
1030 raise RevlogError(_('unknown flags (%#04x) in version %d '
1030 raise RevlogError(_('unknown flags (%#04x) in version %d '
1031 'revlog %s') %
1031 'revlog %s') %
1032 (flags >> 16, fmt, self.indexfile))
1032 (flags >> 16, fmt, self.indexfile))
1033 else:
1033 else:
1034 raise RevlogError(_('unknown version (%d) in revlog %s') %
1034 raise RevlogError(_('unknown version (%d) in revlog %s') %
1035 (fmt, self.indexfile))
1035 (fmt, self.indexfile))
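# a worked example of the header split above, assuming the default v1
# header value:
#
#   >>> v = 0x10001                  # REVLOGV1 | FLAG_INLINE_DATA
#   >>> hex(v & ~0xFFFF), v & 0xFFFF
#   ('0x10000', 1)                   # flags=FLAG_INLINE_DATA, fmt=REVLOGV1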
1036
1036
1037 self.storedeltachains = True
1037 self.storedeltachains = True
1038
1038
1039 self._io = revlogio()
1039 self._io = revlogio()
1040 if self.version == REVLOGV0:
1040 if self.version == REVLOGV0:
1041 self._io = revlogoldio()
1041 self._io = revlogoldio()
1042 try:
1042 try:
1043 d = self._io.parseindex(indexdata, self._inline)
1043 d = self._io.parseindex(indexdata, self._inline)
1044 except (ValueError, IndexError):
1044 except (ValueError, IndexError):
1045 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
1045 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
1046 self.index, nodemap, self._chunkcache = d
1046 self.index, nodemap, self._chunkcache = d
1047 if nodemap is not None:
1047 if nodemap is not None:
1048 self.nodemap = self._nodecache = nodemap
1048 self.nodemap = self._nodecache = nodemap
1049 if not self._chunkcache:
1049 if not self._chunkcache:
1050 self._chunkclear()
1050 self._chunkclear()
1051 # revnum -> (chain-length, sum-delta-length)
1051 # revnum -> (chain-length, sum-delta-length)
1052 self._chaininfocache = {}
1052 self._chaininfocache = {}
1053 # revlog header -> revlog compressor
1053 # revlog header -> revlog compressor
1054 self._decompressors = {}
1054 self._decompressors = {}
1055
1055
1056 @util.propertycache
1056 @util.propertycache
1057 def _compressor(self):
1057 def _compressor(self):
1058 return util.compengines[self._compengine].revlogcompressor()
1058 return util.compengines[self._compengine].revlogcompressor()
1059
1059
1060 def _indexfp(self, mode='r'):
1060 def _indexfp(self, mode='r'):
1061 """file object for the revlog's index file"""
1061 """file object for the revlog's index file"""
1062 args = {r'mode': mode}
1062 args = {r'mode': mode}
1063 if mode != 'r':
1063 if mode != 'r':
1064 args[r'checkambig'] = self._checkambig
1064 args[r'checkambig'] = self._checkambig
1065 if mode == 'w':
1065 if mode == 'w':
1066 args[r'atomictemp'] = True
1066 args[r'atomictemp'] = True
1067 return self.opener(self.indexfile, **args)
1067 return self.opener(self.indexfile, **args)
1068
1068
1069 def _datafp(self, mode='r'):
1069 def _datafp(self, mode='r'):
1070 """file object for the revlog's data file"""
1070 """file object for the revlog's data file"""
1071 return self.opener(self.datafile, mode=mode)
1071 return self.opener(self.datafile, mode=mode)
1072
1072
1073 @contextlib.contextmanager
1073 @contextlib.contextmanager
1074 def _datareadfp(self, existingfp=None):
1074 def _datareadfp(self, existingfp=None):
1075 """file object suitable to read data"""
1075 """file object suitable to read data"""
1076 if existingfp is not None:
1076 if existingfp is not None:
1077 yield existingfp
1077 yield existingfp
1078 else:
1078 else:
1079 if self._inline:
1079 if self._inline:
1080 func = self._indexfp
1080 func = self._indexfp
1081 else:
1081 else:
1082 func = self._datafp
1082 func = self._datafp
1083 with func() as fp:
1083 with func() as fp:
1084 yield fp
1084 yield fp
1085
1085
1086 def tip(self):
1086 def tip(self):
1087 return self.node(len(self.index) - 1)
1087 return self.node(len(self.index) - 1)
1088 def __contains__(self, rev):
1088 def __contains__(self, rev):
1089 return 0 <= rev < len(self)
1089 return 0 <= rev < len(self)
1090 def __len__(self):
1090 def __len__(self):
1091 return len(self.index)
1091 return len(self.index)
1092 def __iter__(self):
1092 def __iter__(self):
1093 return iter(pycompat.xrange(len(self)))
1093 return iter(pycompat.xrange(len(self)))
1094 def revs(self, start=0, stop=None):
1094 def revs(self, start=0, stop=None):
1095 """iterate over all rev in this revlog (from start to stop)"""
1095 """iterate over all rev in this revlog (from start to stop)"""
1096 step = 1
1096 step = 1
1097 length = len(self)
1097 length = len(self)
1098 if stop is not None:
1098 if stop is not None:
1099 if start > stop:
1099 if start > stop:
1100 step = -1
1100 step = -1
1101 stop += step
1101 stop += step
1102 if stop > length:
1102 if stop > length:
1103 stop = length
1103 stop = length
1104 else:
1104 else:
1105 stop = length
1105 stop = length
1106 return pycompat.xrange(start, stop, step)
1106 return pycompat.xrange(start, stop, step)
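# illustrative sketch (hypothetical revlog ``rl`` with at least three
# revisions, not from the original source): ``stop`` is inclusive, and
# iteration runs backwards when start > stop:
#
#   >>> list(rl.revs(0, 2))
#   [0, 1, 2]
#   >>> list(rl.revs(2, 0))
#   [2, 1, 0]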
1107
1107
1108 @util.propertycache
1108 @util.propertycache
1109 def nodemap(self):
1109 def nodemap(self):
1110 if self.index:
1110 if self.index:
1111 # populate mapping down to the initial node
1111 # populate mapping down to the initial node
1112 self.rev(self.node(0))
1112 node0 = self.index[0][7] # get around changelog filtering
1113 self.rev(node0)
1113 return self._nodecache
1114 return self._nodecache
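# illustrative sketch (hypothetical 3-rev log, not from the original
# source): looking up the first node drives the pure-python rev()
# fallback below from the tip all the way down to rev 0, so _nodecache
# ends up holding every node:
#
#   before:  {nullid: nullrev}
#   after:   {nullid: nullrev, node(2): 2, node(1): 1, node(0): 0}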
1114
1115
1115 def hasnode(self, node):
1116 def hasnode(self, node):
1116 try:
1117 try:
1117 self.rev(node)
1118 self.rev(node)
1118 return True
1119 return True
1119 except KeyError:
1120 except KeyError:
1120 return False
1121 return False
1121
1122
1122 def candelta(self, baserev, rev):
1123 def candelta(self, baserev, rev):
1123 """whether two revisions (baserev, rev) can be delta-ed or not"""
1124 """whether two revisions (baserev, rev) can be delta-ed or not"""
1124 # Disable delta if either rev requires a content-changing flag
1125 # Disable delta if either rev requires a content-changing flag
1125 # processor (ex. LFS). This is because such a flag processor can alter
1126 # processor (ex. LFS). This is because such a flag processor can alter
1126 # the rawtext content that the delta will be based on, and two clients
1127 # the rawtext content that the delta will be based on, and two clients
1127 # could have the same revlog node with different flags (i.e. different
1128 # could have the same revlog node with different flags (i.e. different
1128 # rawtext contents) and the delta could be incompatible.
1129 # rawtext contents) and the delta could be incompatible.
1129 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
1130 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
1130 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
1131 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
1131 return False
1132 return False
1132 return True
1133 return True
1133
1134
1134 def clearcaches(self):
1135 def clearcaches(self):
1135 self._cache = None
1136 self._cache = None
1136 self._chainbasecache.clear()
1137 self._chainbasecache.clear()
1137 self._chunkcache = (0, '')
1138 self._chunkcache = (0, '')
1138 self._pcache = {}
1139 self._pcache = {}
1139
1140
1140 try:
1141 try:
1141 self._nodecache.clearcaches()
1142 self._nodecache.clearcaches()
1142 except AttributeError:
1143 except AttributeError:
1143 self._nodecache = {nullid: nullrev}
1144 self._nodecache = {nullid: nullrev}
1144 self._nodepos = None
1145 self._nodepos = None
1145
1146
1146 def rev(self, node):
1147 def rev(self, node):
1147 try:
1148 try:
1148 return self._nodecache[node]
1149 return self._nodecache[node]
1149 except TypeError:
1150 except TypeError:
1150 raise
1151 raise
1151 except RevlogError:
1152 except RevlogError:
1152 # parsers.c radix tree lookup failed
1153 # parsers.c radix tree lookup failed
1153 if node == wdirid or node in wdirfilenodeids:
1154 if node == wdirid or node in wdirfilenodeids:
1154 raise error.WdirUnsupported
1155 raise error.WdirUnsupported
1155 raise LookupError(node, self.indexfile, _('no node'))
1156 raise LookupError(node, self.indexfile, _('no node'))
1156 except KeyError:
1157 except KeyError:
1157 # pure python cache lookup failed
1158 # pure python cache lookup failed
1158 n = self._nodecache
1159 n = self._nodecache
1159 i = self.index
1160 i = self.index
1160 p = self._nodepos
1161 p = self._nodepos
1161 if p is None:
1162 if p is None:
1162 p = len(i) - 1
1163 p = len(i) - 1
1163 else:
1164 else:
1164 assert p < len(i)
1165 assert p < len(i)
1165 for r in pycompat.xrange(p, -1, -1):
1166 for r in pycompat.xrange(p, -1, -1):
1166 v = i[r][7]
1167 v = i[r][7]
1167 n[v] = r
1168 n[v] = r
1168 if v == node:
1169 if v == node:
1169 self._nodepos = r - 1
1170 self._nodepos = r - 1
1170 return r
1171 return r
1171 if node == wdirid or node in wdirfilenodeids:
1172 if node == wdirid or node in wdirfilenodeids:
1172 raise error.WdirUnsupported
1173 raise error.WdirUnsupported
1173 raise LookupError(node, self.indexfile, _('no node'))
1174 raise LookupError(node, self.indexfile, _('no node'))
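# illustrative sketch of the backward scan above (hypothetical 5-rev log,
# not from the original source); _nodepos remembers where the scan left
# off so later lookups resume instead of rescanning:
#
#   rl.rev(node(3))   # scans revs 4, 3; caches both; _nodepos becomes 2
#   rl.rev(node(2))   # resumes at rev 2 and finds it in one step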
1174
1175
1175 # Accessors for index entries.
1176 # Accessors for index entries.
1176
1177
1177 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1178 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1178 # are flags.
1179 # are flags.
1179 def start(self, rev):
1180 def start(self, rev):
1180 return int(self.index[rev][0] >> 16)
1181 return int(self.index[rev][0] >> 16)
1181
1182
1182 def flags(self, rev):
1183 def flags(self, rev):
1183 return self.index[rev][0] & 0xFFFF
1184 return self.index[rev][0] & 0xFFFF
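# a worked example of the packed first field (values assumed for
# illustration): a chunk at data offset 2048 carrying one flag bit:
#
#   >>> e0 = (2048 << 16) | 0x0001
#   >>> e0 >> 16, e0 & 0xFFFF
#   (2048, 1)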
1184
1185
1185 def length(self, rev):
1186 def length(self, rev):
1186 return self.index[rev][1]
1187 return self.index[rev][1]
1187
1188
1188 def rawsize(self, rev):
1189 def rawsize(self, rev):
1189 """return the length of the uncompressed text for a given revision"""
1190 """return the length of the uncompressed text for a given revision"""
1190 l = self.index[rev][2]
1191 l = self.index[rev][2]
1191 if l >= 0:
1192 if l >= 0:
1192 return l
1193 return l
1193
1194
1194 t = self.revision(rev, raw=True)
1195 t = self.revision(rev, raw=True)
1195 return len(t)
1196 return len(t)
1196
1197
1197 def size(self, rev):
1198 def size(self, rev):
1198 """length of non-raw text (processed by a "read" flag processor)"""
1199 """length of non-raw text (processed by a "read" flag processor)"""
1199 # fast path: if no "read" flag processor could change the content,
1200 # fast path: if no "read" flag processor could change the content,
1200 # size is rawsize. note: ELLIPSIS is known to not change the content.
1201 # size is rawsize. note: ELLIPSIS is known to not change the content.
1201 flags = self.flags(rev)
1202 flags = self.flags(rev)
1202 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1203 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1203 return self.rawsize(rev)
1204 return self.rawsize(rev)
1204
1205
1205 return len(self.revision(rev, raw=False))
1206 return len(self.revision(rev, raw=False))
1206
1207
1207 def chainbase(self, rev):
1208 def chainbase(self, rev):
1208 base = self._chainbasecache.get(rev)
1209 base = self._chainbasecache.get(rev)
1209 if base is not None:
1210 if base is not None:
1210 return base
1211 return base
1211
1212
1212 index = self.index
1213 index = self.index
1213 iterrev = rev
1214 iterrev = rev
1214 base = index[iterrev][3]
1215 base = index[iterrev][3]
1215 while base != iterrev:
1216 while base != iterrev:
1216 iterrev = base
1217 iterrev = base
1217 base = index[iterrev][3]
1218 base = index[iterrev][3]
1218
1219
1219 self._chainbasecache[rev] = base
1220 self._chainbasecache[rev] = base
1220 return base
1221 return base
1221
1222
1222 def linkrev(self, rev):
1223 def linkrev(self, rev):
1223 return self.index[rev][4]
1224 return self.index[rev][4]
1224
1225
1225 def parentrevs(self, rev):
1226 def parentrevs(self, rev):
1226 try:
1227 try:
1227 entry = self.index[rev]
1228 entry = self.index[rev]
1228 except IndexError:
1229 except IndexError:
1229 if rev == wdirrev:
1230 if rev == wdirrev:
1230 raise error.WdirUnsupported
1231 raise error.WdirUnsupported
1231 raise
1232 raise
1232
1233
1233 return entry[5], entry[6]
1234 return entry[5], entry[6]
1234
1235
1235 def node(self, rev):
1236 def node(self, rev):
1236 try:
1237 try:
1237 return self.index[rev][7]
1238 return self.index[rev][7]
1238 except IndexError:
1239 except IndexError:
1239 if rev == wdirrev:
1240 if rev == wdirrev:
1240 raise error.WdirUnsupported
1241 raise error.WdirUnsupported
1241 raise
1242 raise
1242
1243
1243 # Derived from index values.
1244 # Derived from index values.
1244
1245
1245 def end(self, rev):
1246 def end(self, rev):
1246 return self.start(rev) + self.length(rev)
1247 return self.start(rev) + self.length(rev)
1247
1248
1248 def parents(self, node):
1249 def parents(self, node):
1249 i = self.index
1250 i = self.index
1250 d = i[self.rev(node)]
1251 d = i[self.rev(node)]
1251 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1252 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1252
1253
1253 def chainlen(self, rev):
1254 def chainlen(self, rev):
1254 return self._chaininfo(rev)[0]
1255 return self._chaininfo(rev)[0]
1255
1256
1256 def _chaininfo(self, rev):
1257 def _chaininfo(self, rev):
1257 chaininfocache = self._chaininfocache
1258 chaininfocache = self._chaininfocache
1258 if rev in chaininfocache:
1259 if rev in chaininfocache:
1259 return chaininfocache[rev]
1260 return chaininfocache[rev]
1260 index = self.index
1261 index = self.index
1261 generaldelta = self._generaldelta
1262 generaldelta = self._generaldelta
1262 iterrev = rev
1263 iterrev = rev
1263 e = index[iterrev]
1264 e = index[iterrev]
1264 clen = 0
1265 clen = 0
1265 compresseddeltalen = 0
1266 compresseddeltalen = 0
1266 while iterrev != e[3]:
1267 while iterrev != e[3]:
1267 clen += 1
1268 clen += 1
1268 compresseddeltalen += e[1]
1269 compresseddeltalen += e[1]
1269 if generaldelta:
1270 if generaldelta:
1270 iterrev = e[3]
1271 iterrev = e[3]
1271 else:
1272 else:
1272 iterrev -= 1
1273 iterrev -= 1
1273 if iterrev in chaininfocache:
1274 if iterrev in chaininfocache:
1274 t = chaininfocache[iterrev]
1275 t = chaininfocache[iterrev]
1275 clen += t[0]
1276 clen += t[0]
1276 compresseddeltalen += t[1]
1277 compresseddeltalen += t[1]
1277 break
1278 break
1278 e = index[iterrev]
1279 e = index[iterrev]
1279 else:
1280 else:
1280 # Add text length of base since decompressing that also takes
1281 # Add text length of base since decompressing that also takes
1281 # work. For cache hits the length is already included.
1282 # work. For cache hits the length is already included.
1282 compresseddeltalen += e[1]
1283 compresseddeltalen += e[1]
1283 r = (clen, compresseddeltalen)
1284 r = (clen, compresseddeltalen)
1284 chaininfocache[rev] = r
1285 chaininfocache[rev] = r
1285 return r
1286 return r
1286
1287
1287 def _deltachain(self, rev, stoprev=None):
1288 def _deltachain(self, rev, stoprev=None):
1288 """Obtain the delta chain for a revision.
1289 """Obtain the delta chain for a revision.
1289
1290
1290 ``stoprev`` specifies a revision to stop at. If not specified, we
1291 ``stoprev`` specifies a revision to stop at. If not specified, we
1291 stop at the base of the chain.
1292 stop at the base of the chain.
1292
1293
1293 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1294 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1294 revs in ascending order and ``stopped`` is a bool indicating whether
1295 revs in ascending order and ``stopped`` is a bool indicating whether
1295 ``stoprev`` was hit.
1296 ``stoprev`` was hit.
1296 """
1297 """
1297 # Try C implementation.
1298 # Try C implementation.
1298 try:
1299 try:
1299 return self.index.deltachain(rev, stoprev, self._generaldelta)
1300 return self.index.deltachain(rev, stoprev, self._generaldelta)
1300 except AttributeError:
1301 except AttributeError:
1301 pass
1302 pass
1302
1303
1303 chain = []
1304 chain = []
1304
1305
1305 # Alias to prevent attribute lookup in tight loop.
1306 # Alias to prevent attribute lookup in tight loop.
1306 index = self.index
1307 index = self.index
1307 generaldelta = self._generaldelta
1308 generaldelta = self._generaldelta
1308
1309
1309 iterrev = rev
1310 iterrev = rev
1310 e = index[iterrev]
1311 e = index[iterrev]
1311 while iterrev != e[3] and iterrev != stoprev:
1312 while iterrev != e[3] and iterrev != stoprev:
1312 chain.append(iterrev)
1313 chain.append(iterrev)
1313 if generaldelta:
1314 if generaldelta:
1314 iterrev = e[3]
1315 iterrev = e[3]
1315 else:
1316 else:
1316 iterrev -= 1
1317 iterrev -= 1
1317 e = index[iterrev]
1318 e = index[iterrev]
1318
1319
1319 if iterrev == stoprev:
1320 if iterrev == stoprev:
1320 stopped = True
1321 stopped = True
1321 else:
1322 else:
1322 chain.append(iterrev)
1323 chain.append(iterrev)
1323 stopped = False
1324 stopped = False
1324
1325
1325 chain.reverse()
1326 chain.reverse()
1326 return chain, stopped
1327 return chain, stopped
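# illustrative sketch (hypothetical general-delta log where rev 4 deltas
# against rev 2 and rev 2 against the full-text rev 0; not from the
# original source):
#
#   >>> rl._deltachain(4)
#   ([0, 2, 4], False)
#   >>> rl._deltachain(4, stoprev=2)   # chain cut short at stoprev
#   ([4], True)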
1327
1328
1328 def ancestors(self, revs, stoprev=0, inclusive=False):
1329 def ancestors(self, revs, stoprev=0, inclusive=False):
1329 """Generate the ancestors of 'revs' in reverse topological order.
1330 """Generate the ancestors of 'revs' in reverse topological order.
1330 Does not generate revs lower than stoprev.
1331 Does not generate revs lower than stoprev.
1331
1332
1332 See the documentation for ancestor.lazyancestors for more details."""
1333 See the documentation for ancestor.lazyancestors for more details."""
1333
1334
1334 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1335 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1335 inclusive=inclusive)
1336 inclusive=inclusive)
1336
1337
1337 def descendants(self, revs):
1338 def descendants(self, revs):
1338 """Generate the descendants of 'revs' in revision order.
1339 """Generate the descendants of 'revs' in revision order.
1339
1340
1340 Yield a sequence of revision numbers starting with a child of
1341 Yield a sequence of revision numbers starting with a child of
1341 some rev in revs, i.e., each revision is *not* considered a
1342 some rev in revs, i.e., each revision is *not* considered a
1342 descendant of itself. Results are ordered by revision number (a
1343 descendant of itself. Results are ordered by revision number (a
1343 topological sort)."""
1344 topological sort)."""
1344 first = min(revs)
1345 first = min(revs)
1345 if first == nullrev:
1346 if first == nullrev:
1346 for i in self:
1347 for i in self:
1347 yield i
1348 yield i
1348 return
1349 return
1349
1350
1350 seen = set(revs)
1351 seen = set(revs)
1351 for i in self.revs(start=first + 1):
1352 for i in self.revs(start=first + 1):
1352 for x in self.parentrevs(i):
1353 for x in self.parentrevs(i):
1353 if x != nullrev and x in seen:
1354 if x != nullrev and x in seen:
1354 seen.add(i)
1355 seen.add(i)
1355 yield i
1356 yield i
1356 break
1357 break
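# illustrative sketch (hypothetical DAG with revs 0 <- 1 <- 2 and a
# second branch 0 <- 3): the given revs are excluded and the output is
# revision-ordered:
#
#   >>> list(rl.descendants([0]))
#   [1, 2, 3]
#   >>> list(rl.descendants([1]))
#   [2]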
1357
1358
1358 def findcommonmissing(self, common=None, heads=None):
1359 def findcommonmissing(self, common=None, heads=None):
1359 """Return a tuple of the ancestors of common and the ancestors of heads
1360 """Return a tuple of the ancestors of common and the ancestors of heads
1360 that are not ancestors of common. In revset terminology, we return the
1361 that are not ancestors of common. In revset terminology, we return the
1361 tuple:
1362 tuple:
1362
1363
1363 ::common, (::heads) - (::common)
1364 ::common, (::heads) - (::common)
1364
1365
1365 The list is sorted by revision number, meaning it is
1366 The list is sorted by revision number, meaning it is
1366 topologically sorted.
1367 topologically sorted.
1367
1368
1368 'heads' and 'common' are both lists of node IDs. If heads is
1369 'heads' and 'common' are both lists of node IDs. If heads is
1369 not supplied, uses all of the revlog's heads. If common is not
1370 not supplied, uses all of the revlog's heads. If common is not
1370 supplied, uses nullid."""
1371 supplied, uses nullid."""
1371 if common is None:
1372 if common is None:
1372 common = [nullid]
1373 common = [nullid]
1373 if heads is None:
1374 if heads is None:
1374 heads = self.heads()
1375 heads = self.heads()
1375
1376
1376 common = [self.rev(n) for n in common]
1377 common = [self.rev(n) for n in common]
1377 heads = [self.rev(n) for n in heads]
1378 heads = [self.rev(n) for n in heads]
1378
1379
1379 # we want the ancestors, but inclusive
1380 # we want the ancestors, but inclusive
1380 class lazyset(object):
1381 class lazyset(object):
1381 def __init__(self, lazyvalues):
1382 def __init__(self, lazyvalues):
1382 self.addedvalues = set()
1383 self.addedvalues = set()
1383 self.lazyvalues = lazyvalues
1384 self.lazyvalues = lazyvalues
1384
1385
1385 def __contains__(self, value):
1386 def __contains__(self, value):
1386 return value in self.addedvalues or value in self.lazyvalues
1387 return value in self.addedvalues or value in self.lazyvalues
1387
1388
1388 def __iter__(self):
1389 def __iter__(self):
1389 added = self.addedvalues
1390 added = self.addedvalues
1390 for r in added:
1391 for r in added:
1391 yield r
1392 yield r
1392 for r in self.lazyvalues:
1393 for r in self.lazyvalues:
1393 if r not in added:
1394 if r not in added:
1394 yield r
1395 yield r
1395
1396
1396 def add(self, value):
1397 def add(self, value):
1397 self.addedvalues.add(value)
1398 self.addedvalues.add(value)
1398
1399
1399 def update(self, values):
1400 def update(self, values):
1400 self.addedvalues.update(values)
1401 self.addedvalues.update(values)
1401
1402
1402 has = lazyset(self.ancestors(common))
1403 has = lazyset(self.ancestors(common))
1403 has.add(nullrev)
1404 has.add(nullrev)
1404 has.update(common)
1405 has.update(common)
1405
1406
1406 # take all ancestors from heads that aren't in has
1407 # take all ancestors from heads that aren't in has
1407 missing = set()
1408 missing = set()
1408 visit = collections.deque(r for r in heads if r not in has)
1409 visit = collections.deque(r for r in heads if r not in has)
1409 while visit:
1410 while visit:
1410 r = visit.popleft()
1411 r = visit.popleft()
1411 if r in missing:
1412 if r in missing:
1412 continue
1413 continue
1413 else:
1414 else:
1414 missing.add(r)
1415 missing.add(r)
1415 for p in self.parentrevs(r):
1416 for p in self.parentrevs(r):
1416 if p not in has:
1417 if p not in has:
1417 visit.append(p)
1418 visit.append(p)
1418 missing = list(missing)
1419 missing = list(missing)
1419 missing.sort()
1420 missing.sort()
1420 return has, [self.node(miss) for miss in missing]
1421 return has, [self.node(miss) for miss in missing]
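# illustrative sketch of the revset identity above (hypothetical linear
# history 0-1-2-3, common=[node(1)], heads=[node(3)]; not from the
# original source):
#
#   has      covers ::1        -> {nullrev, 0, 1}
#   missing  is (::3) - (::1)  -> [node(2), node(3)]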
1421
1422
1422 def incrementalmissingrevs(self, common=None):
1423 def incrementalmissingrevs(self, common=None):
1423 """Return an object that can be used to incrementally compute the
1424 """Return an object that can be used to incrementally compute the
1424 revision numbers of the ancestors of arbitrary sets that are not
1425 revision numbers of the ancestors of arbitrary sets that are not
1425 ancestors of common. This is an ancestor.incrementalmissingancestors
1426 ancestors of common. This is an ancestor.incrementalmissingancestors
1426 object.
1427 object.
1427
1428
1428 'common' is a list of revision numbers. If common is not supplied, uses
1429 'common' is a list of revision numbers. If common is not supplied, uses
1429 nullrev.
1430 nullrev.
1430 """
1431 """
1431 if common is None:
1432 if common is None:
1432 common = [nullrev]
1433 common = [nullrev]
1433
1434
1434 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1435 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1435
1436
1436 def findmissingrevs(self, common=None, heads=None):
1437 def findmissingrevs(self, common=None, heads=None):
1437 """Return the revision numbers of the ancestors of heads that
1438 """Return the revision numbers of the ancestors of heads that
1438 are not ancestors of common.
1439 are not ancestors of common.
1439
1440
1440 More specifically, return a list of revision numbers corresponding to
1441 More specifically, return a list of revision numbers corresponding to
1441 nodes N such that every N satisfies the following constraints:
1442 nodes N such that every N satisfies the following constraints:
1442
1443
1443 1. N is an ancestor of some node in 'heads'
1444 1. N is an ancestor of some node in 'heads'
1444 2. N is not an ancestor of any node in 'common'
1445 2. N is not an ancestor of any node in 'common'
1445
1446
1446 The list is sorted by revision number, meaning it is
1447 The list is sorted by revision number, meaning it is
1447 topologically sorted.
1448 topologically sorted.
1448
1449
1449 'heads' and 'common' are both lists of revision numbers. If heads is
1450 'heads' and 'common' are both lists of revision numbers. If heads is
1450 not supplied, uses all of the revlog's heads. If common is not
1451 not supplied, uses all of the revlog's heads. If common is not
1451 supplied, uses nullrev."""
1452 supplied, uses nullrev."""
1452 if common is None:
1453 if common is None:
1453 common = [nullrev]
1454 common = [nullrev]
1454 if heads is None:
1455 if heads is None:
1455 heads = self.headrevs()
1456 heads = self.headrevs()
1456
1457
1457 inc = self.incrementalmissingrevs(common=common)
1458 inc = self.incrementalmissingrevs(common=common)
1458 return inc.missingancestors(heads)
1459 return inc.missingancestors(heads)
1459
1460
1460 def findmissing(self, common=None, heads=None):
1461 def findmissing(self, common=None, heads=None):
1461 """Return the ancestors of heads that are not ancestors of common.
1462 """Return the ancestors of heads that are not ancestors of common.
1462
1463
1463 More specifically, return a list of nodes N such that every N
1464 More specifically, return a list of nodes N such that every N
1464 satisfies the following constraints:
1465 satisfies the following constraints:
1465
1466
1466 1. N is an ancestor of some node in 'heads'
1467 1. N is an ancestor of some node in 'heads'
1467 2. N is not an ancestor of any node in 'common'
1468 2. N is not an ancestor of any node in 'common'
1468
1469
1469 The list is sorted by revision number, meaning it is
1470 The list is sorted by revision number, meaning it is
1470 topologically sorted.
1471 topologically sorted.
1471
1472
1472 'heads' and 'common' are both lists of node IDs. If heads is
1473 'heads' and 'common' are both lists of node IDs. If heads is
1473 not supplied, uses all of the revlog's heads. If common is not
1474 not supplied, uses all of the revlog's heads. If common is not
1474 supplied, uses nullid."""
1475 supplied, uses nullid."""
1475 if common is None:
1476 if common is None:
1476 common = [nullid]
1477 common = [nullid]
1477 if heads is None:
1478 if heads is None:
1478 heads = self.heads()
1479 heads = self.heads()
1479
1480
1480 common = [self.rev(n) for n in common]
1481 common = [self.rev(n) for n in common]
1481 heads = [self.rev(n) for n in heads]
1482 heads = [self.rev(n) for n in heads]
1482
1483
1483 inc = self.incrementalmissingrevs(common=common)
1484 inc = self.incrementalmissingrevs(common=common)
1484 return [self.node(r) for r in inc.missingancestors(heads)]
1485 return [self.node(r) for r in inc.missingancestors(heads)]
1485
1486
1486 def nodesbetween(self, roots=None, heads=None):
1487 def nodesbetween(self, roots=None, heads=None):
1487 """Return a topological path from 'roots' to 'heads'.
1488 """Return a topological path from 'roots' to 'heads'.
1488
1489
1489 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1490 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1490 topologically sorted list of all nodes N that satisfy both of
1491 topologically sorted list of all nodes N that satisfy both of
1491 these constraints:
1492 these constraints:
1492
1493
1493 1. N is a descendant of some node in 'roots'
1494 1. N is a descendant of some node in 'roots'
1494 2. N is an ancestor of some node in 'heads'
1495 2. N is an ancestor of some node in 'heads'
1495
1496
1496 Every node is considered to be both a descendant and an ancestor
1497 Every node is considered to be both a descendant and an ancestor
1497 of itself, so every reachable node in 'roots' and 'heads' will be
1498 of itself, so every reachable node in 'roots' and 'heads' will be
1498 included in 'nodes'.
1499 included in 'nodes'.
1499
1500
1500 'outroots' is the list of reachable nodes in 'roots', i.e., the
1501 'outroots' is the list of reachable nodes in 'roots', i.e., the
1501 subset of 'roots' that is returned in 'nodes'. Likewise,
1502 subset of 'roots' that is returned in 'nodes'. Likewise,
1502 'outheads' is the subset of 'heads' that is also in 'nodes'.
1503 'outheads' is the subset of 'heads' that is also in 'nodes'.
1503
1504
1504 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1505 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1505 unspecified, uses nullid as the only root. If 'heads' is
1506 unspecified, uses nullid as the only root. If 'heads' is
1506 unspecified, uses the list of all the revlog's heads."""
1507 unspecified, uses the list of all the revlog's heads."""
1507 nonodes = ([], [], [])
1508 nonodes = ([], [], [])
1508 if roots is not None:
1509 if roots is not None:
1509 roots = list(roots)
1510 roots = list(roots)
1510 if not roots:
1511 if not roots:
1511 return nonodes
1512 return nonodes
1512 lowestrev = min([self.rev(n) for n in roots])
1513 lowestrev = min([self.rev(n) for n in roots])
1513 else:
1514 else:
1514 roots = [nullid] # Everybody's a descendant of nullid
1515 roots = [nullid] # Everybody's a descendant of nullid
1515 lowestrev = nullrev
1516 lowestrev = nullrev
1516 if (lowestrev == nullrev) and (heads is None):
1517 if (lowestrev == nullrev) and (heads is None):
1517 # We want _all_ the nodes!
1518 # We want _all_ the nodes!
1518 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1519 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1519 if heads is None:
1520 if heads is None:
1520 # All nodes are ancestors, so the latest ancestor is the last
1521 # All nodes are ancestors, so the latest ancestor is the last
1521 # node.
1522 # node.
1522 highestrev = len(self) - 1
1523 highestrev = len(self) - 1
1523 # Set ancestors to None to signal that every node is an ancestor.
1524 # Set ancestors to None to signal that every node is an ancestor.
1524 ancestors = None
1525 ancestors = None
1525 # Set heads to an empty dictionary for later discovery of heads
1526 # Set heads to an empty dictionary for later discovery of heads
1526 heads = {}
1527 heads = {}
1527 else:
1528 else:
1528 heads = list(heads)
1529 heads = list(heads)
1529 if not heads:
1530 if not heads:
1530 return nonodes
1531 return nonodes
1531 ancestors = set()
1532 ancestors = set()
1532 # Turn heads into a dictionary so we can remove 'fake' heads.
1533 # Turn heads into a dictionary so we can remove 'fake' heads.
1533 # Also, later we will be using it to filter out the heads we can't
1534 # Also, later we will be using it to filter out the heads we can't
1534 # find from roots.
1535 # find from roots.
1535 heads = dict.fromkeys(heads, False)
1536 heads = dict.fromkeys(heads, False)
1536 # Start at the top and keep marking parents until we're done.
1537 # Start at the top and keep marking parents until we're done.
1537 nodestotag = set(heads)
1538 nodestotag = set(heads)
1538 # Remember where the top was so we can use it as a limit later.
1539 # Remember where the top was so we can use it as a limit later.
1539 highestrev = max([self.rev(n) for n in nodestotag])
1540 highestrev = max([self.rev(n) for n in nodestotag])
1540 while nodestotag:
1541 while nodestotag:
1541 # grab a node to tag
1542 # grab a node to tag
1542 n = nodestotag.pop()
1543 n = nodestotag.pop()
1543 # Never tag nullid
1544 # Never tag nullid
1544 if n == nullid:
1545 if n == nullid:
1545 continue
1546 continue
1546 # A node's revision number represents its place in a
1547 # A node's revision number represents its place in a
1547 # topologically sorted list of nodes.
1548 # topologically sorted list of nodes.
1548 r = self.rev(n)
1549 r = self.rev(n)
1549 if r >= lowestrev:
1550 if r >= lowestrev:
1550 if n not in ancestors:
1551 if n not in ancestors:
1551 # If we are possibly a descendant of one of the roots
1552 # If we are possibly a descendant of one of the roots
1552 # and we haven't already been marked as an ancestor
1553 # and we haven't already been marked as an ancestor
1553 ancestors.add(n) # Mark as ancestor
1554 ancestors.add(n) # Mark as ancestor
1554 # Add non-nullid parents to list of nodes to tag.
1555 # Add non-nullid parents to list of nodes to tag.
1555 nodestotag.update([p for p in self.parents(n) if
1556 nodestotag.update([p for p in self.parents(n) if
1556 p != nullid])
1557 p != nullid])
1557 elif n in heads: # We've seen it before, is it a fake head?
1558 elif n in heads: # We've seen it before, is it a fake head?
1558 # So it is; real heads should not be the ancestors of
1559 # So it is; real heads should not be the ancestors of
1559 # any other heads.
1560 # any other heads.
1560 heads.pop(n)
1561 heads.pop(n)
1561 if not ancestors:
1562 if not ancestors:
1562 return nonodes
1563 return nonodes
1563 # Now that we have our set of ancestors, we want to remove any
1564 # Now that we have our set of ancestors, we want to remove any
1564 # roots that are not ancestors.
1565 # roots that are not ancestors.
1565
1566
1566 # If one of the roots was nullid, everything is included anyway.
1567 # If one of the roots was nullid, everything is included anyway.
1567 if lowestrev > nullrev:
1568 if lowestrev > nullrev:
1568 # But, since we weren't, let's recompute the lowest rev to not
1569 # But, since we weren't, let's recompute the lowest rev to not
1569 # include roots that aren't ancestors.
1570 # include roots that aren't ancestors.
1570
1571
1571 # Filter out roots that aren't ancestors of heads
1572 # Filter out roots that aren't ancestors of heads
1572 roots = [root for root in roots if root in ancestors]
1573 roots = [root for root in roots if root in ancestors]
1573 # Recompute the lowest revision
1574 # Recompute the lowest revision
1574 if roots:
1575 if roots:
1575 lowestrev = min([self.rev(root) for root in roots])
1576 lowestrev = min([self.rev(root) for root in roots])
1576 else:
1577 else:
1577 # No more roots? Return empty list
1578 # No more roots? Return empty list
1578 return nonodes
1579 return nonodes
1579 else:
1580 else:
1580 # We are descending from nullid, and don't need to care about
1581 # We are descending from nullid, and don't need to care about
1581 # any other roots.
1582 # any other roots.
1582 lowestrev = nullrev
1583 lowestrev = nullrev
1583 roots = [nullid]
1584 roots = [nullid]
1584 # Transform our roots list into a set.
1585 # Transform our roots list into a set.
1585 descendants = set(roots)
1586 descendants = set(roots)
1586 # Also, keep the original roots so we can filter out roots that aren't
1587 # Also, keep the original roots so we can filter out roots that aren't
1587 # 'real' roots (i.e. are descended from other roots).
1588 # 'real' roots (i.e. are descended from other roots).
1588 roots = descendants.copy()
1589 roots = descendants.copy()
1589 # Our topologically sorted list of output nodes.
1590 # Our topologically sorted list of output nodes.
1590 orderedout = []
1591 orderedout = []
1591 # Don't start at nullid since we don't want nullid in our output list,
1592 # Don't start at nullid since we don't want nullid in our output list,
1592 # and if nullid shows up in descendants, empty parents will look like
1593 # and if nullid shows up in descendants, empty parents will look like
1593 # they're descendants.
1594 # they're descendants.
1594 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1595 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1595 n = self.node(r)
1596 n = self.node(r)
1596 isdescendant = False
1597 isdescendant = False
1597 if lowestrev == nullrev: # Everybody is a descendant of nullid
1598 if lowestrev == nullrev: # Everybody is a descendant of nullid
1598 isdescendant = True
1599 isdescendant = True
1599 elif n in descendants:
1600 elif n in descendants:
1600 # n is already a descendant
1601 # n is already a descendant
1601 isdescendant = True
1602 isdescendant = True
1602 # This check only needs to be done here because all the roots
1603 # This check only needs to be done here because all the roots
1603 # will start being marked as descendants before the loop.
1604 # will start being marked as descendants before the loop.
1604 if n in roots:
1605 if n in roots:
1605 # If n was a root, check if it's a 'real' root.
1606 # If n was a root, check if it's a 'real' root.
1606 p = tuple(self.parents(n))
1607 p = tuple(self.parents(n))
1607 # If any of its parents are descendants, it's not a root.
1608 # If any of its parents are descendants, it's not a root.
1608 if (p[0] in descendants) or (p[1] in descendants):
1609 if (p[0] in descendants) or (p[1] in descendants):
1609 roots.remove(n)
1610 roots.remove(n)
1610 else:
1611 else:
1611 p = tuple(self.parents(n))
1612 p = tuple(self.parents(n))
1612 # A node is a descendant if either of its parents is a
1613 # A node is a descendant if either of its parents is a
1613 # descendant. (We seeded the descendants set with the roots
1614 # descendant. (We seeded the descendants set with the roots
1614 # up there, remember?)
1615 # up there, remember?)
1615 if (p[0] in descendants) or (p[1] in descendants):
1616 if (p[0] in descendants) or (p[1] in descendants):
1616 descendants.add(n)
1617 descendants.add(n)
1617 isdescendant = True
1618 isdescendant = True
1618 if isdescendant and ((ancestors is None) or (n in ancestors)):
1619 if isdescendant and ((ancestors is None) or (n in ancestors)):
1619 # Only include nodes that are both descendants and ancestors.
1620 # Only include nodes that are both descendants and ancestors.
1620 orderedout.append(n)
1621 orderedout.append(n)
1621 if (ancestors is not None) and (n in heads):
1622 if (ancestors is not None) and (n in heads):
1622 # We're trying to figure out which heads are reachable
1623 # We're trying to figure out which heads are reachable
1623 # from roots.
1624 # from roots.
1624 # Mark this head as having been reached
1625 # Mark this head as having been reached
1625 heads[n] = True
1626 heads[n] = True
1626 elif ancestors is None:
1627 elif ancestors is None:
1627 # Otherwise, we're trying to discover the heads.
1628 # Otherwise, we're trying to discover the heads.
1628 # Assume this is a head because if it isn't, the next step
1629 # Assume this is a head because if it isn't, the next step
1629 # will eventually remove it.
1630 # will eventually remove it.
1630 heads[n] = True
1631 heads[n] = True
1631 # But, obviously its parents aren't.
1632 # But, obviously its parents aren't.
1632 for p in self.parents(n):
1633 for p in self.parents(n):
1633 heads.pop(p, None)
1634 heads.pop(p, None)
1634 heads = [head for head, flag in heads.iteritems() if flag]
1635 heads = [head for head, flag in heads.iteritems() if flag]
1635 roots = list(roots)
1636 roots = list(roots)
1636 assert orderedout
1637 assert orderedout
1637 assert roots
1638 assert roots
1638 assert heads
1639 assert heads
1639 return (orderedout, roots, heads)
1640 return (orderedout, roots, heads)
1640
1641
1641 def headrevs(self):
1642 def headrevs(self):
1642 try:
1643 try:
1643 return self.index.headrevs()
1644 return self.index.headrevs()
1644 except AttributeError:
1645 except AttributeError:
1645 return self._headrevs()
1646 return self._headrevs()
1646
1647
1647 def computephases(self, roots):
1648 def computephases(self, roots):
1648 return self.index.computephasesmapsets(roots)
1649 return self.index.computephasesmapsets(roots)
1649
1650
1650 def _headrevs(self):
1651 def _headrevs(self):
1651 count = len(self)
1652 count = len(self)
1652 if not count:
1653 if not count:
1653 return [nullrev]
1654 return [nullrev]
1654 # we won't iterate over filtered revs, so nobody is a head at start
1655 # we won't iterate over filtered revs, so nobody is a head at start
1655 ishead = [0] * (count + 1)
1656 ishead = [0] * (count + 1)
1656 index = self.index
1657 index = self.index
1657 for r in self:
1658 for r in self:
1658 ishead[r] = 1 # I may be a head
1659 ishead[r] = 1 # I may be a head
1659 e = index[r]
1660 e = index[r]
1660 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1661 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1661 return [r for r, val in enumerate(ishead) if val]
1662 return [r for r, val in enumerate(ishead) if val]
1662
1663
1663 def heads(self, start=None, stop=None):
1664 def heads(self, start=None, stop=None):
1664 """return the list of all nodes that have no children
1665 """return the list of all nodes that have no children
1665
1666
1666 if start is specified, only heads that are descendants of
1667 if start is specified, only heads that are descendants of
1667 start will be returned
1668 start will be returned
1668 if stop is specified, it will consider all the revs from stop
1669 if stop is specified, it will consider all the revs from stop
1669 as if they had no children
1670 as if they had no children
1670 """
1671 """
1671 if start is None and stop is None:
1672 if start is None and stop is None:
1672 if not len(self):
1673 if not len(self):
1673 return [nullid]
1674 return [nullid]
1674 return [self.node(r) for r in self.headrevs()]
1675 return [self.node(r) for r in self.headrevs()]
1675
1676
1676 if start is None:
1677 if start is None:
1677 start = nullid
1678 start = nullid
1678 if stop is None:
1679 if stop is None:
1679 stop = []
1680 stop = []
1680 stoprevs = set([self.rev(n) for n in stop])
1681 stoprevs = set([self.rev(n) for n in stop])
1681 startrev = self.rev(start)
1682 startrev = self.rev(start)
1682 reachable = {startrev}
1683 reachable = {startrev}
1683 heads = {startrev}
1684 heads = {startrev}
1684
1685
1685 parentrevs = self.parentrevs
1686 parentrevs = self.parentrevs
1686 for r in self.revs(start=startrev + 1):
1687 for r in self.revs(start=startrev + 1):
1687 for p in parentrevs(r):
1688 for p in parentrevs(r):
1688 if p in reachable:
1689 if p in reachable:
1689 if r not in stoprevs:
1690 if r not in stoprevs:
1690 reachable.add(r)
1691 reachable.add(r)
1691 heads.add(r)
1692 heads.add(r)
1692 if p in heads and p not in stoprevs:
1693 if p in heads and p not in stoprevs:
1693 heads.remove(p)
1694 heads.remove(p)
1694
1695
1695 return [self.node(r) for r in heads]
1696 return [self.node(r) for r in heads]
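# illustrative sketch (hypothetical two-branch DAG 0 <- 1 and 0 <- 2;
# node() is shorthand for rl.node(); not from the original source):
#
#   rl.heads()                  # every childless node -> [node(1), node(2)]
#   rl.heads(start=rl.node(1))  # only heads descending from start -> [node(1)]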
1696
1697
1697 def children(self, node):
1698 def children(self, node):
1698 """find the children of a given node"""
1699 """find the children of a given node"""
1699 c = []
1700 c = []
1700 p = self.rev(node)
1701 p = self.rev(node)
1701 for r in self.revs(start=p + 1):
1702 for r in self.revs(start=p + 1):
1702 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1703 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1703 if prevs:
1704 if prevs:
1704 for pr in prevs:
1705 for pr in prevs:
1705 if pr == p:
1706 if pr == p:
1706 c.append(self.node(r))
1707 c.append(self.node(r))
1707 elif p == nullrev:
1708 elif p == nullrev:
1708 c.append(self.node(r))
1709 c.append(self.node(r))
1709 return c
1710 return c
1710
1711
1711 def commonancestorsheads(self, a, b):
1712 def commonancestorsheads(self, a, b):
1712 """calculate all the heads of the common ancestors of nodes a and b"""
1713 """calculate all the heads of the common ancestors of nodes a and b"""
1713 a, b = self.rev(a), self.rev(b)
1714 a, b = self.rev(a), self.rev(b)
1714 ancs = self._commonancestorsheads(a, b)
1715 ancs = self._commonancestorsheads(a, b)
1715 return pycompat.maplist(self.node, ancs)
1716 return pycompat.maplist(self.node, ancs)
1716
1717
1717 def _commonancestorsheads(self, *revs):
1718 def _commonancestorsheads(self, *revs):
1718 """calculate all the heads of the common ancestors of revs"""
1719 """calculate all the heads of the common ancestors of revs"""
1719 try:
1720 try:
1720 ancs = self.index.commonancestorsheads(*revs)
1721 ancs = self.index.commonancestorsheads(*revs)
1721 except (AttributeError, OverflowError): # C implementation failed
1722 except (AttributeError, OverflowError): # C implementation failed
1722 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1723 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1723 return ancs
1724 return ancs
1724
1725
1725 def isancestor(self, a, b):
1726 def isancestor(self, a, b):
1726 """return True if node a is an ancestor of node b
1727 """return True if node a is an ancestor of node b
1727
1728
1728 A revision is considered an ancestor of itself."""
1729 A revision is considered an ancestor of itself."""
1729 a, b = self.rev(a), self.rev(b)
1730 a, b = self.rev(a), self.rev(b)
1730 return self.isancestorrev(a, b)
1731 return self.isancestorrev(a, b)
1731
1732
1732 def isancestorrev(self, a, b):
1733 def isancestorrev(self, a, b):
1733 """return True if revision a is an ancestor of revision b
1734 """return True if revision a is an ancestor of revision b
1734
1735
1735 A revision is considered an ancestor of itself.
1736 A revision is considered an ancestor of itself.
1736
1737
1737 The implementation of this is trivial but the use of
1738 The implementation of this is trivial but the use of
1738 commonancestorsheads is not."""
1739 commonancestorsheads is not."""
1739 if a == nullrev:
1740 if a == nullrev:
1740 return True
1741 return True
1741 elif a == b:
1742 elif a == b:
1742 return True
1743 return True
1743 elif a > b:
1744 elif a > b:
1744 return False
1745 return False
1745 return a in self._commonancestorsheads(a, b)
1746 return a in self._commonancestorsheads(a, b)
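# the three shortcuts above in action (hypothetical revlog ``rl`` with
# revs 0..5, not from the original source):
#
#   >>> rl.isancestorrev(nullrev, 5)   # nullrev precedes everything
#   True
#   >>> rl.isancestorrev(3, 3)         # a rev is its own ancestor
#   True
#   >>> rl.isancestorrev(4, 2)         # an ancestor never has a higher rev
#   False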
1746
1747
1747 def ancestor(self, a, b):
1748 def ancestor(self, a, b):
1748 """calculate the "best" common ancestor of nodes a and b"""
1749 """calculate the "best" common ancestor of nodes a and b"""
1749
1750
1750 a, b = self.rev(a), self.rev(b)
1751 a, b = self.rev(a), self.rev(b)
1751 try:
1752 try:
1752 ancs = self.index.ancestors(a, b)
1753 ancs = self.index.ancestors(a, b)
1753 except (AttributeError, OverflowError):
1754 except (AttributeError, OverflowError):
1754 ancs = ancestor.ancestors(self.parentrevs, a, b)
1755 ancs = ancestor.ancestors(self.parentrevs, a, b)
1755 if ancs:
1756 if ancs:
1756 # choose a consistent winner when there's a tie
1757 # choose a consistent winner when there's a tie
1757 return min(map(self.node, ancs))
1758 return min(map(self.node, ancs))
1758 return nullid
1759 return nullid
1759
1760
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                 _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                     _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

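    # Usage sketch (illustrative only; `rl` is a hypothetical revlog
    # instance). lookup() accepts every identifier form listed in the
    # docstring and resolves it to a 20-byte binary node:
    #
    #   rl.lookup(0)             # revision number
    #   rl.lookup('0')           # str(revision number)
    #   rl.lookup('-1')          # negative str revisions count back from tip
    #   rl.lookup(binnode)       # 20-byte binary nodeid, returned unchanged
    #   rl.lookup('1de3818a')    # unambiguous hex prefix via _partialmatch()
    #
    # Ambiguous prefixes raise AmbiguousPrefixLookupError; identifiers that
    # match nothing raise LookupError.
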
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

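    # Illustrative behavior (not in the original source): contiguous reads
    # grow the cache; discontiguous reads replace it. Assuming _chunksize is
    # large enough:
    #
    #   self._chunkcache = (0, d)        # d is 65536 bytes
    #   self._cachesegment(65536, d2)    # contiguous: cache -> (0, d + d2)
    #   self._cachesegment(500000, d3)   # gap: cache is reset to (500000, d3)
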
    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

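    # Worked example of the window arithmetic above (illustrative only),
    # assuming self._chunkcachesize = 65536 (a power of two, which the
    # bitmasks require):
    #
    #   offset, length = 70000, 100
    #   realoffset = 70000 & ~65535                            # -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536  # -> 65536
    #
    # so a 100-byte request triggers one aligned 64KiB read covering
    # [65536, 131072), and the surplus bytes stay in the chunk cache for
    # neighboring requests.
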
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

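    # Note on the inline adjustment above (explanatory, not in the original
    # source): in an inline revlog the .i file interleaves index entries with
    # revision data, so the data for revision r is preceded by r + 1 index
    # entries (those for revisions 0..r). For example, with a 64-byte entry
    # size (self._io.size), revision 0's data starts at
    # start(0) + (0 + 1) * 64 = 64, immediately after its own index entry.
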
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs, targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

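    # Usage sketch (illustrative only; `rl` is a hypothetical revlog
    # instance):
    #
    #   text = rl.revision(5)            # fulltext of revision 5
    #   text = rl.revision(somenode)     # same, addressed by binary nodeid
    #   raw  = rl.revision(5, raw=True)  # stored rawtext, flag transforms
    #                                    # skipped (changegroups, debugging)
    #
    # Internally this walks the delta chain: _deltachain() yields the base
    # revision plus deltas, _chunks() decompresses them in one pass, and
    # mdiff.patches() folds the deltas onto the base text.
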
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

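    # Sketch of how a flag processor plugs into _processflags() (hedged: the
    # encode/decode transforms below are made up for illustration, but the
    # (read, write, raw) tuple shape matches what this method unpacks, and
    # registration goes through the module-level addflagprocessor() helper):
    #
    #   def readtransform(rlog, text):
    #       return decode(text), True    # -> (new text, validatehash)
    #
    #   def writetransform(rlog, text):
    #       return encode(text), True
    #
    #   def rawtransform(rlog, rawtext):
    #       return False                 # rawtext hash cannot be validated
    #
    #   addflagprocessor(someflag,
    #                    (readtransform, writetransform, rawtransform))
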
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise RevlogError(_("integrity check failed on %s:%s")
                                  % (self.indexfile, pycompat.bytestr(revornode)))
        except RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

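    # The returned 2-tuple is (header, data), written out as header + data.
    # Illustrative outcomes (a summary of the branches above, not exhaustive):
    #
    #   compress('')             -> ('', '')    empty input passes through
    #   compress(compressible)   -> ('', frame) engine output, header included
    #   compress('\0...')        -> ('', data)  the NUL lead byte already marks
    #                                           the chunk as stored-raw
    #   compress(other raw data) -> ('u', data) explicit "uncompressed" marker
    #                                           for decompress() to strip
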
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize the "decompress data encoded
        # with a common and officially supported compression engine" case
        # over "raw data" and "data encoded by a less common or non-official
        # compression engine." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') %
                                  stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

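    # Header-byte dispatch, summarized (matches the branches above):
    #
    #   'x'   -> zlib stream (0x78 is the usual zlib first byte)
    #   '\0'  -> raw data stored literally, returned as-is
    #   'u'   -> raw data behind a one-byte marker, which is sliced off
    #   other -> resolved once via util.compengines.forrevlogheader() and
    #            memoized in self._decompressors for later chunks
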
    def _isgooddeltainfo(self, deltainfo, revinfo):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if deltainfo is None:
            return False

        # - 'deltainfo.distance' is the distance from the base revision --
        #   bounding it limits the amount of I/O we need to do.
        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
        #   deltas we need to apply -- bounding it limits the amount of CPU
        #   we consume.

        if self._sparserevlog:
            # As sparse-read will be used, we can consider that the distance,
            # instead of being the span of the whole chunk,
            # is the span of the largest read chunk
            base = deltainfo.base

            if base != nullrev:
                deltachain = self._deltachain(base)[0]
            else:
                deltachain = []

            chunks = _slicechunk(self, deltachain, deltainfo)
            distance = max(map(lambda revs: _segmentspan(self, revs), chunks))
        else:
            distance = deltainfo.distance

        textlen = revinfo.textlen
        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = distance # ensure the conditional passes
        maxdist = max(maxdist, defaultmax)
        if self._sparserevlog and maxdist < self._srmingapsize:
            # In multiple places we ignore irrelevant data ranges below a
            # certain size. We apply the same tradeoff here and relax the span
            # constraint for small enough content.
            maxdist = self._srmingapsize

        # Bad delta from read span:
        #
        #   If the span of data read is larger than the maximum allowed.
        if maxdist < distance:
            return False

        # Bad delta from new delta size:
        #
        #   If the delta size is larger than the target text, storing the
        #   delta will be inefficient.
        if textlen < deltainfo.deltalen:
            return False

        # Bad delta from cumulated payload size:
        #
        #   If the sum of deltas gets larger than K * target text length.
        if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
            return False

        # Bad delta from chain length:
        #
        #   If the number of deltas in the chain gets too high.
        if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
            return False

        return True

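    # Worked example of the bounds above (illustrative numbers): with
    # revinfo.textlen = 1000 and _maxdeltachainspan unset, maxdist becomes
    # max(distance, 4 * 1000), so the read-span check can only reject when an
    # explicit span limit is configured. A candidate delta is still rejected
    # if deltainfo.deltalen > 1000 (larger than the text it encodes), if the
    # cumulated compressed deltas exceed LIMIT_DELTA2TEXT * 1000, or if the
    # chain grows past _maxchainlen.
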
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid or node in wdirfilenodeids:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = None
        else:
            deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer._buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

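    # Editorial sketch (not part of the original source): a minimal caller of
    # addgroup(). The names ``rl``, ``repo``, ``tr`` and ``deltas`` are
    # hypothetical; each element of ``deltas`` must be a 7-tuple matching the
    # unpacking in the loop above.
    #
    #   >>> def linkmapper(linknode):
    #   ...     return repo.changelog.rev(linknode)
    #   >>> added = rl.addgroup(deltas, linkmapper, tr,
    #   ...                     addrevisioncb=lambda rl, node: None)
    #   >>> # ``added`` holds every node seen, including ones that were
    #   >>> # already present in the revlog.
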
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

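    # Editorial sketch (not part of the original source): the shape of a
    # matching full-replacement delta. A single-hunk delta is a ">lll"
    # header (start, end, new length) followed by the replacement text, and
    # censored revisions begin with the "\1\ncensored:" metadata marker.
    # ``oldlen`` below stands for the raw size of the base revision:
    #
    #   >>> import struct
    #   >>> newtext = "\1\ncensored:moved elsewhere"
    #   >>> delta = struct.pack(">lll", 0, oldlen, len(newtext)) + newtext
    #   >>> hlen = struct.calcsize(">lll")
    #   >>> delta[hlen:hlen + 11] == "\1\ncensored:"
    #   True
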
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

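    # Editorial example (not in the original source): consider a revlog with
    # three revisions where revs 1 and 2 are both children of rev 0 and the
    # linkrevs are [0, 4, 3]. Stripping linkrev 4 forces truncation at rev 1,
    # which also discards rev 2 even though its linkrev 3 is below the
    # threshold, so rev 2 is reported as having a broken linkrev:
    #
    #   >>> rl.getstrippoint(4)    # ``rl`` is a hypothetical revlog
    #   (1, set([2]))
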
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        # the index keeps a trailing sentinel entry, hence the [rev:-1] slice
        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        """Compute how far the data and index files deviate from the sizes
        implied by the index.

        Returns a tuple (dd, di) of byte deviations for the data file and
        the index file respectively; both are 0 for a consistent revlog.
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

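    # Editorial note (not in the original source): checksize() is a
    # consistency probe suitable for verify-style checks; both values are
    # zero for a healthy revlog:
    #
    #   >>> dd, di = rl.checksize()    # ``rl`` is a hypothetical revlog
    #   >>> (dd, di)                   # data-file and index-file deviation
    #   (0, 0)
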
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added through the full ``addrevision()`` code
          path, as if they were new content; the destination revlog computes
          its own deltas.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        If None, the destination revlog's existing setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
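
    # Editorial sketch (not part of the original source): a typical clone()
    # call rewrites one revlog into another, e.g. while upgrading a repo
    # format. ``srclog``, ``dstlog`` and ``tr`` are hypothetical names:
    #
    #   >>> srclog.clone(tr, dstlog, deltareuse=srclog.DELTAREUSESAMEREVS)
    #   >>> len(dstlog) == len(srclog)
    #   True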