pure: create type for revlog v0 index...
Martin von Zweigbergk
r38885:da5a666f default
@@ -1,2966 +1,2971 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import heapq
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .utils import (
    stringutil,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
# bitmask for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)

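# [Editor's illustration, not part of this change] parsemeta()/packmeta()
# round-trip a metadata block framed by two "\1\n" markers; the offset that
# parsemeta() returns points at the first byte of the real text:
#
#   >>> packmeta({'censored': 'v'}, 'data')
#   '\x01\ncensored: v\n\x01\ndata'
#   >>> parsemeta('\x01\ncensored: v\n\x01\ndata')
#   ({'censored': 'v'}, 16)
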
def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor

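# [Editor's illustration, not part of this change] a hypothetical
# registration, sketched under the invariants documented above; the no-op
# helpers are made up for the example. A processor is a (read, write, raw)
# triple whose booleans say whether the text it returns is safe to use for
# hash integrity checking:
#
#   def _readidentity(self, rawtext):
#       return rawtext, True
#   def _writeidentity(self, text):
#       return text, True
#   def _rawidentity(self, rawtext):
#       return True
#   addflagprocessor(REVIDX_EXTSTORED,
#                    (_readidentity, _writeidentity, _rawidentity))
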
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

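# [Editor's illustration, not part of this change] the index packs the data
# offset and the per-revision flags into a single integer,
# "offset << 16 | flags", so getoffset()/gettype() simply undo offset_type():
#
#   >>> getoffset(offset_type(1024, REVIDX_ISCENSORED))
#   1024
#   >>> gettype(offset_type(1024, REVIDX_ISCENSORED)) == REVIDX_ISCENSORED
#   True
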
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()

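# [Editor's illustration, not part of this change] the parents are hashed in
# sorted order, and nullid sorts first, so the fast path above is equivalent
# to the general one and parent order never matters:
#
#   >>> p = b'\xff' * 20
#   >>> hash(b'text', p, nullid) == hashlib.sha1(nullid + p + b'text').digest()
#   True
#   >>> hash(b'text', p, nullid) == hash(b'text', nullid, p)
#   True
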
class _testrevlog(object):
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0):
204 """data is an list of revision payload boundaries"""
204 """data is an list of revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap

    def start(self, rev):
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # Trim empty revs at the end, but never the very first revision of a chain
    while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
        endidx -= 1

    return revs[startidx:endidx]

def _segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> _segmentspan(revlog, [0, 4])
    17
    >>> _segmentspan(revlog, [3, 4])
    5
    >>> _segmentspan(revlog, [1, 2, 3,])
    7
    >>> _segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    return revlog.end(revs[-1]) - revlog.start(revs[0])

def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one go.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If an individual revision chunk is larger than this limit, it will still
    be emitted individually.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # * deltainfo is used to pick a good delta chain when writing.
    if not (deltainfo is None or targetsize is None):
        msg = 'cannot use `targetsize` with a `deltainfo`'
        raise error.ProgrammingError(msg)
    for chunk in _slicechunktodensity(revlog, revs,
                                      deltainfo,
                                      revlog._srdensitythreshold,
                                      revlog._srmingapsize):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk

def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected but
    that are still too large compared to the read guarantee of the revlog.
    This might happen when the "minimal gap size" interrupted the slicing,
    or when chains are built in a way that creates large blocks next to
    each other.

    >>> revlog = _testrevlog([
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    startrevidx = 0
    startdata = revlog.start(revs[0])
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        if span <= targetsize:
            endrevidx = idx
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx
    yield _trimchunk(revlog, revs, startrevidx)

def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one go.
    Assume that revs are sorted.

    ``deltainfo`` is a _deltainfo instance of a revision that we would append
    to the top of the revlog.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    nextrev = len(revlog)
    nextoffset = revlog.end(nextrev - 1)

    if deltainfo is None:
        deltachainspan = _segmentspan(revlog, revs)
        chainpayload = sum(length(r) for r in revs)
    else:
        deltachainspan = deltainfo.distance
        chainpayload = deltainfo.compresseddeltalen

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    if deltainfo is not None:
        revs = list(revs)
        revs.append(nextrev)

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        if rev < nextrev:
            revstart = start(rev)
            revlen = length(rev)
        else:
            revstart = nextoffset
            revlen = deltainfo.deltalen

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < targetdensity:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()

class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
604 """
604 """
605 Provides revisions that present an interest to be diffed against,
605 Provides revisions that present an interest to be diffed against,
606 grouped by level of easiness.
606 grouped by level of easiness.
607 """
607 """
        revlog = self.revlog
        gdelta = revlog._generaldelta
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and gdelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if gdelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]

                if not revlog._deltabothparents and len(parents) == 2:
                    parents.sort()
                    # To minimize the chance of having to build a fulltext,
                    # pick first whichever parent is closest to us (max rev)
                    yield (parents[1],)
                    # then the other one (min rev) if the first did not fit
                    yield (parents[0],)
                    tested.update(parents)
                elif len(parents) > 0:
                    # Test all parents (1 or 2), and keep the best candidate
                    yield parents
                    tested.update(parents)

            if prev not in tested:
                # other approaches failed; try against prev to hopefully
                # save us a fulltext.
                yield (prev,)
                tested.add(prev)

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated with the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

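# [Editor's illustration, not part of this change] the v0 record is fixed
# size: four 4-byte big-endian ints plus three 20-byte nodeids:
#
#   >>> indexformatv0.size
#   76
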
+class revlogoldindex(list):
+    def __len__(self):
+        return list.__len__(self) + 1
+    def __getitem__(self, i):
+        if i == -1 or i == len(self) - 1:
+            return (0, 0, 0, -1, -1, -1, -1, nullid)
+        return list.__getitem__(self, i)

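# [Editor's illustration, not part of this change] the new type gives the
# parsed v0 index a "phantom" null-revision entry, reachable at both -1 and
# len(index) - 1, without physically storing it in the list:
#
#   >>> idx = revlogoldindex([(0, 0, 0, -1, -1, -1, -1, b'\x12' * 20)])
#   >>> len(idx)
#   2
#   >>> idx[-1] == (0, 0, 0, -1, -1, -1, -1, nullid)
#   True
#   >>> idx[0][7] == b'\x12' * 20
#   True
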
class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

-        # add the magic null revision at -1
-        index.append((0, 0, 0, -1, -1, -1, -1, nullid))
-
-        return index, nodemap, None
+        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

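# [Editor's illustration, not part of this change] ">Qiiiiii20s12x" packs the
# 6-byte offset plus 2-byte flags into one Q, six 4-byte ints, a 20-byte
# nodeid and 12 bytes of padding, for a fixed 64-byte record:
#
#   >>> indexformatng.size
#   64
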
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

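# [Editor's illustration, not part of this change] for rev 0 the high 4 bytes
# of the 8-byte offset field are always zero, so packentry() reuses them to
# store the version header; the entry values below are made up:
#
#   >>> io = revlogio()
#   >>> entry = (offset_type(0, 0), 10, 10, 0, 0, -1, -1, b'\x00' * 20)
#   >>> p = io.packentry(entry, None, REVLOG_DEFAULT_VERSION, 0)
#   >>> versionformat_unpack(p[:4])[0] == REVLOG_DEFAULT_VERSION
#   True
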
861 class revlog(object):
866 class revlog(object):
862 """
867 """
863 the underlying revision storage object
868 the underlying revision storage object
864
869
865 A revlog consists of two parts, an index and the revision data.
870 A revlog consists of two parts, an index and the revision data.
866
871
867 The index is a file with a fixed record size containing
872 The index is a file with a fixed record size containing
868 information on each revision, including its nodeid (hash), the
873 information on each revision, including its nodeid (hash), the
869 nodeids of its parents, the position and offset of its data within
874 nodeids of its parents, the position and offset of its data within
870 the data file, and the revision it's based on. Finally, each entry
875 the data file, and the revision it's based on. Finally, each entry
871 contains a linkrev entry that can serve as a pointer to external
876 contains a linkrev entry that can serve as a pointer to external
872 data.
877 data.
873
878
874 The revision data itself is a linear collection of data chunks.
879 The revision data itself is a linear collection of data chunks.
875 Each chunk represents a revision and is usually represented as a
880 Each chunk represents a revision and is usually represented as a
876 delta against the previous chunk. To bound lookup time, runs of
881 delta against the previous chunk. To bound lookup time, runs of
877 deltas are limited to about 2 times the length of the original
882 deltas are limited to about 2 times the length of the original
878 version data. This makes retrieval of a version proportional to
883 version data. This makes retrieval of a version proportional to
879 its size, or O(1) relative to the number of revisions.
884 its size, or O(1) relative to the number of revisions.
880
885
881 Both pieces of the revlog are written to in an append-only
886 Both pieces of the revlog are written to in an append-only
882 fashion, which means we never need to rewrite a file to insert or
887 fashion, which means we never need to rewrite a file to insert or
883 remove data, and can use some simple techniques to avoid the need
888 remove data, and can use some simple techniques to avoid the need
884 for locking while reading.
889 for locking while reading.
885
890
886 If checkambig, indexfile is opened with checkambig=True at
891 If checkambig, indexfile is opened with checkambig=True at
887 writing, to avoid file stat ambiguity.
892 writing, to avoid file stat ambiguity.
888
893
889 If mmaplargeindex is True, and an mmapindexthreshold is set, the
894 If mmaplargeindex is True, and an mmapindexthreshold is set, the
890 index will be mmapped rather than read if it is larger than the
895 index will be mmapped rather than read if it is larger than the
891 configured threshold.
896 configured threshold.
892
897
893 If censorable is True, the revlog can have censored revisions.
898 If censorable is True, the revlog can have censored revisions.
894 """
899 """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)
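        # The check above relies on (n & (n - 1)) == 0 holding exactly for
        # powers of two: subtracting 1 flips the lowest set bit and all bits
        # below it, so the AND is zero only when a single bit is set
        # (e.g. 65536 & 65535 == 0, but 6 & 5 == 4).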

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
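        # For example, the default header REVLOGV1 | FLAG_INLINE_DATA is
        # 0x00010001: fmt = 1 (REVLOGV1) and flags = 0x00010000
        # (FLAG_INLINE_DATA).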
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}
    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

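    # Judging from tip() and __len__ below, self.index carries one extra
    # trailing sentinel entry for the null revision, hence the off-by-one
    # arithmetic against len(self.index).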
    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop,
        both inclusive)"""
        step = 1
        length = len(self)
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
            if stop > length:
                stop = length
        else:
            stop = length
        return pycompat.xrange(start, stop, step)

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
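    # Judging from the accessors below, each index entry is an 8-tuple:
    # (offset << 16 | flags, compressed length, uncompressed length,
    #  base/delta-parent rev, linkrev, p1 rev, p2 rev, nodeid).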
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
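        # (descriptive note, an inference from the code below) lazyset wraps
        # the lazy ancestor generator so extra revs can be added without
        # forcing the generator to be consumed; membership tests fall through
        # to the lazy values when needed.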
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is; real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so no rev is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]
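        # e.g. for a linear 0-1-2 history, every rev is first marked as a
        # possible head and then unmarked when seen as a parent, leaving [2];
        # the extra slot at index 'count' absorbs writes for nullrev (-1)
        # parents.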

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def descendant(self, a, b):
        msg = 'revlog.descendant is deprecated, use revlog.isancestorrev'
        util.nouideprecwarn(msg, '4.7')
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
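        # revision numbers are topologically sorted, so an ancestor always
        # has a smaller revision number than its descendant: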
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
1776 maybewdir = wdirhex.startswith(id)
1781 maybewdir = wdirhex.startswith(id)
1777 try:
1782 try:
1778 partial = self.index.partialmatch(id)
1783 partial = self.index.partialmatch(id)
1779 if partial and self.hasnode(partial):
1784 if partial and self.hasnode(partial):
1780 if maybewdir:
1785 if maybewdir:
1781 # single 'ff...' match in radix tree, ambiguous with wdir
1786 # single 'ff...' match in radix tree, ambiguous with wdir
1782 raise RevlogError
1787 raise RevlogError
1783 return partial
1788 return partial
1784 if maybewdir:
1789 if maybewdir:
1785 # no 'ff...' match in radix tree, wdir identified
1790 # no 'ff...' match in radix tree, wdir identified
1786 raise error.WdirUnsupported
1791 raise error.WdirUnsupported
1787 return None
1792 return None
1788 except RevlogError:
1793 except RevlogError:
1789 # parsers.c radix tree lookup gave multiple matches
1794 # parsers.c radix tree lookup gave multiple matches
1790 # fast path: for unfiltered changelog, radix tree is accurate
1795 # fast path: for unfiltered changelog, radix tree is accurate
1791 if not getattr(self, 'filteredrevs', None):
1796 if not getattr(self, 'filteredrevs', None):
1792 raise AmbiguousPrefixLookupError(id, self.indexfile,
1797 raise AmbiguousPrefixLookupError(id, self.indexfile,
1793 _('ambiguous identifier'))
1798 _('ambiguous identifier'))
1794 # fall through to slow path that filters hidden revisions
1799 # fall through to slow path that filters hidden revisions
1795 except (AttributeError, ValueError):
1800 except (AttributeError, ValueError):
1796 # we are pure python, or key was too short to search radix tree
1801 # we are pure python, or key was too short to search radix tree
1797 pass
1802 pass
1798
1803
1799 if id in self._pcache:
1804 if id in self._pcache:
1800 return self._pcache[id]
1805 return self._pcache[id]
1801
1806
1802 if len(id) <= 40:
1807 if len(id) <= 40:
1803 try:
1808 try:
1804 # hex(node)[:...]
1809 # hex(node)[:...]
1805 l = len(id) // 2 # grab an even number of digits
1810 l = len(id) // 2 # grab an even number of digits
1806 prefix = bin(id[:l * 2])
1811 prefix = bin(id[:l * 2])
1807 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1812 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1808 nl = [n for n in nl if hex(n).startswith(id) and
1813 nl = [n for n in nl if hex(n).startswith(id) and
1809 self.hasnode(n)]
1814 self.hasnode(n)]
1810 if len(nl) > 0:
1815 if len(nl) > 0:
1811 if len(nl) == 1 and not maybewdir:
1816 if len(nl) == 1 and not maybewdir:
1812 self._pcache[id] = nl[0]
1817 self._pcache[id] = nl[0]
1813 return nl[0]
1818 return nl[0]
1814 raise AmbiguousPrefixLookupError(id, self.indexfile,
1819 raise AmbiguousPrefixLookupError(id, self.indexfile,
1815 _('ambiguous identifier'))
1820 _('ambiguous identifier'))
1816 if maybewdir:
1821 if maybewdir:
1817 raise error.WdirUnsupported
1822 raise error.WdirUnsupported
1818 return None
1823 return None
1819 except TypeError:
1824 except TypeError:
1820 pass
1825 pass
1821
1826
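# Illustrative sketch (not from revlog.py): the working-directory
# pseudo-node hashes as forty 'f's, so only an all-'f' prefix can be
# ambiguous with it -- the check _partialmatch() performs up front.
wdirhex = 'f' * 40
assert wdirhex.startswith('ff')       # 'ff' might still mean wdir
assert not wdirhex.startswith('fe')   # 'fe' can never mean wdir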
1822 def lookup(self, id):
1827 def lookup(self, id):
1823 """locate a node based on:
1828 """locate a node based on:
1824 - revision number or str(revision number)
1829 - revision number or str(revision number)
1825 - nodeid or subset of hex nodeid
1830 - nodeid or subset of hex nodeid
1826 """
1831 """
1827 n = self._match(id)
1832 n = self._match(id)
1828 if n is not None:
1833 if n is not None:
1829 return n
1834 return n
1830 n = self._partialmatch(id)
1835 n = self._partialmatch(id)
1831 if n:
1836 if n:
1832 return n
1837 return n
1833
1838
1834 raise LookupError(id, self.indexfile, _('no match found'))
1839 raise LookupError(id, self.indexfile, _('no match found'))
1835
1840
1836 def shortest(self, node, minlength=1):
1841 def shortest(self, node, minlength=1):
1837 """Find the shortest unambiguous prefix that matches node."""
1842 """Find the shortest unambiguous prefix that matches node."""
1838 def isvalid(prefix):
1843 def isvalid(prefix):
1839 try:
1844 try:
1840 node = self._partialmatch(prefix)
1845 node = self._partialmatch(prefix)
1841 except error.RevlogError:
1846 except error.RevlogError:
1842 return False
1847 return False
1843 except error.WdirUnsupported:
1848 except error.WdirUnsupported:
1844 # single 'ff...' match
1849 # single 'ff...' match
1845 return True
1850 return True
1846 if node is None:
1851 if node is None:
1847 raise LookupError(node, self.indexfile, _('no node'))
1852 raise LookupError(node, self.indexfile, _('no node'))
1848 return True
1853 return True
1849
1854
1850 def maybewdir(prefix):
1855 def maybewdir(prefix):
1851 return all(c == 'f' for c in prefix)
1856 return all(c == 'f' for c in prefix)
1852
1857
1853 hexnode = hex(node)
1858 hexnode = hex(node)
1854
1859
1855 def disambiguate(hexnode, minlength):
1860 def disambiguate(hexnode, minlength):
1856 """Disambiguate against wdirid."""
1861 """Disambiguate against wdirid."""
1857 for length in range(minlength, 41):
1862 for length in range(minlength, 41):
1858 prefix = hexnode[:length]
1863 prefix = hexnode[:length]
1859 if not maybewdir(prefix):
1864 if not maybewdir(prefix):
1860 return prefix
1865 return prefix
1861
1866
1862 if not getattr(self, 'filteredrevs', None):
1867 if not getattr(self, 'filteredrevs', None):
1863 try:
1868 try:
1864 length = max(self.index.shortest(node), minlength)
1869 length = max(self.index.shortest(node), minlength)
1865 return disambiguate(hexnode, length)
1870 return disambiguate(hexnode, length)
1866 except RevlogError:
1871 except RevlogError:
1867 if node != wdirid:
1872 if node != wdirid:
1868 raise LookupError(node, self.indexfile, _('no node'))
1873 raise LookupError(node, self.indexfile, _('no node'))
1869 except AttributeError:
1874 except AttributeError:
1870 # Fall through to pure code
1875 # Fall through to pure code
1871 pass
1876 pass
1872
1877
1873 if node == wdirid:
1878 if node == wdirid:
1874 for length in range(minlength, 41):
1879 for length in range(minlength, 41):
1875 prefix = hexnode[:length]
1880 prefix = hexnode[:length]
1876 if isvalid(prefix):
1881 if isvalid(prefix):
1877 return prefix
1882 return prefix
1878
1883
1879 for length in range(minlength, 41):
1884 for length in range(minlength, 41):
1880 prefix = hexnode[:length]
1885 prefix = hexnode[:length]
1881 if isvalid(prefix):
1886 if isvalid(prefix):
1882 return disambiguate(hexnode, length)
1887 return disambiguate(hexnode, length)
1883
1888
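# Illustrative sketch (standalone rework of disambiguate() above): grow
# the prefix until it can no longer be a prefix of the all-'f' wdir hash.
def sketch_disambiguate(hexnode, minlength):
    for length in range(minlength, 41):
        prefix = hexnode[:length]
        if not all(c == 'f' for c in prefix):
            return prefix

assert sketch_disambiguate('ffab' + '0' * 36, 1) == 'ffa'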
1884 def cmp(self, node, text):
1889 def cmp(self, node, text):
1885 """compare text with a given file revision
1890 """compare text with a given file revision
1886
1891
1887 returns True if text is different from what is stored.
1892 returns True if text is different from what is stored.
1888 """
1893 """
1889 p1, p2 = self.parents(node)
1894 p1, p2 = self.parents(node)
1890 return hash(text, p1, p2) != node
1895 return hash(text, p1, p2) != node
1891
1896
1892 def _cachesegment(self, offset, data):
1897 def _cachesegment(self, offset, data):
1893 """Add a segment to the revlog cache.
1898 """Add a segment to the revlog cache.
1894
1899
1895 Accepts an absolute offset and the data that is at that location.
1900 Accepts an absolute offset and the data that is at that location.
1896 """
1901 """
1897 o, d = self._chunkcache
1902 o, d = self._chunkcache
1898 # try to add to existing cache
1903 # try to add to existing cache
1899 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1904 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1900 self._chunkcache = o, d + data
1905 self._chunkcache = o, d + data
1901 else:
1906 else:
1902 self._chunkcache = offset, data
1907 self._chunkcache = offset, data
1903
1908
1904 def _readsegment(self, offset, length, df=None):
1909 def _readsegment(self, offset, length, df=None):
1905 """Load a segment of raw data from the revlog.
1910 """Load a segment of raw data from the revlog.
1906
1911
1907 Accepts an absolute offset, length to read, and an optional existing
1912 Accepts an absolute offset, length to read, and an optional existing
1908 file handle to read from.
1913 file handle to read from.
1909
1914
1910 If an existing file handle is passed, it will be seeked and the
1915 If an existing file handle is passed, it will be seeked and the
1911 original seek position will NOT be restored.
1916 original seek position will NOT be restored.
1912
1917
1913 Returns a str or buffer of raw byte data.
1918 Returns a str or buffer of raw byte data.
1914 """
1919 """
1915 # Cache data both forward and backward around the requested
1920 # Cache data both forward and backward around the requested
1916 # data, in a fixed size window. This helps speed up operations
1921 # data, in a fixed size window. This helps speed up operations
1917 # involving reading the revlog backwards.
1922 # involving reading the revlog backwards.
1918 cachesize = self._chunkcachesize
1923 cachesize = self._chunkcachesize
1919 realoffset = offset & ~(cachesize - 1)
1924 realoffset = offset & ~(cachesize - 1)
1920 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1925 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1921 - realoffset)
1926 - realoffset)
1922 with self._datareadfp(df) as df:
1927 with self._datareadfp(df) as df:
1923 df.seek(realoffset)
1928 df.seek(realoffset)
1924 d = df.read(reallength)
1929 d = df.read(reallength)
1925 self._cachesegment(realoffset, d)
1930 self._cachesegment(realoffset, d)
1926 if offset != realoffset or reallength != length:
1931 if offset != realoffset or reallength != length:
1927 return util.buffer(d, offset - realoffset, length)
1932 return util.buffer(d, offset - realoffset, length)
1928 return d
1933 return d
1929
1934
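# Illustrative sketch (hypothetical values): the power-of-two alignment
# used by _readsegment() above rounds a request out to whole cache-sized
# windows, so nearby reads in either direction can hit the cache.
cachesize = 65536                       # must be a power of two
offset, length = 70000, 100
realoffset = offset & ~(cachesize - 1)
reallength = (((offset + length + cachesize) & ~(cachesize - 1))
              - realoffset)
assert (realoffset, reallength) == (65536, 65536)
assert realoffset <= offset and offset + length <= realoffset + reallength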
1930 def _getsegment(self, offset, length, df=None):
1935 def _getsegment(self, offset, length, df=None):
1931 """Obtain a segment of raw data from the revlog.
1936 """Obtain a segment of raw data from the revlog.
1932
1937
1933 Accepts an absolute offset, length of bytes to obtain, and an
1938 Accepts an absolute offset, length of bytes to obtain, and an
1934 optional file handle to the already-opened revlog. If the file
1939 optional file handle to the already-opened revlog. If the file
1935 handle is used, its original seek position will not be preserved.
1940 handle is used, its original seek position will not be preserved.
1936
1941
1937 Requests for data may be returned from a cache.
1942 Requests for data may be returned from a cache.
1938
1943
1939 Returns a str or a buffer instance of raw byte data.
1944 Returns a str or a buffer instance of raw byte data.
1940 """
1945 """
1941 o, d = self._chunkcache
1946 o, d = self._chunkcache
1942 l = len(d)
1947 l = len(d)
1943
1948
1944 # is it in the cache?
1949 # is it in the cache?
1945 cachestart = offset - o
1950 cachestart = offset - o
1946 cacheend = cachestart + length
1951 cacheend = cachestart + length
1947 if cachestart >= 0 and cacheend <= l:
1952 if cachestart >= 0 and cacheend <= l:
1948 if cachestart == 0 and cacheend == l:
1953 if cachestart == 0 and cacheend == l:
1949 return d # avoid a copy
1954 return d # avoid a copy
1950 return util.buffer(d, cachestart, cacheend - cachestart)
1955 return util.buffer(d, cachestart, cacheend - cachestart)
1951
1956
1952 return self._readsegment(offset, length, df=df)
1957 return self._readsegment(offset, length, df=df)
1953
1958
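# Illustrative sketch (hypothetical values): the containment test above.
# A request is answered from the cached (offset, data) pair only when the
# requested byte range falls entirely inside the cached segment.
cacheoffset, cachedata = 1000, b'x' * 500
offset, length = 1200, 100
cachestart = offset - cacheoffset
cacheend = cachestart + length
assert cachestart >= 0 and cacheend <= len(cachedata)   # a cache hit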
1954 def _getsegmentforrevs(self, startrev, endrev, df=None):
1959 def _getsegmentforrevs(self, startrev, endrev, df=None):
1955 """Obtain a segment of raw data corresponding to a range of revisions.
1960 """Obtain a segment of raw data corresponding to a range of revisions.
1956
1961
1957 Accepts the start and end revisions and an optional already-open
1962 Accepts the start and end revisions and an optional already-open
1958 file handle to be used for reading. If the file handle is read, its
1963 file handle to be used for reading. If the file handle is read, its
1959 seek position will not be preserved.
1964 seek position will not be preserved.
1960
1965
1961 Requests for data may be satisfied by a cache.
1966 Requests for data may be satisfied by a cache.
1962
1967
1963 Returns a 2-tuple of (offset, data) for the requested range of
1968 Returns a 2-tuple of (offset, data) for the requested range of
1964 revisions. Offset is the integer offset from the beginning of the
1969 revisions. Offset is the integer offset from the beginning of the
1965 revlog and data is a str or buffer of the raw byte data.
1970 revlog and data is a str or buffer of the raw byte data.
1966
1971
1967 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1972 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1968 to determine where each revision's data begins and ends.
1973 to determine where each revision's data begins and ends.
1969 """
1974 """
1970 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1975 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1971 # (functions are expensive).
1976 # (functions are expensive).
1972 index = self.index
1977 index = self.index
1973 istart = index[startrev]
1978 istart = index[startrev]
1974 start = int(istart[0] >> 16)
1979 start = int(istart[0] >> 16)
1975 if startrev == endrev:
1980 if startrev == endrev:
1976 end = start + istart[1]
1981 end = start + istart[1]
1977 else:
1982 else:
1978 iend = index[endrev]
1983 iend = index[endrev]
1979 end = int(iend[0] >> 16) + iend[1]
1984 end = int(iend[0] >> 16) + iend[1]
1980
1985
1981 if self._inline:
1986 if self._inline:
1982 start += (startrev + 1) * self._io.size
1987 start += (startrev + 1) * self._io.size
1983 end += (endrev + 1) * self._io.size
1988 end += (endrev + 1) * self._io.size
1984 length = end - start
1989 length = end - start
1985
1990
1986 return start, self._getsegment(start, length, df=df)
1991 return start, self._getsegment(start, length, df=df)
1987
1992
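# Illustrative sketch (hypothetical iosize): in an inline revlog, index
# entries and data chunks are interleaved in one file, so a logical data
# offset must be shifted past the (rev + 1) index entries preceding it.
iosize = 64   # size of one packed index entry

def inline_offset(dataoffset, rev):
    return dataoffset + (rev + 1) * iosize

assert inline_offset(0, 0) == 64    # rev 0's data follows its index entry
assert inline_offset(10, 2) == 202  # three entries precede rev 2's data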
1988 def _chunk(self, rev, df=None):
1993 def _chunk(self, rev, df=None):
1989 """Obtain a single decompressed chunk for a revision.
1994 """Obtain a single decompressed chunk for a revision.
1990
1995
1991 Accepts an integer revision and an optional already-open file handle
1996 Accepts an integer revision and an optional already-open file handle
1992 to be used for reading. If used, the seek position of the file will not
1997 to be used for reading. If used, the seek position of the file will not
1993 be preserved.
1998 be preserved.
1994
1999
1995 Returns a str holding uncompressed data for the requested revision.
2000 Returns a str holding uncompressed data for the requested revision.
1996 """
2001 """
1997 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
2002 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1998
2003
1999 def _chunks(self, revs, df=None, targetsize=None):
2004 def _chunks(self, revs, df=None, targetsize=None):
2000 """Obtain decompressed chunks for the specified revisions.
2005 """Obtain decompressed chunks for the specified revisions.
2001
2006
2002 Accepts an iterable of numeric revisions that are assumed to be in
2007 Accepts an iterable of numeric revisions that are assumed to be in
2003 ascending order. Also accepts an optional already-open file handle
2008 ascending order. Also accepts an optional already-open file handle
2004 to be used for reading. If used, the seek position of the file will
2009 to be used for reading. If used, the seek position of the file will
2005 not be preserved.
2010 not be preserved.
2006
2011
2007 This function is similar to calling ``self._chunk()`` multiple times,
2012 This function is similar to calling ``self._chunk()`` multiple times,
2008 but is faster.
2013 but is faster.
2009
2014
2010 Returns a list with decompressed data for each requested revision.
2015 Returns a list with decompressed data for each requested revision.
2011 """
2016 """
2012 if not revs:
2017 if not revs:
2013 return []
2018 return []
2014 start = self.start
2019 start = self.start
2015 length = self.length
2020 length = self.length
2016 inline = self._inline
2021 inline = self._inline
2017 iosize = self._io.size
2022 iosize = self._io.size
2018 buffer = util.buffer
2023 buffer = util.buffer
2019
2024
2020 l = []
2025 l = []
2021 ladd = l.append
2026 ladd = l.append
2022
2027
2023 if not self._withsparseread:
2028 if not self._withsparseread:
2024 slicedchunks = (revs,)
2029 slicedchunks = (revs,)
2025 else:
2030 else:
2026 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
2031 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
2027
2032
2028 for revschunk in slicedchunks:
2033 for revschunk in slicedchunks:
2029 firstrev = revschunk[0]
2034 firstrev = revschunk[0]
2030 # Skip trailing revisions with empty diff
2035 # Skip trailing revisions with empty diff
2031 for lastrev in revschunk[::-1]:
2036 for lastrev in revschunk[::-1]:
2032 if length(lastrev) != 0:
2037 if length(lastrev) != 0:
2033 break
2038 break
2034
2039
2035 try:
2040 try:
2036 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2041 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2037 except OverflowError:
2042 except OverflowError:
2038 # issue4215 - we can't cache a run of chunks greater than
2043 # issue4215 - we can't cache a run of chunks greater than
2039 # 2G on Windows
2044 # 2G on Windows
2040 return [self._chunk(rev, df=df) for rev in revschunk]
2045 return [self._chunk(rev, df=df) for rev in revschunk]
2041
2046
2042 decomp = self.decompress
2047 decomp = self.decompress
2043 for rev in revschunk:
2048 for rev in revschunk:
2044 chunkstart = start(rev)
2049 chunkstart = start(rev)
2045 if inline:
2050 if inline:
2046 chunkstart += (rev + 1) * iosize
2051 chunkstart += (rev + 1) * iosize
2047 chunklength = length(rev)
2052 chunklength = length(rev)
2048 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2053 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2049
2054
2050 return l
2055 return l
2051
2056
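# Illustrative sketch (hypothetical layout): carving one contiguous read
# into per-revision chunks by offset arithmetic, as the loop above does
# with util.buffer() to avoid copies.
dataoffset = 100
data = b'0123456789' * 4                       # covers file offsets 100..139
revchunks = [(100, 10), (110, 25), (135, 5)]   # (chunkstart, chunklength)
parts = [data[start - dataoffset:start - dataoffset + length]
         for start, length in revchunks]
assert b''.join(parts) == data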
2052 def _chunkclear(self):
2057 def _chunkclear(self):
2053 """Clear the raw chunk cache."""
2058 """Clear the raw chunk cache."""
2054 self._chunkcache = (0, '')
2059 self._chunkcache = (0, '')
2055
2060
2056 def deltaparent(self, rev):
2061 def deltaparent(self, rev):
2057 """return deltaparent of the given revision"""
2062 """return deltaparent of the given revision"""
2058 base = self.index[rev][3]
2063 base = self.index[rev][3]
2059 if base == rev:
2064 if base == rev:
2060 return nullrev
2065 return nullrev
2061 elif self._generaldelta:
2066 elif self._generaldelta:
2062 return base
2067 return base
2063 else:
2068 else:
2064 return rev - 1
2069 return rev - 1
2065
2070
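# Illustrative sketch (not from revlog.py): the two delta-base
# conventions resolved above. With generaldelta the stored base field is
# the delta parent itself; without it, deltas implicitly chain against
# the previous revision, and base == rev marks a full snapshot.
def sketch_deltaparent(rev, base, generaldelta, nullrev=-1):
    if base == rev:
        return nullrev
    return base if generaldelta else rev - 1

assert sketch_deltaparent(5, 5, True) == -1   # full snapshot
assert sketch_deltaparent(5, 2, True) == 2    # delta against rev 2
assert sketch_deltaparent(5, 2, False) == 4   # delta against previous rev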
2066 def revdiff(self, rev1, rev2):
2071 def revdiff(self, rev1, rev2):
2067 """return or calculate a delta between two revisions
2072 """return or calculate a delta between two revisions
2068
2073
2069 The delta calculated is in binary form and is intended to be written to
2074 The delta calculated is in binary form and is intended to be written to
2070 revlog data directly. So this function needs raw revision data.
2075 revlog data directly. So this function needs raw revision data.
2071 """
2076 """
2072 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2077 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2073 return bytes(self._chunk(rev2))
2078 return bytes(self._chunk(rev2))
2074
2079
2075 return mdiff.textdiff(self.revision(rev1, raw=True),
2080 return mdiff.textdiff(self.revision(rev1, raw=True),
2076 self.revision(rev2, raw=True))
2081 self.revision(rev2, raw=True))
2077
2082
2078 def revision(self, nodeorrev, _df=None, raw=False):
2083 def revision(self, nodeorrev, _df=None, raw=False):
2079 """return an uncompressed revision of a given node or revision
2084 """return an uncompressed revision of a given node or revision
2080 number.
2085 number.
2081
2086
2082 _df - an existing file handle to read from. (internal-only)
2087 _df - an existing file handle to read from. (internal-only)
2083 raw - an optional argument specifying if the revision data is to be
2088 raw - an optional argument specifying if the revision data is to be
2084 treated as raw data when applying flag transforms. 'raw' should be set
2089 treated as raw data when applying flag transforms. 'raw' should be set
2085 to True when generating changegroups or in debug commands.
2090 to True when generating changegroups or in debug commands.
2086 """
2091 """
2087 if isinstance(nodeorrev, int):
2092 if isinstance(nodeorrev, int):
2088 rev = nodeorrev
2093 rev = nodeorrev
2089 node = self.node(rev)
2094 node = self.node(rev)
2090 else:
2095 else:
2091 node = nodeorrev
2096 node = nodeorrev
2092 rev = None
2097 rev = None
2093
2098
2094 cachedrev = None
2099 cachedrev = None
2095 flags = None
2100 flags = None
2096 rawtext = None
2101 rawtext = None
2097 if node == nullid:
2102 if node == nullid:
2098 return ""
2103 return ""
2099 if self._cache:
2104 if self._cache:
2100 if self._cache[0] == node:
2105 if self._cache[0] == node:
2101 # _cache only stores rawtext
2106 # _cache only stores rawtext
2102 if raw:
2107 if raw:
2103 return self._cache[2]
2108 return self._cache[2]
2104 # duplicated, but good for perf
2109 # duplicated, but good for perf
2105 if rev is None:
2110 if rev is None:
2106 rev = self.rev(node)
2111 rev = self.rev(node)
2107 if flags is None:
2112 if flags is None:
2108 flags = self.flags(rev)
2113 flags = self.flags(rev)
2109 # no extra flags set, no flag processor runs, text = rawtext
2114 # no extra flags set, no flag processor runs, text = rawtext
2110 if flags == REVIDX_DEFAULT_FLAGS:
2115 if flags == REVIDX_DEFAULT_FLAGS:
2111 return self._cache[2]
2116 return self._cache[2]
2112 # rawtext is reusable. need to run flag processor
2117 # rawtext is reusable. need to run flag processor
2113 rawtext = self._cache[2]
2118 rawtext = self._cache[2]
2114
2119
2115 cachedrev = self._cache[1]
2120 cachedrev = self._cache[1]
2116
2121
2117 # look up what we need to read
2122 # look up what we need to read
2118 if rawtext is None:
2123 if rawtext is None:
2119 if rev is None:
2124 if rev is None:
2120 rev = self.rev(node)
2125 rev = self.rev(node)
2121
2126
2122 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2127 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2123 if stopped:
2128 if stopped:
2124 rawtext = self._cache[2]
2129 rawtext = self._cache[2]
2125
2130
2126 # drop cache to save memory
2131 # drop cache to save memory
2127 self._cache = None
2132 self._cache = None
2128
2133
2129 targetsize = None
2134 targetsize = None
2130 rawsize = self.index[rev][2]
2135 rawsize = self.index[rev][2]
2131 if 0 <= rawsize:
2136 if 0 <= rawsize:
2132 targetsize = 4 * rawsize
2137 targetsize = 4 * rawsize
2133
2138
2134 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2139 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2135 if rawtext is None:
2140 if rawtext is None:
2136 rawtext = bytes(bins[0])
2141 rawtext = bytes(bins[0])
2137 bins = bins[1:]
2142 bins = bins[1:]
2138
2143
2139 rawtext = mdiff.patches(rawtext, bins)
2144 rawtext = mdiff.patches(rawtext, bins)
2140 self._cache = (node, rev, rawtext)
2145 self._cache = (node, rev, rawtext)
2141
2146
2142 if flags is None:
2147 if flags is None:
2143 if rev is None:
2148 if rev is None:
2144 rev = self.rev(node)
2149 rev = self.rev(node)
2145 flags = self.flags(rev)
2150 flags = self.flags(rev)
2146
2151
2147 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2152 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2148 if validatehash:
2153 if validatehash:
2149 self.checkhash(text, node, rev=rev)
2154 self.checkhash(text, node, rev=rev)
2150
2155
2151 return text
2156 return text
2152
2157
2153 def hash(self, text, p1, p2):
2158 def hash(self, text, p1, p2):
2154 """Compute a node hash.
2159 """Compute a node hash.
2155
2160
2156 Available as a function so that subclasses can replace the hash
2161 Available as a function so that subclasses can replace the hash
2157 as needed.
2162 as needed.
2158 """
2163 """
2159 return hash(text, p1, p2)
2164 return hash(text, p1, p2)
2160
2165
2161 def _processflags(self, text, flags, operation, raw=False):
2166 def _processflags(self, text, flags, operation, raw=False):
2162 """Inspect revision data flags and applies transforms defined by
2167 """Inspect revision data flags and applies transforms defined by
2163 registered flag processors.
2168 registered flag processors.
2164
2169
2165 ``text`` - the revision data to process
2170 ``text`` - the revision data to process
2166 ``flags`` - the revision flags
2171 ``flags`` - the revision flags
2167 ``operation`` - the operation being performed (read or write)
2172 ``operation`` - the operation being performed (read or write)
2168 ``raw`` - an optional argument describing if the raw transform should be
2173 ``raw`` - an optional argument describing if the raw transform should be
2169 applied.
2174 applied.
2170
2175
2171 This method processes the flags in the order (or reverse order if
2176 This method processes the flags in the order (or reverse order if
2172 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2177 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2173 flag processors registered for present flags. The order of flags defined
2178 flag processors registered for present flags. The order of flags defined
2174 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
2179 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
2175
2180
2176 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2181 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2177 processed text and ``validatehash`` is a bool indicating whether the
2182 processed text and ``validatehash`` is a bool indicating whether the
2178 returned text should be checked for hash integrity.
2183 returned text should be checked for hash integrity.
2179
2184
2180 Note: If the ``raw`` argument is set, it has precedence over the
2185 Note: If the ``raw`` argument is set, it has precedence over the
2181 operation and will only update the value of ``validatehash``.
2186 operation and will only update the value of ``validatehash``.
2182 """
2187 """
2183 # fast path: no flag processors will run
2188 # fast path: no flag processors will run
2184 if flags == 0:
2189 if flags == 0:
2185 return text, True
2190 return text, True
2186 if operation not in ('read', 'write'):
2191 if operation not in ('read', 'write'):
2187 raise ProgrammingError(_("invalid '%s' operation") % operation)
2192 raise ProgrammingError(_("invalid '%s' operation") % operation)
2188 # Check all flags are known.
2193 # Check all flags are known.
2189 if flags & ~REVIDX_KNOWN_FLAGS:
2194 if flags & ~REVIDX_KNOWN_FLAGS:
2190 raise RevlogError(_("incompatible revision flag '%#x'") %
2195 raise RevlogError(_("incompatible revision flag '%#x'") %
2191 (flags & ~REVIDX_KNOWN_FLAGS))
2196 (flags & ~REVIDX_KNOWN_FLAGS))
2192 validatehash = True
2197 validatehash = True
2193 # Depending on the operation (read or write), the order might be
2198 # Depending on the operation (read or write), the order might be
2194 # reversed due to non-commutative transforms.
2199 # reversed due to non-commutative transforms.
2195 orderedflags = REVIDX_FLAGS_ORDER
2200 orderedflags = REVIDX_FLAGS_ORDER
2196 if operation == 'write':
2201 if operation == 'write':
2197 orderedflags = reversed(orderedflags)
2202 orderedflags = reversed(orderedflags)
2198
2203
2199 for flag in orderedflags:
2204 for flag in orderedflags:
2200 # If a flagprocessor has been registered for a known flag, apply the
2205 # If a flagprocessor has been registered for a known flag, apply the
2201 # related operation transform and update result tuple.
2206 # related operation transform and update result tuple.
2202 if flag & flags:
2207 if flag & flags:
2203 vhash = True
2208 vhash = True
2204
2209
2205 if flag not in _flagprocessors:
2210 if flag not in _flagprocessors:
2206 message = _("missing processor for flag '%#x'") % (flag)
2211 message = _("missing processor for flag '%#x'") % (flag)
2207 raise RevlogError(message)
2212 raise RevlogError(message)
2208
2213
2209 processor = _flagprocessors[flag]
2214 processor = _flagprocessors[flag]
2210 if processor is not None:
2215 if processor is not None:
2211 readtransform, writetransform, rawtransform = processor
2216 readtransform, writetransform, rawtransform = processor
2212
2217
2213 if raw:
2218 if raw:
2214 vhash = rawtransform(self, text)
2219 vhash = rawtransform(self, text)
2215 elif operation == 'read':
2220 elif operation == 'read':
2216 text, vhash = readtransform(self, text)
2221 text, vhash = readtransform(self, text)
2217 else: # write operation
2222 else: # write operation
2218 text, vhash = writetransform(self, text)
2223 text, vhash = writetransform(self, text)
2219 validatehash = validatehash and vhash
2224 validatehash = validatehash and vhash
2220
2225
2221 return text, validatehash
2226 return text, validatehash
2222
2227
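# Illustrative sketch (hypothetical flags): why write-time transforms run
# in the reverse order of read-time ones. If reading applies A then B,
# writing must undo B before A for round-tripping to hold.
FLAGS_ORDER = ['A', 'B']
trace = ['read-%s' % f for f in FLAGS_ORDER]
trace += ['write-%s' % f for f in reversed(FLAGS_ORDER)]
assert trace == ['read-A', 'read-B', 'write-B', 'write-A']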
2223 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2228 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2224 """Check node hash integrity.
2229 """Check node hash integrity.
2225
2230
2226 Available as a function so that subclasses can extend hash mismatch
2231 Available as a function so that subclasses can extend hash mismatch
2227 behaviors as needed.
2232 behaviors as needed.
2228 """
2233 """
2229 try:
2234 try:
2230 if p1 is None and p2 is None:
2235 if p1 is None and p2 is None:
2231 p1, p2 = self.parents(node)
2236 p1, p2 = self.parents(node)
2232 if node != self.hash(text, p1, p2):
2237 if node != self.hash(text, p1, p2):
2233 revornode = rev
2238 revornode = rev
2234 if revornode is None:
2239 if revornode is None:
2235 revornode = templatefilters.short(hex(node))
2240 revornode = templatefilters.short(hex(node))
2236 raise RevlogError(_("integrity check failed on %s:%s")
2241 raise RevlogError(_("integrity check failed on %s:%s")
2237 % (self.indexfile, pycompat.bytestr(revornode)))
2242 % (self.indexfile, pycompat.bytestr(revornode)))
2238 except RevlogError:
2243 except RevlogError:
2239 if self._censorable and _censoredtext(text):
2244 if self._censorable and _censoredtext(text):
2240 raise error.CensoredNodeError(self.indexfile, node, text)
2245 raise error.CensoredNodeError(self.indexfile, node, text)
2241 raise
2246 raise
2242
2247
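# Illustrative sketch (the well-known revlog hashing scheme): a node is
# the SHA-1 of the two parent nodes in sorted order followed by the text,
# which is the identity checkhash() verifies above.
import hashlib

def sketch_hash(text, p1, p2):
    s = hashlib.sha1(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()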
2243 def _enforceinlinesize(self, tr, fp=None):
2248 def _enforceinlinesize(self, tr, fp=None):
2244 """Check if the revlog is too big for inline and convert if so.
2249 """Check if the revlog is too big for inline and convert if so.
2245
2250
2246 This should be called after revisions are added to the revlog. If the
2251 This should be called after revisions are added to the revlog. If the
2247 revlog has grown too large to be an inline revlog, it will convert it
2252 revlog has grown too large to be an inline revlog, it will convert it
2248 to use multiple index and data files.
2253 to use multiple index and data files.
2249 """
2254 """
2250 tiprev = len(self) - 1
2255 tiprev = len(self) - 1
2251 if (not self._inline or
2256 if (not self._inline or
2252 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
2257 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
2253 return
2258 return
2254
2259
2255 trinfo = tr.find(self.indexfile)
2260 trinfo = tr.find(self.indexfile)
2256 if trinfo is None:
2261 if trinfo is None:
2257 raise RevlogError(_("%s not found in the transaction")
2262 raise RevlogError(_("%s not found in the transaction")
2258 % self.indexfile)
2263 % self.indexfile)
2259
2264
2260 trindex = trinfo[2]
2265 trindex = trinfo[2]
2261 if trindex is not None:
2266 if trindex is not None:
2262 dataoff = self.start(trindex)
2267 dataoff = self.start(trindex)
2263 else:
2268 else:
2264 # revlog was stripped at start of transaction, use all leftover data
2269 # revlog was stripped at start of transaction, use all leftover data
2265 trindex = len(self) - 1
2270 trindex = len(self) - 1
2266 dataoff = self.end(tiprev)
2271 dataoff = self.end(tiprev)
2267
2272
2268 tr.add(self.datafile, dataoff)
2273 tr.add(self.datafile, dataoff)
2269
2274
2270 if fp:
2275 if fp:
2271 fp.flush()
2276 fp.flush()
2272 fp.close()
2277 fp.close()
2273
2278
2274 with self._datafp('w') as df:
2279 with self._datafp('w') as df:
2275 for r in self:
2280 for r in self:
2276 df.write(self._getsegmentforrevs(r, r)[1])
2281 df.write(self._getsegmentforrevs(r, r)[1])
2277
2282
2278 with self._indexfp('w') as fp:
2283 with self._indexfp('w') as fp:
2279 self.version &= ~FLAG_INLINE_DATA
2284 self.version &= ~FLAG_INLINE_DATA
2280 self._inline = False
2285 self._inline = False
2281 io = self._io
2286 io = self._io
2282 for i in self:
2287 for i in self:
2283 e = io.packentry(self.index[i], self.node, self.version, i)
2288 e = io.packentry(self.index[i], self.node, self.version, i)
2284 fp.write(e)
2289 fp.write(e)
2285
2290
2286 # the temp file replaces the real index when we exit the context
2291 # the temp file replaces the real index when we exit the context
2287 # manager
2292 # manager
2288
2293
2289 tr.replace(self.indexfile, trindex * self._io.size)
2294 tr.replace(self.indexfile, trindex * self._io.size)
2290 self._chunkclear()
2295 self._chunkclear()
2291
2296
2292 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2297 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2293 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2298 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2294 """add a revision to the log
2299 """add a revision to the log
2295
2300
2296 text - the revision data to add
2301 text - the revision data to add
2297 transaction - the transaction object used for rollback
2302 transaction - the transaction object used for rollback
2298 link - the linkrev data to add
2303 link - the linkrev data to add
2299 p1, p2 - the parent nodeids of the revision
2304 p1, p2 - the parent nodeids of the revision
2300 cachedelta - an optional precomputed delta
2305 cachedelta - an optional precomputed delta
2301 node - nodeid of revision; typically node is not specified, and it is
2306 node - nodeid of revision; typically node is not specified, and it is
2302 computed by default as hash(text, p1, p2), however subclasses might
2307 computed by default as hash(text, p1, p2), however subclasses might
2303 use different hashing method (and override checkhash() in such case)
2308 use different hashing method (and override checkhash() in such case)
2304 flags - the known flags to set on the revision
2309 flags - the known flags to set on the revision
2305 deltacomputer - an optional _deltacomputer instance shared between
2310 deltacomputer - an optional _deltacomputer instance shared between
2306 multiple calls
2311 multiple calls
2307 """
2312 """
2308 if link == nullrev:
2313 if link == nullrev:
2309 raise RevlogError(_("attempted to add linkrev -1 to %s")
2314 raise RevlogError(_("attempted to add linkrev -1 to %s")
2310 % self.indexfile)
2315 % self.indexfile)
2311
2316
2312 if flags:
2317 if flags:
2313 node = node or self.hash(text, p1, p2)
2318 node = node or self.hash(text, p1, p2)
2314
2319
2315 rawtext, validatehash = self._processflags(text, flags, 'write')
2320 rawtext, validatehash = self._processflags(text, flags, 'write')
2316
2321
2317 # If the flag processor modifies the revision data, ignore any provided
2322 # If the flag processor modifies the revision data, ignore any provided
2318 # cachedelta.
2323 # cachedelta.
2319 if rawtext != text:
2324 if rawtext != text:
2320 cachedelta = None
2325 cachedelta = None
2321
2326
2322 if len(rawtext) > _maxentrysize:
2327 if len(rawtext) > _maxentrysize:
2323 raise RevlogError(
2328 raise RevlogError(
2324 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2329 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2325 % (self.indexfile, len(rawtext)))
2330 % (self.indexfile, len(rawtext)))
2326
2331
2327 node = node or self.hash(rawtext, p1, p2)
2332 node = node or self.hash(rawtext, p1, p2)
2328 if node in self.nodemap:
2333 if node in self.nodemap:
2329 return node
2334 return node
2330
2335
2331 if validatehash:
2336 if validatehash:
2332 self.checkhash(rawtext, node, p1=p1, p2=p2)
2337 self.checkhash(rawtext, node, p1=p1, p2=p2)
2333
2338
2334 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2339 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2335 flags, cachedelta=cachedelta,
2340 flags, cachedelta=cachedelta,
2336 deltacomputer=deltacomputer)
2341 deltacomputer=deltacomputer)
2337
2342
2338 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2343 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2339 cachedelta=None, deltacomputer=None):
2344 cachedelta=None, deltacomputer=None):
2340 """add a raw revision with known flags, node and parents
2345 """add a raw revision with known flags, node and parents
2341 useful when reusing a revision not stored in this revlog (ex: received
2346 useful when reusing a revision not stored in this revlog (ex: received
2342 over the wire, or read from an external bundle).
2347 over the wire, or read from an external bundle).
2343 """
2348 """
2344 dfh = None
2349 dfh = None
2345 if not self._inline:
2350 if not self._inline:
2346 dfh = self._datafp("a+")
2351 dfh = self._datafp("a+")
2347 ifh = self._indexfp("a+")
2352 ifh = self._indexfp("a+")
2348 try:
2353 try:
2349 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2354 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2350 flags, cachedelta, ifh, dfh,
2355 flags, cachedelta, ifh, dfh,
2351 deltacomputer=deltacomputer)
2356 deltacomputer=deltacomputer)
2352 finally:
2357 finally:
2353 if dfh:
2358 if dfh:
2354 dfh.close()
2359 dfh.close()
2355 ifh.close()
2360 ifh.close()
2356
2361
2357 def compress(self, data):
2362 def compress(self, data):
2358 """Generate a possibly-compressed representation of data."""
2363 """Generate a possibly-compressed representation of data."""
2359 if not data:
2364 if not data:
2360 return '', data
2365 return '', data
2361
2366
2362 compressed = self._compressor.compress(data)
2367 compressed = self._compressor.compress(data)
2363
2368
2364 if compressed:
2369 if compressed:
2365 # The revlog compressor added the header in the returned data.
2370 # The revlog compressor added the header in the returned data.
2366 return '', compressed
2371 return '', compressed
2367
2372
2368 if data[0:1] == '\0':
2373 if data[0:1] == '\0':
2369 return '', data
2374 return '', data
2370 return 'u', data
2375 return 'u', data
2371
2376
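# Illustrative sketch (zlib stands in for self._compressor): the
# (header, data) contract of compress() above. An empty header means the
# payload identifies itself (engine header or leading '\0'); 'u' marks
# data stored uncompressed.
import zlib

def sketch_compress(data):
    if not data:
        return b'', data
    compressed = zlib.compress(data)
    if len(compressed) < len(data):
        return b'', compressed      # zlib output leads with its own header
    if data[0:1] == b'\0':
        return b'', data            # '\0' already self-identifies
    return b'u', data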
2372 def decompress(self, data):
2377 def decompress(self, data):
2373 """Decompress a revlog chunk.
2378 """Decompress a revlog chunk.
2374
2379
2375 The chunk is expected to begin with a header identifying the
2380 The chunk is expected to begin with a header identifying the
2376 format type so it can be routed to an appropriate decompressor.
2381 format type so it can be routed to an appropriate decompressor.
2377 """
2382 """
2378 if not data:
2383 if not data:
2379 return data
2384 return data
2380
2385
2381 # Revlogs are read much more frequently than they are written and many
2386 # Revlogs are read much more frequently than they are written and many
2382 # chunks only take microseconds to decompress, so performance is
2387 # chunks only take microseconds to decompress, so performance is
2383 # important here.
2388 # important here.
2384 #
2389 #
2385 # We can make a few assumptions about revlogs:
2390 # We can make a few assumptions about revlogs:
2386 #
2391 #
2387 # 1) the majority of chunks will be compressed (as opposed to inline
2392 # 1) the majority of chunks will be compressed (as opposed to inline
2388 # raw data).
2393 # raw data).
2389 # 2) decompressing *any* data will likely be at least 10x slower than
2394 # 2) decompressing *any* data will likely be at least 10x slower than
2390 # returning raw inline data.
2395 # returning raw inline data.
2391 # 3) we want to prioritize common and officially supported compression
2396 # 3) we want to prioritize common and officially supported compression
2392 # engines
2397 # engines
2393 #
2398 #
2394 # It follows that we want to optimize for "decompress compressed data
2399 # It follows that we want to optimize for "decompress compressed data
2395 # when encoded with common and officially supported compression engines"
2400 # when encoded with common and officially supported compression engines"
2396 # case over "raw data" and "data encoded by less common or non-official
2401 # case over "raw data" and "data encoded by less common or non-official
2397 # compression engines." That is why we have the inline lookup first
2402 # compression engines." That is why we have the inline lookup first
2398 # followed by the compengines lookup.
2403 # followed by the compengines lookup.
2399 #
2404 #
2400 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2405 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2401 # compressed chunks. And this matters for changelog and manifest reads.
2406 # compressed chunks. And this matters for changelog and manifest reads.
2402 t = data[0:1]
2407 t = data[0:1]
2403
2408
2404 if t == 'x':
2409 if t == 'x':
2405 try:
2410 try:
2406 return _zlibdecompress(data)
2411 return _zlibdecompress(data)
2407 except zlib.error as e:
2412 except zlib.error as e:
2408 raise RevlogError(_('revlog decompress error: %s') %
2413 raise RevlogError(_('revlog decompress error: %s') %
2409 stringutil.forcebytestr(e))
2414 stringutil.forcebytestr(e))
2410 # '\0' is more common than 'u' so it goes first.
2415 # '\0' is more common than 'u' so it goes first.
2411 elif t == '\0':
2416 elif t == '\0':
2412 return data
2417 return data
2413 elif t == 'u':
2418 elif t == 'u':
2414 return util.buffer(data, 1)
2419 return util.buffer(data, 1)
2415
2420
2416 try:
2421 try:
2417 compressor = self._decompressors[t]
2422 compressor = self._decompressors[t]
2418 except KeyError:
2423 except KeyError:
2419 try:
2424 try:
2420 engine = util.compengines.forrevlogheader(t)
2425 engine = util.compengines.forrevlogheader(t)
2421 compressor = engine.revlogcompressor()
2426 compressor = engine.revlogcompressor()
2422 self._decompressors[t] = compressor
2427 self._decompressors[t] = compressor
2423 except KeyError:
2428 except KeyError:
2424 raise RevlogError(_('unknown compression type %r') % t)
2429 raise RevlogError(_('unknown compression type %r') % t)
2425
2430
2426 return compressor.decompress(data)
2431 return compressor.decompress(data)
2427
2432
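# Illustrative sketch (core cases only): the one-byte header dispatch
# described above. 'x' is zlib's header, '\0' means stored as-is, 'u'
# prefixes uncompressed data; other bytes go to pluggable engines.
import zlib

def sketch_decompress(data):
    t = data[0:1]
    if t == b'x':
        return zlib.decompress(data)
    if t == b'\0':
        return data
    if t == b'u':
        return data[1:]
    raise ValueError('unknown compression type %r' % t)

assert sketch_decompress(b'uhello') == b'hello'
assert sketch_decompress(zlib.compress(b'hello')) == b'hello'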
2428 def _isgooddeltainfo(self, deltainfo, revinfo):
2433 def _isgooddeltainfo(self, deltainfo, revinfo):
2429 """Returns True if the given delta is good. Good means that it is within
2434 """Returns True if the given delta is good. Good means that it is within
2430 the disk span, disk size, and chain length bounds that we know to be
2435 the disk span, disk size, and chain length bounds that we know to be
2431 performant."""
2436 performant."""
2432 if deltainfo is None:
2437 if deltainfo is None:
2433 return False
2438 return False
2434
2439
2435 # - 'deltainfo.distance' is the distance from the base revision --
2440 # - 'deltainfo.distance' is the distance from the base revision --
2436 # bounding it limits the amount of I/O we need to do.
2441 # bounding it limits the amount of I/O we need to do.
2437 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2442 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2438 # deltas we need to apply -- bounding it limits the amount of CPU
2443 # deltas we need to apply -- bounding it limits the amount of CPU
2439 # we consume.
2444 # we consume.
2440
2445
2441 if self._sparserevlog:
2446 if self._sparserevlog:
2442 # As sparse-read will be used, we can consider the distance to be
2447 # As sparse-read will be used, we can consider the distance to be
2443 # the span of the largest read chunk rather than the span of the
2448 # the span of the largest read chunk rather than the span of the
2444 # whole chunk
2449 # whole chunk
2445 base = deltainfo.base
2450 base = deltainfo.base
2446
2451
2447 if base != nullrev:
2452 if base != nullrev:
2448 deltachain = self._deltachain(base)[0]
2453 deltachain = self._deltachain(base)[0]
2449 else:
2454 else:
2450 deltachain = []
2455 deltachain = []
2451
2456
2452 chunks = _slicechunk(self, deltachain, deltainfo)
2457 chunks = _slicechunk(self, deltachain, deltainfo)
2453 distance = max(map(lambda revs: _segmentspan(self, revs), chunks))
2458 distance = max(map(lambda revs: _segmentspan(self, revs), chunks))
2454 else:
2459 else:
2455 distance = deltainfo.distance
2460 distance = deltainfo.distance
2456
2461
2457 textlen = revinfo.textlen
2462 textlen = revinfo.textlen
2458 defaultmax = textlen * 4
2463 defaultmax = textlen * 4
2459 maxdist = self._maxdeltachainspan
2464 maxdist = self._maxdeltachainspan
2460 if not maxdist:
2465 if not maxdist:
2461 maxdist = distance # ensure the conditional passes
2466 maxdist = distance # ensure the conditional passes
2462 maxdist = max(maxdist, defaultmax)
2467 maxdist = max(maxdist, defaultmax)
2463 if self._sparserevlog and maxdist < self._srmingapsize:
2468 if self._sparserevlog and maxdist < self._srmingapsize:
2464 # In multiple places, we ignore irrelevant data ranges below a
2469 # In multiple places, we ignore irrelevant data ranges below a
2465 # certain size. We also apply this tradeoff here and relax the span
2470 # certain size. We also apply this tradeoff here and relax the span
2466 # constraint for small enough content.
2471 # constraint for small enough content.
2467 maxdist = self._srmingapsize
2472 maxdist = self._srmingapsize
2468 if (distance > maxdist or deltainfo.deltalen > textlen or
2473 if (distance > maxdist or deltainfo.deltalen > textlen or
2469 deltainfo.compresseddeltalen > textlen * 2 or
2474 deltainfo.compresseddeltalen > textlen * 2 or
2470 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2475 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2471 return False
2476 return False
2472
2477
2473 return True
2478 return True
2474
2479
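# Illustrative sketch (sparse-revlog refinements omitted; all parameters
# hypothetical): the acceptance bounds _isgooddeltainfo() applies above.
def sketch_isgood(distance, deltalen, compresseddeltalen, chainlen,
                  textlen, maxdeltachainspan, maxchainlen):
    maxdist = max(maxdeltachainspan or distance, textlen * 4)
    if distance > maxdist:
        return False    # reconstructing would span too much of the file
    if deltalen > textlen or compresseddeltalen > textlen * 2:
        return False    # a full snapshot would be cheaper to store/apply
    if maxchainlen and chainlen > maxchainlen:
        return False    # applying the chain would cost too much CPU
    return True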
2475 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2480 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2476 cachedelta, ifh, dfh, alwayscache=False,
2481 cachedelta, ifh, dfh, alwayscache=False,
2477 deltacomputer=None):
2482 deltacomputer=None):
2478 """internal function to add revisions to the log
2483 """internal function to add revisions to the log
2479
2484
2480 see addrevision for argument descriptions.
2485 see addrevision for argument descriptions.
2481
2486
2482 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2487 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2483
2488
2484 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2489 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2485 be used.
2490 be used.
2486
2491
2487 invariants:
2492 invariants:
2488 - rawtext is optional (can be None); if not set, cachedelta must be set.
2493 - rawtext is optional (can be None); if not set, cachedelta must be set.
2489 if both are set, they must correspond to each other.
2494 if both are set, they must correspond to each other.
2490 """
2495 """
2491 if node == nullid:
2496 if node == nullid:
2492 raise RevlogError(_("%s: attempt to add null revision") %
2497 raise RevlogError(_("%s: attempt to add null revision") %
2493 (self.indexfile))
2498 (self.indexfile))
2494 if node == wdirid or node in wdirfilenodeids:
2499 if node == wdirid or node in wdirfilenodeids:
2495 raise RevlogError(_("%s: attempt to add wdir revision") %
2500 raise RevlogError(_("%s: attempt to add wdir revision") %
2496 (self.indexfile))
2501 (self.indexfile))
2497
2502
2498 if self._inline:
2503 if self._inline:
2499 fh = ifh
2504 fh = ifh
2500 else:
2505 else:
2501 fh = dfh
2506 fh = dfh
2502
2507
2503 btext = [rawtext]
2508 btext = [rawtext]
2504
2509
2505 curr = len(self)
2510 curr = len(self)
2506 prev = curr - 1
2511 prev = curr - 1
2507 offset = self.end(prev)
2512 offset = self.end(prev)
2508 p1r, p2r = self.rev(p1), self.rev(p2)
2513 p1r, p2r = self.rev(p1), self.rev(p2)
2509
2514
2510 # full versions are inserted when the needed deltas
2515 # full versions are inserted when the needed deltas
2511 # become comparable to the uncompressed text
2516 # become comparable to the uncompressed text
2512 if rawtext is None:
2517 if rawtext is None:
2513 # need rawtext size, before changed by flag processors, which is
2518 # need rawtext size, before changed by flag processors, which is
2514 # the non-raw size. use revlog explicitly to avoid filelog's extra
2519 # the non-raw size. use revlog explicitly to avoid filelog's extra
2515 # logic that might remove metadata size.
2520 # logic that might remove metadata size.
2516 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2521 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2517 cachedelta[1])
2522 cachedelta[1])
2518 else:
2523 else:
2519 textlen = len(rawtext)
2524 textlen = len(rawtext)
2520
2525
2521 if deltacomputer is None:
2526 if deltacomputer is None:
2522 deltacomputer = _deltacomputer(self)
2527 deltacomputer = _deltacomputer(self)
2523
2528
2524 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2529 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2525
2530
2526 # no delta for flag processor revision (see "candelta" for why)
2531 # no delta for flag processor revision (see "candelta" for why)
2527 # not calling candelta since only one revision needs test, also to
2532 # not calling candelta since only one revision needs test, also to
2528 # avoid overhead fetching flags again.
2533 # avoid overhead fetching flags again.
2529 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2534 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2530 deltainfo = None
2535 deltainfo = None
2531 else:
2536 else:
2532 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2537 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2533
2538
2534 if deltainfo is not None:
2539 if deltainfo is not None:
2535 base = deltainfo.base
2540 base = deltainfo.base
2536 chainbase = deltainfo.chainbase
2541 chainbase = deltainfo.chainbase
2537 data = deltainfo.data
2542 data = deltainfo.data
2538 l = deltainfo.deltalen
2543 l = deltainfo.deltalen
2539 else:
2544 else:
2540 rawtext = deltacomputer.buildtext(revinfo, fh)
2545 rawtext = deltacomputer.buildtext(revinfo, fh)
2541 data = self.compress(rawtext)
2546 data = self.compress(rawtext)
2542 l = len(data[1]) + len(data[0])
2547 l = len(data[1]) + len(data[0])
2543 base = chainbase = curr
2548 base = chainbase = curr
2544
2549
2545 e = (offset_type(offset, flags), l, textlen,
2550 e = (offset_type(offset, flags), l, textlen,
2546 base, link, p1r, p2r, node)
2551 base, link, p1r, p2r, node)
2547 self.index.insert(-1, e)
2552 self.index.insert(-1, e)
2548 self.nodemap[node] = curr
2553 self.nodemap[node] = curr
2549
2554
2550 entry = self._io.packentry(e, self.node, self.version, curr)
2555 entry = self._io.packentry(e, self.node, self.version, curr)
2551 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2556 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2552
2557
2553 if alwayscache and rawtext is None:
2558 if alwayscache and rawtext is None:
2554 rawtext = deltacomputer.buildtext(revinfo, fh)
2559 rawtext = deltacomputer.buildtext(revinfo, fh)
2555
2560
2556 if type(rawtext) == bytes: # only accept immutable objects
2561 if type(rawtext) == bytes: # only accept immutable objects
2557 self._cache = (node, curr, rawtext)
2562 self._cache = (node, curr, rawtext)
2558 self._chainbasecache[curr] = chainbase
2563 self._chainbasecache[curr] = chainbase
2559 return node
2564 return node
2560
2565
2561 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2566 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2562 # Files opened in a+ mode have inconsistent behavior on various
2567 # Files opened in a+ mode have inconsistent behavior on various
2563 # platforms. Windows requires that a file positioning call be made
2568 # platforms. Windows requires that a file positioning call be made
2564 # when the file handle transitions between reads and writes. See
2569 # when the file handle transitions between reads and writes. See
2565 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2570 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2566 # platforms, Python or the platform itself can be buggy. Some versions
2571 # platforms, Python or the platform itself can be buggy. Some versions
2567 # of Solaris have been observed to not append at the end of the file
2572 # of Solaris have been observed to not append at the end of the file
2568 # if the file was seeked to before the end. See issue4943 for more.
2573 # if the file was seeked to before the end. See issue4943 for more.
2569 #
2574 #
2570 # We work around this issue by inserting a seek() before writing.
2575 # We work around this issue by inserting a seek() before writing.
2571 # Note: This is likely not necessary on Python 3.
2576 # Note: This is likely not necessary on Python 3.
2572 ifh.seek(0, os.SEEK_END)
2577 ifh.seek(0, os.SEEK_END)
2573 if dfh:
2578 if dfh:
2574 dfh.seek(0, os.SEEK_END)
2579 dfh.seek(0, os.SEEK_END)
2575
2580
2576 curr = len(self) - 1
2581 curr = len(self) - 1
2577 if not self._inline:
2582 if not self._inline:
2578 transaction.add(self.datafile, offset)
2583 transaction.add(self.datafile, offset)
2579 transaction.add(self.indexfile, curr * len(entry))
2584 transaction.add(self.indexfile, curr * len(entry))
2580 if data[0]:
2585 if data[0]:
2581 dfh.write(data[0])
2586 dfh.write(data[0])
2582 dfh.write(data[1])
2587 dfh.write(data[1])
2583 ifh.write(entry)
2588 ifh.write(entry)
2584 else:
2589 else:
2585 offset += curr * self._io.size
2590 offset += curr * self._io.size
2586 transaction.add(self.indexfile, offset, curr)
2591 transaction.add(self.indexfile, offset, curr)
2587 ifh.write(entry)
2592 ifh.write(entry)
2588 ifh.write(data[0])
2593 ifh.write(data[0])
2589 ifh.write(data[1])
2594 ifh.write(data[1])
2590 self._enforceinlinesize(transaction, ifh)
2595 self._enforceinlinesize(transaction, ifh)
2591
2596
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

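    # --- editorial note (not part of revlog.py) ----------------------------
    # A hedged sketch of the shape addgroup() consumes: each element of
    # ``deltas`` is the 7-tuple unpacked in the loop above, and ``linkmapper``
    # translates a changelog node into a local link revision. The names
    # ``rl``, ``cl``, ``tr``, and the tuple fields are hypothetical:
    #
    #   deltas = [
    #       # (node, p1, p2, linknode, deltabase, delta, flags)
    #       (newnode, parent1, parent2, clnode, basenode, deltabytes, 0),
    #   ]
    #   added = rl.addgroup(deltas, cl.rev, tr,
    #                       addrevisioncb=lambda rl, node: None)
    # ------------------------------------------------------------------------
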
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

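    # --- editorial note (not part of revlog.py) ----------------------------
    # The heuristic above keys off the bdiff hunk header: three big-endian
    # 32-bit ints (start, end, new length). A full-replacement delta is a
    # single hunk rewriting bytes [0, oldlen). A standalone sketch of the same
    # check, assuming (as the code suggests) that mdiff.replacediffheader is
    # equivalent to packing that header for one covering hunk:
    #
    #   import struct
    #   def looks_censored(delta, oldlen):
    #       hlen = struct.calcsize(">lll")
    #       if len(delta) <= hlen:
    #           return False
    #       fullreplace = struct.pack(">lll", 0, oldlen, len(delta) - hlen)
    #       return (delta[:hlen] == fullreplace and
    #               delta[hlen:].startswith(b"\1\ncensored:"))
    # ------------------------------------------------------------------------
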
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

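    # --- editorial note (not part of revlog.py) ----------------------------
    # A worked toy case for getstrippoint(), under the assumption of a small
    # filelog with three revisions:
    #
    #   rev 0: linkrev 0  (root)
    #   rev 1: linkrev 3  (child of rev 0)
    #   rev 2: linkrev 2  (child of rev 0, other branch)
    #
    # getstrippoint(3) walks down from rev 2: its linkrev 2 < 3, so rev 2 is
    # recorded as broken; rev 1 has linkrev 3 >= 3, which empties
    # futurelargelinkrevs and stops the walk. The result (1, {2}) says:
    # truncate at rev 1, and rev 2, whose changeset survives the strip, must
    # be saved and re-added by the caller because it sits above the
    # truncation point.
    # ------------------------------------------------------------------------
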
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

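    # --- editorial note (not part of revlog.py) ----------------------------
    # The truncation offsets above mirror the storage split: with a separate
    # .d file, data is cut at start(rev) and the index at ``rev`` fixed-size
    # entries; for an inline revlog both live in the .i file, so the cut
    # point is the sum of the two. A minimal sketch of that arithmetic, where
    # the 64-byte entry size assumes a v1 index:
    #
    #   def truncation_points(rev, datastart, inline, entrysize=64):
    #       if inline:
    #           return {'.i': datastart + rev * entrysize}
    #       return {'.d': datastart, '.i': rev * entrysize}
    # ------------------------------------------------------------------------
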
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        the two extremes. Deltas will be reused if they are appropriate. But
        if the delta could choose a better revision, it will do so. This means
        if you are converting a non-generaldelta revlog to a generaldelta
        revlog, deltas will be recomputed if the delta's parent isn't a parent
        of the revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        If left unset, the destination revlog's current setting is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
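
    # --- editorial note (not part of revlog.py) ----------------------------
    # A hedged sketch of driving clone(), e.g. to recompute deltas into a
    # fresh revlog. ``opener``, ``tr``, and the file names are hypothetical:
    #
    #   src = revlog(opener, 'data/foo.i')
    #   dst = revlog(opener, 'data/foo.i.new')
    #   src.clone(tr, dst, deltareuse=revlog.DELTAREUSENEVER)
    # ------------------------------------------------------------------------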