revlog: do not search for delta for empty content...
Boris Feld - r39120:dbb3e9e4 default
@@ -1,2995 +1,2998 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import heapq
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .utils import (
    stringutil,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
# bitmask for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)

def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor
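
# Editor's note: illustration only, not part of the original module. A
# hypothetical processor for an already-declared flag would be registered as
# a (read, write, raw) 3-tuple, for example:
#
#   def _read(self, rawtext):
#       return rawtext, True   # text == rawtext, usable for hash checking
#   def _write(self, text):
#       return text, False     # hash is computed from the non-raw text
#   def _raw(self, rawtext):
#       return False           # must match the boolean returned by _write
#   addflagprocessor(REVIDX_EXTSTORED, (_read, _write, _raw))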

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
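
# Editor's note: illustration only. offset_type packs the 48-bit data-file
# offset and the 16-bit flag field into one integer, and getoffset/gettype
# undo it:
#
#   q = offset_type(1024, REVIDX_ISCENSORED)
#   assert getoffset(q) == 1024
#   assert gettype(q) == REVIDX_ISCENSORED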

_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()
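
# Editor's note: illustration only. Because the parents are hashed in sorted
# order, the node of a non-merge revision (p2 == nullid) reduces to:
#
#   hash(text, p1, nullid) == hashlib.sha1(nullid + p1 + text).digest()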

class _testrevlog(object):
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0):
        """data is a list of revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap

    def start(self, rev):
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # Trim empty revs at the end, but never the very first revision of a chain
    while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
        endidx -= 1

    return revs[startidx:endidx]

def _segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> _segmentspan(revlog, [0, 4])
    17
    >>> _segmentspan(revlog, [3, 4])
    5
    >>> _segmentspan(revlog, [1, 2, 3,])
    7
    >>> _segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    return revlog.end(revs[-1]) - revlog.start(revs[0])

def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one read.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If an individual revision chunk is larger than this limit, it will still
    be raised individually.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # * deltainfo is used to pick a good delta chain when writing.
    if not (deltainfo is None or targetsize is None):
        msg = 'cannot use `targetsize` with a `deltainfo`'
        raise error.ProgrammingError(msg)
    for chunk in _slicechunktodensity(revlog, revs,
                                      deltainfo,
                                      revlog._srdensitythreshold,
                                      revlog._srmingapsize):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk

def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected, but
    that are still too large compared to the read guarantee of revlogs. This
    might happen when the "minimal gap size" interrupted the slicing, or when
    chains are built in a way that creates large blocks next to each other.

    >>> revlog = _testrevlog([
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    startrevidx = 0
    startdata = revlog.start(revs[0])
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        if span <= targetsize:
            endrevidx = idx
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx
    yield _trimchunk(revlog, revs, startrevidx)

def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one read.
    Assume that revs are sorted.

    ``deltainfo`` is a _deltainfo instance of a revision that we would append
    to the top of the revlog.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    nextrev = len(revlog)
    nextoffset = revlog.end(nextrev - 1)

    if deltainfo is None:
        deltachainspan = _segmentspan(revlog, revs)
        chainpayload = sum(length(r) for r in revs)
    else:
        deltachainspan = deltainfo.distance
        chainpayload = deltainfo.compresseddeltalen

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    if deltainfo is not None:
        revs = list(revs)
        revs.append(nextrev)

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        if rev < nextrev:
            revstart = start(rev)
            revlen = length(rev)
        else:
            revstart = nextoffset
            revlen = deltainfo.deltalen

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < targetdensity:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()

class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides revisions that are interesting to diff against,
        grouped by level of easiness.
        """
        revlog = self.revlog
        gdelta = revlog._generaldelta
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and gdelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if gdelta:
                # exclude any already lazily tested base
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]

                if not revlog._deltabothparents and len(parents) == 2:
                    parents.sort()
                    # To minimize the chance of having to build a fulltext,
                    # pick first whichever parent is closest to us (max rev)
                    yield (parents[1],)
                    # then the other one (min rev) if the first did not fit
                    yield (parents[0],)
                    tested.update(parents)
                elif len(parents) > 0:
                    # Test all parents (1 or 2), and keep the best candidate
                    yield parents
                    tested.update(parents)

            if prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                yield (prev,)
                tested.add(prev)
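
    # Editor's note: illustration only. For a revision with two non-null
    # parents, no usable cached delta, and ``_deltabothparents`` disabled,
    # the generator above yields candidate groups in this order:
    #
    #   (max(p1r, p2r),)  # closest parent first
    #   (min(p1r, p2r),)  # the other parent, if the first did not fit
    #   (prev,)           # finally the previous revision, unless tested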

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        if not revinfo.textlen:
            return None # empty files do not need a delta

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltalength = self.revlog.length
        deltaparent = self.revlog.deltaparent

        deltainfo = None
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # skip over empty delta (no need to include them in a chain)
                while candidaterev != nullrev and not deltalength(candidaterev):
                    candidaterev = deltaparent(candidaterev)
                # no need to try a delta against nullid, this will be handled
                # by fulltext later.
                if candidaterev == nullrev:
                    continue
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo
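
# Editor's note: with the early return added above, finddeltainfo returns
# None both when the incoming revision is empty and when no candidate delta
# is acceptable; the write path is then expected to fall back to storing a
# full snapshot instead of a delta.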

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
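
# Editor's note: illustration only; the names below are placeholders. A v1
# ("ng") index entry is 64 bytes (8 + 6 * 4 + 20 + 12 pad) and packs and
# unpacks through the Struct above:
#
#   entry = (offset_type(0, 0), complen, uncomplen, baserev, linkrev,
#            p1rev, p2rev, node20)          # node20 is a 20-byte nodeid
#   record = indexformatng_pack(*entry)     # len(record) == 64
#   assert indexformatng.unpack(record) == entry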
854
857
855 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
858 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
856 # signed integer)
859 # signed integer)
857 _maxentrysize = 0x7fffffff
860 _maxentrysize = 0x7fffffff
858
861
859 class revlogio(object):
862 class revlogio(object):
860 def __init__(self):
863 def __init__(self):
861 self.size = indexformatng.size
864 self.size = indexformatng.size
862
865
863 def parseindex(self, data, inline):
866 def parseindex(self, data, inline):
864 # call the C implementation to parse the index data
867 # call the C implementation to parse the index data
865 index, cache = parsers.parse_index2(data, inline)
868 index, cache = parsers.parse_index2(data, inline)
866 return index, getattr(index, 'nodemap', None), cache
869 return index, getattr(index, 'nodemap', None), cache
867
870
868 def packentry(self, entry, node, version, rev):
871 def packentry(self, entry, node, version, rev):
869 p = indexformatng_pack(*entry)
872 p = indexformatng_pack(*entry)
870 if rev == 0:
873 if rev == 0:
871 p = versionformat_pack(version) + p[4:]
874 p = versionformat_pack(version) + p[4:]
872 return p
875 return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)
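        # Note: (x & (x - 1)) clears the lowest set bit of x, so the check
        # above is zero exactly when the cache size is a power of two.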

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                        self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp
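    # Callers that read many chunks in a row can thread an already-open
    # file through ``existingfp`` to avoid reopening the revlog once per
    # chunk.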

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        length = len(self)
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
            if stop > length:
                stop = length
        else:
            stop = length
        return pycompat.xrange(start, stop, step)
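    # Both endpoints are inclusive: revs(2, 5) yields 2, 3, 4, 5 and
    # revs(5, 2) yields 5, 4, 3, 2, because stop is nudged one step past
    # the endpoint above.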

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
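    # The pure-python fallback above scans the index backwards from
    # _nodepos and memoizes every node it passes, so across a sequence of
    # cache misses the index is walked at most once overall.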

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
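    # For reference, an index entry is the 8-tuple:
    #   (offset_and_flags, compressed_length, uncompressed_length,
    #    delta_base_rev, link_rev, parent1_rev, parent2_rev, nodeid)
    # which is what the accessors below pick apart.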
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
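    # Example: a rev whose delta chain is 3 -> 7 -> 12 (assuming rev 3 is
    # stored as a full snapshot) yields (2, total on-disk size of all three
    # entries) -- the base itself is not counted in the chain length.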

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
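    # Example: for the chain 3 -> 7 -> 12 above, _deltachain(12) returns
    # ([3, 7, 12], False), while _deltachain(12, stoprev=7) returns
    # ([12], True) because the walk stopped before reaching the base.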

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]
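    # The spare slot in ishead (count + 1 entries) absorbs updates for
    # nullrev parents: ishead[-1] lands on that sentinel instead of
    # clobbering a real rev.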

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)
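    # Revision numbers grow topologically: an ancestor always has a smaller
    # rev than its descendants, which is why a > b can return False above
    # without computing any ancestor set.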
1734
1737
1735 def ancestor(self, a, b):
1738 def ancestor(self, a, b):
1736 """calculate the "best" common ancestor of nodes a and b"""
1739 """calculate the "best" common ancestor of nodes a and b"""
1737
1740
1738 a, b = self.rev(a), self.rev(b)
1741 a, b = self.rev(a), self.rev(b)
1739 try:
1742 try:
1740 ancs = self.index.ancestors(a, b)
1743 ancs = self.index.ancestors(a, b)
1741 except (AttributeError, OverflowError):
1744 except (AttributeError, OverflowError):
1742 ancs = ancestor.ancestors(self.parentrevs, a, b)
1745 ancs = ancestor.ancestors(self.parentrevs, a, b)
1743 if ancs:
1746 if ancs:
1744 # choose a consistent winner when there's a tie
1747 # choose a consistent winner when there's a tie
1745 return min(map(self.node, ancs))
1748 return min(map(self.node, ancs))
1746 return nullid
1749 return nullid
1747
1750
1748 def _match(self, id):
1751 def _match(self, id):
1749 if isinstance(id, int):
1752 if isinstance(id, int):
1750 # rev
1753 # rev
1751 return self.node(id)
1754 return self.node(id)
1752 if len(id) == 20:
1755 if len(id) == 20:
1753 # possibly a binary node
1756 # possibly a binary node
1754 # odds of a binary node being all hex in ASCII are 1 in 10**25
1757 # odds of a binary node being all hex in ASCII are 1 in 10**25
1755 try:
1758 try:
1756 node = id
1759 node = id
1757 self.rev(node) # quick search the index
1760 self.rev(node) # quick search the index
1758 return node
1761 return node
1759 except LookupError:
1762 except LookupError:
1760 pass # may be partial hex id
1763 pass # may be partial hex id
1761 try:
1764 try:
1762 # str(rev)
1765 # str(rev)
1763 rev = int(id)
1766 rev = int(id)
1764 if "%d" % rev != id:
1767 if "%d" % rev != id:
1765 raise ValueError
1768 raise ValueError
1766 if rev < 0:
1769 if rev < 0:
1767 rev = len(self) + rev
1770 rev = len(self) + rev
1768 if rev < 0 or rev >= len(self):
1771 if rev < 0 or rev >= len(self):
1769 raise ValueError
1772 raise ValueError
1770 return self.node(rev)
1773 return self.node(rev)
1771 except (ValueError, OverflowError):
1774 except (ValueError, OverflowError):
1772 pass
1775 pass
1773 if len(id) == 40:
1776 if len(id) == 40:
1774 try:
1777 try:
1775 # a full hex nodeid?
1778 # a full hex nodeid?
1776 node = bin(id)
1779 node = bin(id)
1777 self.rev(node)
1780 self.rev(node)
1778 return node
1781 return node
1779 except (TypeError, LookupError):
1782 except (TypeError, LookupError):
1780 pass
1783 pass
1781
1784
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                 _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                     _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

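    # Prefix resolution sketch (hypothetical prefixes): a prefix made only of
    # 'f's could also name the working directory pseudo-node (wdirhex is all
    # 'f's), which is what the ``maybewdir`` checks above guard against:
    #
    #     rl._partialmatch("1234ab")  # unique prefix: the full binary node
    #     rl._partialmatch("ff")      # may raise error.WdirUnsupported
    #     rl._partialmatch("12")      # several matches: ambiguity error
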
    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

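    # Sketch (hypothetical ``somenode``): the shortest valid prefix is then
    # lengthened past any all-'f' form so it can never collide with wdirid:
    #
    #     prefix = rl.shortest(somenode)   # e.g. "1f2a"
    #     assert rl.lookup(prefix) == somenode
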
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

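    # Worked example of the window arithmetic above, assuming a power-of-two
    # ``_chunkcachesize`` of 65536: reading 100 bytes at offset 70000 gives
    #
    #     realoffset = 70000 & ~65535                            # 65536
    #     reallength = ((70000 + 100 + 65536) & ~65535) - 65536  # 65536
    #
    # so one aligned 64KiB block is read and cached, and the caller's 100
    # bytes come back as a zero-copy buffer into that block.
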
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

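    # In an inline revlog there is no separate data file: each revision's
    # data sits in the .i file right after its index entry. The correction
    # above accounts for the rev + 1 index entries preceding the data.
    # Sketch, assuming the 64-byte v1 entry size and start(2) == 300:
    #
    #     physical_start = 300 + (2 + 1) * 64   # 492 bytes into the .i file
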
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs, targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

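    # Sketch of the two layouts handled above (hypothetical ``rl``): with
    # general delta, index field 3 records the actual base revision, so
    # ``rl.deltaparent(5)`` may return any earlier rev (or nullrev for a
    # full snapshot, signalled by base == rev). Without general delta the
    # base field only marks the chain start, and every delta implicitly
    # applies on top of ``rev - 1``.
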
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

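    # Reading therefore walks the delta chain: the chain base's full
    # snapshot comes first and every later delta is folded in by
    # mdiff.patches(). Sketch (hypothetical rev):
    #
    #     chain, stopped = rl._deltachain(rev)  # e.g. [2, 5, 9] for rev 9
    #     # bins[0] above is rev 2's snapshot; bins[1:] are binary deltas
    #     # applied in order to rebuild rev 9's rawtext
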
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

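    # Sketch of a flag processor as consumed above (hedged; ``SOMEFLAG``,
    # ``encode`` and ``decode`` are hypothetical names). The module-level
    # ``addflagprocessor()`` registers a (read, write, raw) tuple:
    #
    #     def _readx(rl, text):
    #         return decode(text), True   # transformed text, validate hash
    #     def _writex(rl, text):
    #         return encode(text), True
    #     def _rawx(rl, text):
    #         return False                # raw hash should not be validated
    #
    #     addflagprocessor(SOMEFLAG, (_readx, _writex, _rawx))
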
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

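    # Minimal usage sketch (hypothetical ``rl``, transaction ``tr``, linkrev
    # and parent nodes):
    #
    #     node = rl.addrevision(b"contents", tr, linkrev, p1node, p2node)
    #     assert rl.revision(node) == b"contents"
    #
    # Re-adding content whose node is already known is a no-op that simply
    # returns the existing node.
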
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

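    # Header convention shared by compress() and decompress(): the first
    # element of the returned pair is an optional one-byte marker written
    # ahead of the payload. An empty marker means the payload already starts
    # with a compressor header (zlib's 'x', or an engine-specific byte);
    # 'u' flags data kept uncompressed; data whose first byte is '\0' needs
    # no marker because it cannot be confused with a compression header.
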
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') %
                                  stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

    def _isgooddeltainfo(self, deltainfo, revinfo):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if deltainfo is None:
            return False

        # - 'deltainfo.distance' is the distance from the base revision --
        #   bounding it limits the amount of I/O we need to do.
        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
        #   deltas we need to apply -- bounding it limits the amount of CPU
        #   we consume.

        if self._sparserevlog:
            # As sparse-read will be used, we can consider that the distance,
            # instead of being the span of the whole chunk,
            # is the span of the largest read chunk
            base = deltainfo.base

            if base != nullrev:
                deltachain = self._deltachain(base)[0]
            else:
                deltachain = []

            chunks = _slicechunk(self, deltachain, deltainfo)
            distance = max(map(lambda revs: _segmentspan(self, revs), chunks))
        else:
            distance = deltainfo.distance

        textlen = revinfo.textlen
        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = distance # ensure the conditional below passes
        maxdist = max(maxdist, defaultmax)
        if self._sparserevlog and maxdist < self._srmingapsize:
            # In multiple places, we are ignoring irrelevant data ranges
            # below a certain size. We also apply this tradeoff here and
            # relax the span constraint for small enough content.
            maxdist = self._srmingapsize

        # Bad delta from read span:
        #
        # If the span of data read is larger than the maximum allowed.
        if maxdist < distance:
            return False

        # Bad delta from new delta size:
        #
        # If the delta size is larger than the target text, storing the
        # delta will be inefficient.
        if textlen < deltainfo.deltalen:
            return False

        # Bad delta from cumulated payload size:
        #
        # If the sum of the deltas gets larger than K * the target text length.
        if textlen * 2 < deltainfo.compresseddeltalen:
            return False

        # Bad delta from chain length:
        #
        # If the number of deltas in the chain gets too high.
        if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
            return False

        return True

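    # Worked example of the bounds above (hypothetical numbers): for a
    # revision whose full text is 1000 bytes, with default settings, a
    # candidate delta is rejected when the read span exceeds
    # max(maxdeltachainspan, 4 * 1000) bytes, when the delta alone is larger
    # than 1000 bytes, when the compressed deltas on the chain sum to more
    # than 2 * 1000 bytes, or when the chain grows past the configured
    # maximum length.
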
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid or node in wdirfilenodeids:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = None
        else:
            deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer._buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

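    # Layout of the index entry built above (revlogv1): (offset/flags,
    # compressed length, rawtext length, delta base rev, linkrev, first
    # parent rev, second parent rev, node). ``offset_type`` packs the file
    # offset into the high 48 bits and the flags into the low 16, matching
    # the ``istart[0] >> 16`` unpacking used by _getsegmentforrevs().
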
2590 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2593 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2591 # Files opened in a+ mode have inconsistent behavior on various
2594 # Files opened in a+ mode have inconsistent behavior on various
2592 # platforms. Windows requires that a file positioning call be made
2595 # platforms. Windows requires that a file positioning call be made
2593 # when the file handle transitions between reads and writes. See
2596 # when the file handle transitions between reads and writes. See
2594 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2597 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2595 # platforms, Python or the platform itself can be buggy. Some versions
2598 # platforms, Python or the platform itself can be buggy. Some versions
2596 # of Solaris have been observed to not append at the end of the file
2599 # of Solaris have been observed to not append at the end of the file
2597 # if the file was seeked to before the end. See issue4943 for more.
2600 # if the file was seeked to before the end. See issue4943 for more.
2598 #
2601 #
2599 # We work around this issue by inserting a seek() before writing.
2602 # We work around this issue by inserting a seek() before writing.
2600 # Note: This is likely not necessary on Python 3.
2603 # Note: This is likely not necessary on Python 3.
2601 ifh.seek(0, os.SEEK_END)
2604 ifh.seek(0, os.SEEK_END)
2602 if dfh:
2605 if dfh:
2603 dfh.seek(0, os.SEEK_END)
2606 dfh.seek(0, os.SEEK_END)
2604
2607
2605 curr = len(self) - 1
2608 curr = len(self) - 1
2606 if not self._inline:
2609 if not self._inline:
2607 transaction.add(self.datafile, offset)
2610 transaction.add(self.datafile, offset)
2608 transaction.add(self.indexfile, curr * len(entry))
2611 transaction.add(self.indexfile, curr * len(entry))
2609 if data[0]:
2612 if data[0]:
2610 dfh.write(data[0])
2613 dfh.write(data[0])
2611 dfh.write(data[1])
2614 dfh.write(data[1])
2612 ifh.write(entry)
2615 ifh.write(entry)
2613 else:
2616 else:
2614 offset += curr * self._io.size
2617 offset += curr * self._io.size
2615 transaction.add(self.indexfile, offset, curr)
2618 transaction.add(self.indexfile, offset, curr)
2616 ifh.write(entry)
2619 ifh.write(entry)
2617 ifh.write(data[0])
2620 ifh.write(data[0])
2618 ifh.write(data[1])
2621 ifh.write(data[1])
2619 self._enforceinlinesize(transaction, ifh)
2622 self._enforceinlinesize(transaction, ifh)
2620
2623
2621 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2624 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2622 """
2625 """
2623 add a delta group
2626 add a delta group
2624
2627
2625 given a set of deltas, add them to the revision log. the
2628 given a set of deltas, add them to the revision log. the
2626 first delta is against its parent, which should be in our
2629 first delta is against its parent, which should be in our
2627 log, the rest are against the previous delta.
2630 log, the rest are against the previous delta.
2628
2631
2629 If ``addrevisioncb`` is defined, it will be called with arguments of
2632 If ``addrevisioncb`` is defined, it will be called with arguments of
2630 this revlog and the node that was added.
2633 this revlog and the node that was added.
2631 """
2634 """
2632
2635
        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
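        # flush() is handed to _peek_iscensored() so an implementation can
        # force pending writes to disk before reading data back; the default
        # implementation below does not use it.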
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
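        # An mdiff delta is a sequence of hunks, each introduced by a ">lll"
        # header of (start, end, new-length). A full replacement therefore
        # consists of exactly one hunk covering the entire base text.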
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

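        # 'heads' maps each rev on the walk frontier to its linkrev;
        # 'futurelargelinkrevs' tracks the frontier linkrevs that are still
        # >= minlink.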
        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
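        # 'end' starts as the data offset of the first stripped revision; the
        # index truncation point additionally accounts for one fixed-size
        # entry per surviving revision.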
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

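        # the in-memory index keeps its trailing sentinel entry, hence rev:-1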
        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
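        """Check the on-disk sizes of the index and data files.

        Returns a (dd, di) tuple: dd is the difference in bytes between the
        actual data file size and what the index expects, and di is the same
        for the index file itself. Both are 0 for a consistent revlog.
        """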
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

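    # Delta-reuse policies accepted by clone(); see the clone() docstring
    # below for what each one means.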
    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added through the full ``addrevision()`` code
          path rather than the lower-level ``_addrevision()``.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        When it is None, the destination revlog's existing setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

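                # index entry layout: (offset|flags, compressed length,
                # uncompressed length, delta base rev, link rev, p1 rev,
                # p2 rev, node)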
                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd