##// END OF EJS Templates
revlog: document and test _slicechunk
Boris Feld -
r38659:f62b8fb0 default
parent child Browse files
Show More
@@ -1,2710 +1,2744 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import hashlib
19 import hashlib
20 import heapq
20 import heapq
21 import os
21 import os
22 import re
22 import re
23 import struct
23 import struct
24 import zlib
24 import zlib
25
25
26 # import stuff from node for others to import from revlog
26 # import stuff from node for others to import from revlog
27 from .node import (
27 from .node import (
28 bin,
28 bin,
29 hex,
29 hex,
30 nullid,
30 nullid,
31 nullrev,
31 nullrev,
32 wdirfilenodeids,
32 wdirfilenodeids,
33 wdirhex,
33 wdirhex,
34 wdirid,
34 wdirid,
35 wdirrev,
35 wdirrev,
36 )
36 )
37 from .i18n import _
37 from .i18n import _
38 from .thirdparty import (
38 from .thirdparty import (
39 attr,
39 attr,
40 )
40 )
41 from . import (
41 from . import (
42 ancestor,
42 ancestor,
43 error,
43 error,
44 mdiff,
44 mdiff,
45 policy,
45 policy,
46 pycompat,
46 pycompat,
47 templatefilters,
47 templatefilters,
48 util,
48 util,
49 )
49 )
50 from .utils import (
50 from .utils import (
51 stringutil,
51 stringutil,
52 )
52 )
53
53
54 parsers = policy.importmod(r'parsers')
54 parsers = policy.importmod(r'parsers')
55
55
56 # Aliased for performance.
56 # Aliased for performance.
57 _zlibdecompress = zlib.decompress
57 _zlibdecompress = zlib.decompress
58
58
59 # revlog header flags
59 # revlog header flags
60 REVLOGV0 = 0
60 REVLOGV0 = 0
61 REVLOGV1 = 1
61 REVLOGV1 = 1
62 # Dummy value until file format is finalized.
62 # Dummy value until file format is finalized.
63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 REVLOGV2 = 0xDEAD
64 REVLOGV2 = 0xDEAD
65 FLAG_INLINE_DATA = (1 << 16)
65 FLAG_INLINE_DATA = (1 << 16)
66 FLAG_GENERALDELTA = (1 << 17)
66 FLAG_GENERALDELTA = (1 << 17)
67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 REVLOG_DEFAULT_FORMAT = REVLOGV1
68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72
72
73 # revlog index flags
73 # revlog index flags
74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 REVIDX_DEFAULT_FLAGS = 0
77 REVIDX_DEFAULT_FLAGS = 0
78 # stable order in which flags need to be processed and their processors applied
78 # stable order in which flags need to be processed and their processors applied
79 REVIDX_FLAGS_ORDER = [
79 REVIDX_FLAGS_ORDER = [
80 REVIDX_ISCENSORED,
80 REVIDX_ISCENSORED,
81 REVIDX_ELLIPSIS,
81 REVIDX_ELLIPSIS,
82 REVIDX_EXTSTORED,
82 REVIDX_EXTSTORED,
83 ]
83 ]
84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 # bitmark for flags that could cause rawdata content change
85 # bitmark for flags that could cause rawdata content change
86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87
87
88 # max size of revlog with inline data
88 # max size of revlog with inline data
89 _maxinline = 131072
89 _maxinline = 131072
90 _chunksize = 1048576
90 _chunksize = 1048576
91
91
92 RevlogError = error.RevlogError
92 RevlogError = error.RevlogError
93 LookupError = error.LookupError
93 LookupError = error.LookupError
94 CensoredNodeError = error.CensoredNodeError
94 CensoredNodeError = error.CensoredNodeError
95 ProgrammingError = error.ProgrammingError
95 ProgrammingError = error.ProgrammingError
96
96
97 # Store flag processors (cf. 'addflagprocessor()' to register)
97 # Store flag processors (cf. 'addflagprocessor()' to register)
98 _flagprocessors = {
98 _flagprocessors = {
99 REVIDX_ISCENSORED: None,
99 REVIDX_ISCENSORED: None,
100 }
100 }
101
101
_mdre = re.compile('\1\n')
def parsemeta(text):
    """Parse filelog metadata out of text; return (metadatadict, metadatasize).

    Returns (None, None) when text carries no metadata block.
    """
    # text can be a buffer, so avoid .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    end = _mdre.search(text, 2).start()
    meta = {}
    for line in text[2:end].splitlines():
        key, value = line.split(": ", 1)
        meta[key] = value
    return meta, end + 2
115
115
def packmeta(meta, text):
    """Serialize meta (sorted by key) in front of text, \\1\\n delimited."""
    lines = ["%s: %s\n" % (key, meta[key]) for key in sorted(meta)]
    return "\1\n%s\1\n%s" % ("".join(lines), text)
120
120
def _censoredtext(text):
    """Report whether text's metadata block marks it as censored."""
    meta = parsemeta(text)[0]
    return meta and "censored" in meta
124
124
def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
      - (read)  f(self, rawtext) -> text, bool
      - (write) f(self, text) -> rawtext, bool
      - (raw)   f(self, rawtext) -> bool
    "text" is presented to the user, "rawtext" is what is stored in revlog
    data and is not directly visible to the user.
    The boolean returned by these transforms tells whether the returned
    text may be used for hash integrity checking: if "write" returns False,
    "text" is used to generate the hash; if it returns True, the "rawtext"
    it produced is used instead. "write" and "read" usually return opposite
    booleans, and "raw" returns the same boolean as "write".

    Note: the 'raw' transform is used for changegroup generation and in some
    debug commands; there it only indicates whether the contents can be used
    for hash integrity checks.
    """
    # Reject flags outside the supported set, flags missing from the
    # processing order, and double registration -- in that priority.
    if not flag & REVIDX_KNOWN_FLAGS:
        raise ProgrammingError(
            _("cannot register processor on unknown flag '%#x'.") % (flag))
    if flag not in REVIDX_FLAGS_ORDER:
        raise ProgrammingError(
            _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag))
    if flag in _flagprocessors:
        raise error.Abort(
            _("cannot register multiple processors on flag '%#x'.") % (flag))
    _flagprocessors[flag] = processor
160
160
def getoffset(q):
    # An index entry packs the data-file byte offset in the bits above the
    # 16 flag bits; shift the flags out to recover the offset.
    return int(q >> 16)
163
163
def gettype(q):
    # The low 16 bits of an index entry hold the revision flags
    # (see the REVIDX_* constants above).
    return int(q & 0xFFFF)
166
166
def offset_type(offset, type):
    # Pack a byte offset and revision flags into one integer; inverse of
    # getoffset()/gettype(). Flag bits with no registered meaning are
    # rejected so they cannot silently end up in the index.
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
171
171
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent node is null, p2 is null
    if p2 == nullid:
        # copying the precomputed null-parent hash is faster than
        # creating a fresh one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # neither parent is null: feed the parents in sorted order so the
        # result is independent of p1/p2 ordering
        lo, hi = (p1, p2) if p1 < p2 else (p2, p1)
        s = hashlib.sha1(lo)
        s.update(hi)
    s.update(text)
    return s.digest()
198
198
199 class _testrevlog(object):
199 class _testrevlog(object):
200 """minimalist fake revlog to use in doctests"""
200 """minimalist fake revlog to use in doctests"""
201
201
202 def __init__(self, data, density=0.5, mingap=0):
202 def __init__(self, data, density=0.5, mingap=0):
203 """data is an list of revision payload boundaries"""
203 """data is an list of revision payload boundaries"""
204 self._data = data
204 self._data = data
205 self._srdensitythreshold = density
205 self._srdensitythreshold = density
206 self._srmingapsize = mingap
206 self._srmingapsize = mingap
207
207
208 def start(self, rev):
208 def start(self, rev):
209 if rev == 0:
209 if rev == 0:
210 return 0
210 return 0
211 return self._data[rev - 1]
211 return self._data[rev - 1]
212
212
213 def end(self, rev):
213 def end(self, rev):
214 return self._data[rev]
214 return self._data[rev]
215
215
216 def length(self, rev):
216 def length(self, rev):
217 return self.end(rev) - self.start(rev)
217 return self.end(rev) - self.start(rev)
218
218
219 def _trimchunk(revlog, revs, startidx, endidx=None):
219 def _trimchunk(revlog, revs, startidx, endidx=None):
220 """returns revs[startidx:endidx] without empty trailing revs
220 """returns revs[startidx:endidx] without empty trailing revs
221
221
222 Doctest Setup
222 Doctest Setup
223 >>> revlog = _testrevlog([
223 >>> revlog = _testrevlog([
224 ... 5, #0
224 ... 5, #0
225 ... 10, #1
225 ... 10, #1
226 ... 12, #2
226 ... 12, #2
227 ... 12, #3 (empty)
227 ... 12, #3 (empty)
228 ... 17, #4
228 ... 17, #4
229 ... 21, #5
229 ... 21, #5
230 ... 21, #6 (empty)
230 ... 21, #6 (empty)
231 ... ])
231 ... ])
232
232
233 Contiguous cases:
233 Contiguous cases:
234 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
234 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
235 [0, 1, 2, 3, 4, 5]
235 [0, 1, 2, 3, 4, 5]
236 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
236 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
237 [0, 1, 2, 3, 4]
237 [0, 1, 2, 3, 4]
238 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
238 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
239 [0, 1, 2]
239 [0, 1, 2]
240 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
240 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
241 [2]
241 [2]
242 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
242 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
243 [3, 4, 5]
243 [3, 4, 5]
244 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
244 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
245 [3, 4]
245 [3, 4]
246
246
247 Discontiguous cases:
247 Discontiguous cases:
248 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
248 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
249 [1, 3, 5]
249 [1, 3, 5]
250 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
250 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
251 [1]
251 [1]
252 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
252 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
253 [3, 5]
253 [3, 5]
254 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
254 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
255 [3, 5]
255 [3, 5]
256 """
256 """
257 length = revlog.length
257 length = revlog.length
258
258
259 if endidx is None:
259 if endidx is None:
260 endidx = len(revs)
260 endidx = len(revs)
261
261
262 # Trim empty revs at the end, but never the very first revision of a chain
262 # Trim empty revs at the end, but never the very first revision of a chain
263 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
263 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
264 endidx -= 1
264 endidx -= 1
265
265
266 return revs[startidx:endidx]
266 return revs[startidx:endidx]
267
267
268 def _segmentspan(revlog, revs):
268 def _segmentspan(revlog, revs):
269 """Get the byte span of a segment of revisions
269 """Get the byte span of a segment of revisions
270
270
271 revs is a sorted array of revision numbers
271 revs is a sorted array of revision numbers
272
272
273 >>> revlog = _testrevlog([
273 >>> revlog = _testrevlog([
274 ... 5, #0
274 ... 5, #0
275 ... 10, #1
275 ... 10, #1
276 ... 12, #2
276 ... 12, #2
277 ... 12, #3 (empty)
277 ... 12, #3 (empty)
278 ... 17, #4
278 ... 17, #4
279 ... ])
279 ... ])
280
280
281 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
281 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
282 17
282 17
283 >>> _segmentspan(revlog, [0, 4])
283 >>> _segmentspan(revlog, [0, 4])
284 17
284 17
285 >>> _segmentspan(revlog, [3, 4])
285 >>> _segmentspan(revlog, [3, 4])
286 5
286 5
287 >>> _segmentspan(revlog, [1, 2, 3,])
287 >>> _segmentspan(revlog, [1, 2, 3,])
288 7
288 7
289 >>> _segmentspan(revlog, [1, 3])
289 >>> _segmentspan(revlog, [1, 3])
290 7
290 7
291 """
291 """
292 if not revs:
292 if not revs:
293 return 0
293 return 0
294 return revlog.end(revs[-1]) - revlog.start(revs[0])
294 return revlog.end(revs[-1]) - revlog.start(revs[0])
295
295
def _slicechunk(revlog, revs):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    This is a generator: each yielded value is a non-empty list of revisions
    (a sub-sequence of ``revs``).

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunk(revlog, range(16)))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision (or nothing) cannot be sliced any further
    if len(revs) <= 1:
        yield revs
        return

    # total bytes read if the whole span is fetched in one go; readdata
    # shrinks below as gaps get skipped
    readdata = deltachainspan = _segmentspan(revlog, revs)

    # spans below the minimum gap size are not worth slicing at all
    if deltachainspan < revlog._srmingapsize:
        yield revs
        return

    # bytes of actual payload (useful data) within the span
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # dense enough already: read everything in one chunk
    if density >= revlog._srdensitythreshold:
        yield revs
        return

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > revlog._srmingapsize:
                # negate so the heap (a min-heap) pops the largest gap first
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < revlog._srdensitythreshold:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices; indicesheap yields the cut points
    # in increasing order
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    # trailing chunk after the last cut point
    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
376
410
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    # Immutable record describing a candidate delta, built by
    # _deltacomputer._builddeltainfo.
    # byte distance from the end of the revlog to the start of the chain
    # base (deltalen + offset - start(chainbase) in _builddeltainfo)
    distance = attr.ib()
    # compressed size of the delta (len(header) + len(data))
    deltalen = attr.ib()
    # (header, compressed payload) pair as returned by revlog.compress()
    data = attr.ib()
    # delta base: the base revision itself with generaldelta, otherwise the
    # chain base
    base = attr.ib()
    # first revision of the delta chain that ``base`` belongs to
    chainbase = attr.ib()
    # length of the delta chain including this new delta
    chainlen = attr.ib()
    # cumulative compressed size of the whole chain including this delta
    compresseddeltalen = attr.ib()
386
420
387 class _deltacomputer(object):
421 class _deltacomputer(object):
    def __init__(self, revlog):
        # the revlog deltas are computed against; every candidate lookup and
        # delta build below goes through it
        self.revlog = revlog
390
424
    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides revisions that present an interest to be diffed against,
        grouped by level of easiness.

        Yields tuples/lists of candidate revision numbers, cheapest group
        first; each revision is yielded at most once. cachedelta is either
        None or a (baserev, delta) pair received from the wire.
        """
        revlog = self.revlog
        gdelta = revlog._generaldelta
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            # track already-yielded revisions so nothing is offered twice
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and gdelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if gdelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]

                if not revlog._aggressivemergedeltas and len(parents) == 2:
                    parents.sort()
                    # To minimize the chance of having to build a fulltext,
                    # pick first whichever parent is closest to us (max rev)
                    yield (parents[1],)
                    # then the other one (min rev) if the first did not fit
                    yield (parents[0],)
                    tested.update(parents)
                elif len(parents) > 0:
                    # Test all parents (1 or 2), and keep the best candidate
                    yield parents
                    tested.update(parents)

            if prev not in tested:
                # other approach failed try against prev to hopefully save us a
                # fulltext.
                yield (prev,)
                tested.add(prev)
438
472
    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the fulltext; the result is also cached in revinfo.btext[0].
        Raises RevlogError when the data claims censorship that the flags do
        not carry, and re-raises CensoredNodeError unless the censored flag
        is set.
        """
        # btext is a one-element cache list shared through revinfo
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            # run registered flag processors and, when requested, verify the
            # node hash against the reconstructed text
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]
482
516
483 def _builddeltadiff(self, base, revinfo, fh):
517 def _builddeltadiff(self, base, revinfo, fh):
484 revlog = self.revlog
518 revlog = self.revlog
485 t = self.buildtext(revinfo, fh)
519 t = self.buildtext(revinfo, fh)
486 if revlog.iscensored(base):
520 if revlog.iscensored(base):
487 # deltas based on a censored revision must replace the
521 # deltas based on a censored revision must replace the
488 # full content in one patch, so delta works everywhere
522 # full content in one patch, so delta works everywhere
489 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
523 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
490 delta = header + t
524 delta = header + t
491 else:
525 else:
492 ptext = revlog.revision(base, _df=fh, raw=True)
526 ptext = revlog.revision(base, _df=fh, raw=True)
493 delta = mdiff.textdiff(ptext, t)
527 delta = mdiff.textdiff(ptext, t)
494
528
495 return delta
529 return delta
496
530
497 def _builddeltainfo(self, revinfo, base, fh):
531 def _builddeltainfo(self, revinfo, base, fh):
498 # can we use the cached delta?
532 # can we use the cached delta?
499 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
533 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
500 delta = revinfo.cachedelta[1]
534 delta = revinfo.cachedelta[1]
501 else:
535 else:
502 delta = self._builddeltadiff(base, revinfo, fh)
536 delta = self._builddeltadiff(base, revinfo, fh)
503 revlog = self.revlog
537 revlog = self.revlog
504 header, data = revlog.compress(delta)
538 header, data = revlog.compress(delta)
505 deltalen = len(header) + len(data)
539 deltalen = len(header) + len(data)
506 chainbase = revlog.chainbase(base)
540 chainbase = revlog.chainbase(base)
507 offset = revlog.end(len(revlog) - 1)
541 offset = revlog.end(len(revlog) - 1)
508 dist = deltalen + offset - revlog.start(chainbase)
542 dist = deltalen + offset - revlog.start(chainbase)
509 if revlog._generaldelta:
543 if revlog._generaldelta:
510 deltabase = base
544 deltabase = base
511 else:
545 else:
512 deltabase = chainbase
546 deltabase = chainbase
513 chainlen, compresseddeltalen = revlog._chaininfo(base)
547 chainlen, compresseddeltalen = revlog._chaininfo(base)
514 chainlen += 1
548 chainlen += 1
515 compresseddeltalen += deltalen
549 compresseddeltalen += deltalen
516 return _deltainfo(dist, deltalen, (header, data), deltabase,
550 return _deltainfo(dist, deltalen, (header, data), deltabase,
517 chainbase, chainlen, compresseddeltalen)
551 chainbase, chainlen, compresseddeltalen)
518
552
519 def finddeltainfo(self, revinfo, fh):
553 def finddeltainfo(self, revinfo, fh):
520 """Find an acceptable delta against a candidate revision
554 """Find an acceptable delta against a candidate revision
521
555
522 revinfo: information about the revision (instance of _revisioninfo)
556 revinfo: information about the revision (instance of _revisioninfo)
523 fh: file handle to either the .i or the .d revlog file,
557 fh: file handle to either the .i or the .d revlog file,
524 depending on whether it is inlined or not
558 depending on whether it is inlined or not
525
559
526 Returns the first acceptable candidate revision, as ordered by
560 Returns the first acceptable candidate revision, as ordered by
527 _getcandidaterevs
561 _getcandidaterevs
528 """
562 """
529 cachedelta = revinfo.cachedelta
563 cachedelta = revinfo.cachedelta
530 p1 = revinfo.p1
564 p1 = revinfo.p1
531 p2 = revinfo.p2
565 p2 = revinfo.p2
532 revlog = self.revlog
566 revlog = self.revlog
533
567
534 deltainfo = None
568 deltainfo = None
535 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
569 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
536 nominateddeltas = []
570 nominateddeltas = []
537 for candidaterev in candidaterevs:
571 for candidaterev in candidaterevs:
538 # no delta for rawtext-changing revs (see "candelta" for why)
572 # no delta for rawtext-changing revs (see "candelta" for why)
539 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
573 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
540 continue
574 continue
541 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
575 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
542 if revlog._isgooddeltainfo(candidatedelta, revinfo):
576 if revlog._isgooddeltainfo(candidatedelta, revinfo):
543 nominateddeltas.append(candidatedelta)
577 nominateddeltas.append(candidatedelta)
544 if nominateddeltas:
578 if nominateddeltas:
545 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
579 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
546 break
580 break
547
581
548 return deltainfo
582 return deltainfo
549
583
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    textlen: length of the revision fulltext
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()
568
602
# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
# bind the pack/unpack methods once; they are called in tight loops
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
580
614
class revlogoldio(object):
    """Reader/writer for the legacy version-0 revlog index format."""

    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        """Parse raw v0 index ``data`` into (index, nodemap, None)."""
        entrysize = self.size
        datalen = len(data)
        index = []
        nodemap = {nullid: nullrev}
        offset = 0
        rev = 0
        while offset + entrysize <= datalen:
            e = indexformatv0_unpack(data[offset:offset + entrysize])
            offset += entrysize
            # transform to revlogv1 format
            index.append((offset_type(e[0], 0), e[1], -1, e[2], e[3],
                          nodemap.get(e[4], nullrev),
                          nodemap.get(e[5], nullrev), e[6]))
            nodemap[e[6]] = rev
            rev += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        """Serialize a v1-shaped index ``entry`` into the v0 wire format."""
        # v0 has no room for flag bits in the offset field
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        fields = (getoffset(entry[0]), entry[1], entry[3], entry[4],
                  node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*fields)
613
647
# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
# the version/flags header stored in the first 4 bytes of the index file
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff
633
667
class revlogio(object):
    """Reader/writer for the current (v1/v2) revlog index format."""

    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        """Parse raw index ``data`` into (index, nodemap, chunkcache)."""
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        nodemap = getattr(index, 'nodemap', None)
        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        """Serialize an index ``entry``; rev 0 also carries the header."""
        packed = indexformatng_pack(*entry)
        if rev == 0:
            # the first entry doubles as the revlog header: its leading
            # 4 bytes hold the version/flags word instead of the offset
            packed = versionformat_pack(version) + packed[4:]
        return packed
648
682
649 class revlog(object):
683 class revlog(object):
650 """
684 """
651 the underlying revision storage object
685 the underlying revision storage object
652
686
653 A revlog consists of two parts, an index and the revision data.
687 A revlog consists of two parts, an index and the revision data.
654
688
655 The index is a file with a fixed record size containing
689 The index is a file with a fixed record size containing
656 information on each revision, including its nodeid (hash), the
690 information on each revision, including its nodeid (hash), the
657 nodeids of its parents, the position and offset of its data within
691 nodeids of its parents, the position and offset of its data within
658 the data file, and the revision it's based on. Finally, each entry
692 the data file, and the revision it's based on. Finally, each entry
659 contains a linkrev entry that can serve as a pointer to external
693 contains a linkrev entry that can serve as a pointer to external
660 data.
694 data.
661
695
662 The revision data itself is a linear collection of data chunks.
696 The revision data itself is a linear collection of data chunks.
663 Each chunk represents a revision and is usually represented as a
697 Each chunk represents a revision and is usually represented as a
664 delta against the previous chunk. To bound lookup time, runs of
698 delta against the previous chunk. To bound lookup time, runs of
665 deltas are limited to about 2 times the length of the original
699 deltas are limited to about 2 times the length of the original
666 version data. This makes retrieval of a version proportional to
700 version data. This makes retrieval of a version proportional to
667 its size, or O(1) relative to the number of revisions.
701 its size, or O(1) relative to the number of revisions.
668
702
669 Both pieces of the revlog are written to in an append-only
703 Both pieces of the revlog are written to in an append-only
670 fashion, which means we never need to rewrite a file to insert or
704 fashion, which means we never need to rewrite a file to insert or
671 remove data, and can use some simple techniques to avoid the need
705 remove data, and can use some simple techniques to avoid the need
672 for locking while reading.
706 for locking while reading.
673
707
674 If checkambig, indexfile is opened with checkambig=True at
708 If checkambig, indexfile is opened with checkambig=True at
675 writing, to avoid file stat ambiguity.
709 writing, to avoid file stat ambiguity.
676
710
677 If mmaplargeindex is True, and an mmapindexthreshold is set, the
711 If mmaplargeindex is True, and an mmapindexthreshold is set, the
678 index will be mmapped rather than read if it is larger than the
712 index will be mmapped rather than read if it is larger than the
679 configured threshold.
713 configured threshold.
680
714
681 If censorable is True, the revlog can have censored revisions.
715 If censorable is True, the revlog can have censored revisions.
682 """
716 """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        # data file defaults to the index name with ".i" swapped for ".d"
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # The defaults above may be overridden by options carried on the
        # opener (typically set from the repository configuration).
        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._withsparseread = bool(opts.get('with-sparse-read', False))
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        # the chunk cache size must be a positive power of two (the caching
        # code relies on power-of-two arithmetic)
        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        # Read the existing index file, if any; a missing file just means an
        # empty revlog.  Large indexes may be mmapped instead of read.
        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                # the on-disk header wins over the configured default version
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        # the header word packs a 16-bit format number and 16 bits of flags
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        # pick the index parser matching the detected on-disk format
        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}
816
850
    @util.propertycache
    def _compressor(self):
        # Lazily instantiate the compressor for the configured engine name.
        return util.compengines[self._compengine].revlogcompressor()
820
854
821 def _indexfp(self, mode='r'):
855 def _indexfp(self, mode='r'):
822 """file object for the revlog's index file"""
856 """file object for the revlog's index file"""
823 args = {r'mode': mode}
857 args = {r'mode': mode}
824 if mode != 'r':
858 if mode != 'r':
825 args[r'checkambig'] = self._checkambig
859 args[r'checkambig'] = self._checkambig
826 if mode == 'w':
860 if mode == 'w':
827 args[r'atomictemp'] = True
861 args[r'atomictemp'] = True
828 return self.opener(self.indexfile, **args)
862 return self.opener(self.indexfile, **args)
829
863
    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)
833
867
834 @contextlib.contextmanager
868 @contextlib.contextmanager
835 def _datareadfp(self, existingfp=None):
869 def _datareadfp(self, existingfp=None):
836 """file object suitable to read data"""
870 """file object suitable to read data"""
837 if existingfp is not None:
871 if existingfp is not None:
838 yield existingfp
872 yield existingfp
839 else:
873 else:
840 if self._inline:
874 if self._inline:
841 func = self._indexfp
875 func = self._indexfp
842 else:
876 else:
843 func = self._datafp
877 func = self._datafp
844 with func() as fp:
878 with func() as fp:
845 yield fp
879 yield fp
846
880
    def tip(self):
        # -2 because the index keeps a trailing null-revision sentinel entry.
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        # a revision number is valid if it indexes a real (non-null) entry
        return 0 <= rev < len(self)
    def __len__(self):
        # number of real revisions; excludes the trailing null sentinel
        return len(self.index) - 1
    def __iter__(self):
        # iterate revision numbers in ascending order
        return iter(xrange(len(self)))
855 def revs(self, start=0, stop=None):
889 def revs(self, start=0, stop=None):
856 """iterate over all rev in this revlog (from start to stop)"""
890 """iterate over all rev in this revlog (from start to stop)"""
857 step = 1
891 step = 1
858 if stop is not None:
892 if stop is not None:
859 if start > stop:
893 if start > stop:
860 step = -1
894 step = -1
861 stop += step
895 stop += step
862 else:
896 else:
863 stop = len(self)
897 stop = len(self)
864 return xrange(start, stop, step)
898 return xrange(start, stop, step)
865
899
    @util.propertycache
    def nodemap(self):
        # Resolving one node primes self._nodecache; expose that mapping as
        # the node -> rev map from then on.
        self.rev(self.node(0))
        return self._nodecache
870
904
871 def hasnode(self, node):
905 def hasnode(self, node):
872 try:
906 try:
873 self.rev(node)
907 self.rev(node)
874 return True
908 return True
875 except KeyError:
909 except KeyError:
876 return False
910 return False
877
911
    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True
889
923
    def clearcaches(self):
        """Drop the in-memory revision, chain, chunk and node-lookup caches."""
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            # a plain dict has no clearcaches(); reset it to its initial state
            self._nodecache = {nullid: nullrev}
            self._nodepos = None
901
935
    def rev(self, node):
        """Return the revision number for ``node``.

        Raises LookupError if the node is unknown, or error.WdirUnsupported
        for the working-directory pseudo nodes.
        """
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                # start scanning from the last real entry (skip the null
                # sentinel at the end of the index)
                p = len(i) - 2
            else:
                assert p < len(i)
            # scan backwards from the last unscanned position, caching every
            # node seen along the way so later lookups get cheaper
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
930
964
931 # Accessors for index entries.
965 # Accessors for index entries.
932
966
933 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
967 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
934 # are flags.
968 # are flags.
    def start(self, rev):
        # high bits of the first index field are the data-file offset
        return int(self.index[rev][0] >> 16)
937
971
    def flags(self, rev):
        # low 16 bits of the first index field are the per-revision flags
        return self.index[rev][0] & 0xFFFF
940
974
    def length(self, rev):
        # compressed length of the revision's data chunk (index field 1)
        return self.index[rev][1]
943
977
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        # a negative stored length means unknown: rebuild the raw text
        t = self.revision(rev, raw=True)
        return len(t)
952
986
    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        # slow path: materialize the processed text and measure it
        return len(self.revision(rev, raw=False))
962
996
963 def chainbase(self, rev):
997 def chainbase(self, rev):
964 base = self._chainbasecache.get(rev)
998 base = self._chainbasecache.get(rev)
965 if base is not None:
999 if base is not None:
966 return base
1000 return base
967
1001
968 index = self.index
1002 index = self.index
969 iterrev = rev
1003 iterrev = rev
970 base = index[iterrev][3]
1004 base = index[iterrev][3]
971 while base != iterrev:
1005 while base != iterrev:
972 iterrev = base
1006 iterrev = base
973 base = index[iterrev][3]
1007 base = index[iterrev][3]
974
1008
975 self._chainbasecache[rev] = base
1009 self._chainbasecache[rev] = base
976 return base
1010 return base
977
1011
    def linkrev(self, rev):
        # linkrev (index field 4): pointer into external data, see the
        # class docstring
        return self.index[rev][4]
980
1014
    def parentrevs(self, rev):
        """Return the two parent revisions of ``rev`` as a (p1, p2) tuple."""
        try:
            entry = self.index[rev]
        except IndexError:
            # the working-directory pseudo revision has no index entry
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]
990
1024
    def node(self, rev):
        """Return the node (hash) of revision ``rev``."""
        try:
            return self.index[rev][7]
        except IndexError:
            # the working-directory pseudo revision has no index entry
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
998
1032
999 # Derived from index values.
1033 # Derived from index values.
1000
1034
    def end(self, rev):
        # offset just past the end of rev's data: start + compressed length
        return self.start(rev) + self.length(rev)
1003
1037
    def parents(self, node):
        """Return the parent nodes of ``node``."""
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1008
1042
    def chainlen(self, rev):
        # length component of the (chain-length, sum-delta-length) pair
        return self._chaininfo(rev)[0]
1011
1045
    def _chaininfo(self, rev):
        """Return (chain-length, sum-of-compressed-delta-lengths) for ``rev``."""
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        # walk the delta chain towards its base (a revision that is its own
        # base), accumulating the chain length and total compressed size
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                # generaldelta: the delta base is stored in the index
                iterrev = e[3]
            else:
                # legacy format: deltas always chain to the previous rev
                iterrev -= 1
            if iterrev in chaininfocache:
                # reuse a previously computed tail of this chain
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
1042
1076
1043 def _deltachain(self, rev, stoprev=None):
1077 def _deltachain(self, rev, stoprev=None):
1044 """Obtain the delta chain for a revision.
1078 """Obtain the delta chain for a revision.
1045
1079
1046 ``stoprev`` specifies a revision to stop at. If not specified, we
1080 ``stoprev`` specifies a revision to stop at. If not specified, we
1047 stop at the base of the chain.
1081 stop at the base of the chain.
1048
1082
1049 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1083 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1050 revs in ascending order and ``stopped`` is a bool indicating whether
1084 revs in ascending order and ``stopped`` is a bool indicating whether
1051 ``stoprev`` was hit.
1085 ``stoprev`` was hit.
1052 """
1086 """
1053 # Try C implementation.
1087 # Try C implementation.
1054 try:
1088 try:
1055 return self.index.deltachain(rev, stoprev, self._generaldelta)
1089 return self.index.deltachain(rev, stoprev, self._generaldelta)
1056 except AttributeError:
1090 except AttributeError:
1057 pass
1091 pass
1058
1092
1059 chain = []
1093 chain = []
1060
1094
1061 # Alias to prevent attribute lookup in tight loop.
1095 # Alias to prevent attribute lookup in tight loop.
1062 index = self.index
1096 index = self.index
1063 generaldelta = self._generaldelta
1097 generaldelta = self._generaldelta
1064
1098
1065 iterrev = rev
1099 iterrev = rev
1066 e = index[iterrev]
1100 e = index[iterrev]
1067 while iterrev != e[3] and iterrev != stoprev:
1101 while iterrev != e[3] and iterrev != stoprev:
1068 chain.append(iterrev)
1102 chain.append(iterrev)
1069 if generaldelta:
1103 if generaldelta:
1070 iterrev = e[3]
1104 iterrev = e[3]
1071 else:
1105 else:
1072 iterrev -= 1
1106 iterrev -= 1
1073 e = index[iterrev]
1107 e = index[iterrev]
1074
1108
1075 if iterrev == stoprev:
1109 if iterrev == stoprev:
1076 stopped = True
1110 stopped = True
1077 else:
1111 else:
1078 chain.append(iterrev)
1112 chain.append(iterrev)
1079 stopped = False
1113 stopped = False
1080
1114
1081 chain.reverse()
1115 chain.reverse()
1082 return chain, stopped
1116 return chain, stopped
1083
1117
def ancestors(self, revs, stoprev=0, inclusive=False):
    """Generate the ancestors of 'revs' in reverse topological order.

    Revisions lower than ``stoprev`` are not generated.  See the
    documentation for ancestor.lazyancestors for the full contract.
    """
    pfunc = self.parentrevs
    return ancestor.lazyancestors(pfunc, revs, stoprev=stoprev,
                                  inclusive=inclusive)
1092
1126
def descendants(self, revs):
    """Generate the descendants of 'revs' in revision order.

    Yield a sequence of revision numbers starting with a child of
    some rev in revs; each revision is *not* considered a descendant
    of itself.  Results are ordered by revision number (a topological
    sort).
    """
    lowest = min(revs)
    if lowest == nullrev:
        # Everything descends from the null revision.
        for rev in self:
            yield rev
        return

    seen = set(revs)
    for rev in self.revs(start=lowest + 1):
        for parent in self.parentrevs(rev):
            if parent != nullrev and parent in seen:
                seen.add(rev)
                yield rev
                break
1113
1147
def findcommonmissing(self, common=None, heads=None):
    """Return a tuple of the ancestors of common and the ancestors of heads
    that are not ancestors of common. In revset terminology, we return the
    tuple:

      ::common, (::heads) - (::common)

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs. If heads is
    not supplied, uses all of the revlog's heads. If common is not
    supplied, uses nullid."""
    if common is None:
        common = [nullid]
    if heads is None:
        heads = self.heads()

    common = [self.rev(n) for n in common]
    heads = [self.rev(n) for n in heads]

    # We want the ancestors of ``common`` *inclusively*: wrap the lazy
    # ancestor generator in a set-like object that members can be added to.
    class lazyset(object):
        def __init__(self, lazyvalues):
            self.addedvalues = set()
            self.lazyvalues = lazyvalues

        def __contains__(self, value):
            return value in self.addedvalues or value in self.lazyvalues

        def __iter__(self):
            added = self.addedvalues
            for r in added:
                yield r
            for r in self.lazyvalues:
                if r not in added:
                    yield r

        def add(self, value):
            self.addedvalues.add(value)

        def update(self, values):
            self.addedvalues.update(values)

    has = lazyset(self.ancestors(common))
    has.add(nullrev)
    has.update(common)

    # Walk up from each head, collecting every ancestor not in ``has``.
    missing = set()
    visit = collections.deque(r for r in heads if r not in has)
    while visit:
        r = visit.popleft()
        if r in missing:
            continue
        missing.add(r)
        for p in self.parentrevs(r):
            if p not in has:
                visit.append(p)

    return has, [self.node(r) for r in sorted(missing)]
1177
1211
def incrementalmissingrevs(self, common=None):
    """Return an object that can be used to incrementally compute the
    revision numbers of the ancestors of arbitrary sets that are not
    ancestors of common. This is an ancestor.incrementalmissingancestors
    object.

    'common' is a list of revision numbers. If common is not supplied, uses
    nullrev.
    """
    if common is None:
        common = [nullrev]
    return ancestor.incrementalmissingancestors(self.parentrevs, common)
1191
1225
def findmissingrevs(self, common=None, heads=None):
    """Return the revision numbers of the ancestors of heads that
    are not ancestors of common.

    More specifically, return a list of revision numbers corresponding to
    nodes N such that every N satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of revision numbers. If heads is
    not supplied, uses all of the revlog's heads. If common is not
    supplied, uses nullid."""
    if common is None:
        common = [nullrev]
    if heads is None:
        heads = self.headrevs()

    missing = self.incrementalmissingrevs(common=common)
    return missing.missingancestors(heads)
1215
1249
def findmissing(self, common=None, heads=None):
    """Return the ancestors of heads that are not ancestors of common.

    More specifically, return a list of nodes N such that every N
    satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs. If heads is
    not supplied, uses all of the revlog's heads. If common is not
    supplied, uses nullid."""
    if common is None:
        common = [nullid]
    if heads is None:
        heads = self.heads()

    # Work in revision-number space, then map the result back to nodes.
    commonrevs = [self.rev(n) for n in common]
    headrevs = [self.rev(n) for n in heads]

    inc = self.incrementalmissingrevs(common=commonrevs)
    return [self.node(r) for r in inc.missingancestors(headrevs)]
1241
1275
def nodesbetween(self, roots=None, heads=None):
    """Return a topological path from 'roots' to 'heads'.

    Return a tuple (nodes, outroots, outheads) where 'nodes' is a
    topologically sorted list of all nodes N that satisfy both of
    these constraints:

      1. N is a descendant of some node in 'roots'
      2. N is an ancestor of some node in 'heads'

    Every node is considered to be both a descendant and an ancestor
    of itself, so every reachable node in 'roots' and 'heads' will be
    included in 'nodes'.

    'outroots' is the list of reachable nodes in 'roots', i.e., the
    subset of 'roots' that is returned in 'nodes'.  Likewise,
    'outheads' is the subset of 'heads' that is also in 'nodes'.

    'roots' and 'heads' are both lists of node IDs.  If 'roots' is
    unspecified, uses nullid as the only root.  If 'heads' is
    unspecified, uses list of all of the revlog's heads."""
    nonodes = ([], [], [])
    if roots is not None:
        roots = list(roots)
        if not roots:
            return nonodes
        lowestrev = min(self.rev(n) for n in roots)
    else:
        # Everybody is a descendant of nullid.
        roots = [nullid]
        lowestrev = nullrev
    if (lowestrev == nullrev) and (heads is None):
        # We want _all_ the nodes!
        return ([self.node(r) for r in self], [nullid], list(self.heads()))
    if heads is None:
        # All nodes are ancestors, so the latest ancestor is the last
        # node.
        highestrev = len(self) - 1
        # ``ancestors = None`` signals that every node is an ancestor.
        ancestors = None
        # Heads will be discovered during the main loop below.
        heads = {}
    else:
        heads = list(heads)
        if not heads:
            return nonodes
        ancestors = set()
        # Turn heads into a dictionary so we can remove 'fake' heads.
        # Later it also filters out the heads we cannot reach from roots.
        heads = dict.fromkeys(heads, False)
        # Start at the top and keep marking parents until we're done.
        nodestotag = set(heads)
        # Remember where the top was so we can use it as a limit later.
        highestrev = max(self.rev(n) for n in nodestotag)
        while nodestotag:
            # Grab a node to tag.
            n = nodestotag.pop()
            # Never tag nullid.
            if n == nullid:
                continue
            # A node's revision number represents its place in a
            # topologically sorted list of nodes.
            r = self.rev(n)
            if r >= lowestrev:
                if n not in ancestors:
                    # Possibly a descendant of one of the roots, and not
                    # yet marked: mark it and queue its parents.
                    ancestors.add(n)
                    nodestotag.update(p for p in self.parents(n)
                                      if p != nullid)
                elif n in heads:
                    # Seen before and listed as a head: it is a 'fake'
                    # head, since real heads are not ancestors of other
                    # heads.
                    heads.pop(n)
        if not ancestors:
            return nonodes
        # Now that we have our set of ancestors, remove any roots that
        # are not ancestors.

        # If one of the roots was nullid, everything is included anyway.
        if lowestrev > nullrev:
            # Recompute the lowest rev so it excludes roots that are not
            # ancestors.

            # Filter out roots that aren't ancestors of heads.
            roots = [root for root in roots if root in ancestors]
            if roots:
                lowestrev = min(self.rev(root) for root in roots)
            else:
                # No more roots?  Return empty list.
                return nonodes
        else:
            # We are descending from nullid, and don't need to care about
            # any other roots.
            lowestrev = nullrev
            roots = [nullid]
    # Transform our roots list into a set.
    descendants = set(roots)
    # Also keep the original roots so we can filter out roots that aren't
    # 'real' roots (i.e. are descended from other roots).
    roots = descendants.copy()
    # Our topologically sorted list of output nodes.
    orderedout = []
    # Don't start at nullid since we don't want nullid in our output list,
    # and if nullid shows up in descendants, empty parents will look like
    # they're descendants.
    for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
        n = self.node(r)
        isdescendant = False
        if lowestrev == nullrev:  # Everybody is a descendant of nullid.
            isdescendant = True
        elif n in descendants:
            # n is already a descendant.
            isdescendant = True
            # This check only needs to be done here because all the roots
            # will start being marked as descendants before the loop.
            if n in roots:
                # If n was a root, check whether it is a 'real' root.
                p = tuple(self.parents(n))
                # If any of its parents are descendants, it's not a root.
                if (p[0] in descendants) or (p[1] in descendants):
                    roots.remove(n)
        else:
            p = tuple(self.parents(n))
            # A node is a descendant if either of its parents is a
            # descendant.  (The descendants set was seeded with the roots
            # above.)
            if (p[0] in descendants) or (p[1] in descendants):
                descendants.add(n)
                isdescendant = True
        if isdescendant and ((ancestors is None) or (n in ancestors)):
            # Only include nodes that are both descendants and ancestors.
            orderedout.append(n)
            if (ancestors is not None) and (n in heads):
                # We're trying to figure out which heads are reachable
                # from roots: mark this head as having been reached.
                heads[n] = True
            elif ancestors is None:
                # Otherwise, we're trying to discover the heads.
                # Assume this is a head because if it isn't, the next step
                # will eventually remove it.
                heads[n] = True
                # But, obviously, its parents aren't.
                for p in self.parents(n):
                    heads.pop(p, None)
    heads = [head for head, flag in heads.iteritems() if flag]
    roots = list(roots)
    assert orderedout
    assert roots
    assert heads
    return (orderedout, roots, heads)
1396
1430
def headrevs(self):
    """Return the revisions that have no children.

    Prefers the C index's native implementation, falling back to the
    pure-Python version when it is unavailable.
    """
    try:
        return self.index.headrevs()
    except AttributeError:
        return self._headrevs()
1402
1436
def computephases(self, roots):
    """Compute phase maps and sets for ``roots``.

    Delegates to the C index implementation.
    """
    return self.index.computephasesmapsets(roots)
1405
1439
1406 def _headrevs(self):
1440 def _headrevs(self):
1407 count = len(self)
1441 count = len(self)
1408 if not count:
1442 if not count:
1409 return [nullrev]
1443 return [nullrev]
1410 # we won't iter over filtered rev so nobody is a head at start
1444 # we won't iter over filtered rev so nobody is a head at start
1411 ishead = [0] * (count + 1)
1445 ishead = [0] * (count + 1)
1412 index = self.index
1446 index = self.index
1413 for r in self:
1447 for r in self:
1414 ishead[r] = 1 # I may be an head
1448 ishead[r] = 1 # I may be an head
1415 e = index[r]
1449 e = index[r]
1416 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1450 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1417 return [r for r, val in enumerate(ishead) if val]
1451 return [r for r, val in enumerate(ishead) if val]
1418
1452
def heads(self, start=None, stop=None):
    """Return the list of all nodes that have no children.

    If ``start`` is specified, only heads that are descendants of
    ``start`` will be returned.  If ``stop`` is specified, all the revs
    in it are treated as if they had no children.
    """
    if start is None and stop is None:
        if not len(self):
            return [nullid]
        return [self.node(r) for r in self.headrevs()]

    if start is None:
        start = nullid
    if stop is None:
        stop = []
    stoprevs = {self.rev(n) for n in stop}
    startrev = self.rev(start)
    reachable = {startrev}
    heads = {startrev}

    parentrevs = self.parentrevs
    for r in self.revs(start=startrev + 1):
        for p in parentrevs(r):
            if p in reachable:
                if r not in stoprevs:
                    # r descends from start: it is reachable and,
                    # until a child shows up, a head.
                    reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    # p has a reachable child, so it is not a head.
                    heads.remove(p)

    return [self.node(r) for r in heads]
1452
1486
def children(self, node):
    """Find the children of a given node; returns their node ids."""
    kids = []
    parentrev = self.rev(node)
    for r in self.revs(start=parentrev + 1):
        realparents = [p for p in self.parentrevs(r) if p != nullrev]
        if realparents:
            # One append per matching parent link: a revision linked
            # twice to the same parent is reported twice.
            for p in realparents:
                if p == parentrev:
                    kids.append(self.node(r))
        elif parentrev == nullrev:
            # Parentless revisions count as children of the null rev.
            kids.append(self.node(r))
    return kids
1466
1500
def descendant(self, start, end):
    """True if revision 'end' is a descendant of revision 'start'.

    A revision is considered a descendant of itself."""
    if start == nullrev:
        # Everything descends from the null revision.
        return True
    if start == end:
        return True
    return start in self._commonancestorsheads(start, end)
1476
1510
def commonancestorsheads(self, a, b):
    """Calculate all the heads of the common ancestors of nodes a and b."""
    reva = self.rev(a)
    revb = self.rev(b)
    ancs = self._commonancestorsheads(reva, revb)
    return pycompat.maplist(self.node, ancs)
1482
1516
1483 def _commonancestorsheads(self, *revs):
1517 def _commonancestorsheads(self, *revs):
1484 """calculate all the heads of the common ancestors of revs"""
1518 """calculate all the heads of the common ancestors of revs"""
1485 try:
1519 try:
1486 ancs = self.index.commonancestorsheads(*revs)
1520 ancs = self.index.commonancestorsheads(*revs)
1487 except (AttributeError, OverflowError): # C implementation failed
1521 except (AttributeError, OverflowError): # C implementation failed
1488 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1522 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1489 return ancs
1523 return ancs
1490
1524
def isancestor(self, a, b):
    """Return True if node a is an ancestor of node b.

    The implementation of this is trivial but the use of
    commonancestorsheads is not."""
    reva = self.rev(a)
    revb = self.rev(b)
    return self.descendant(reva, revb)
1498
1532
def ancestor(self, a, b):
    """Calculate the "best" common ancestor of nodes a and b."""
    a, b = self.rev(a), self.rev(b)
    try:
        ancs = self.index.ancestors(a, b)
    except (AttributeError, OverflowError):
        # Fall back to the pure-Python implementation.
        ancs = ancestor.ancestors(self.parentrevs, a, b)
    if not ancs:
        return nullid
    # Choose a consistent winner when there is a tie.
    return min(map(self.node, ancs))
1511
1545
1512 def _match(self, id):
1546 def _match(self, id):
1513 if isinstance(id, int):
1547 if isinstance(id, int):
1514 # rev
1548 # rev
1515 return self.node(id)
1549 return self.node(id)
1516 if len(id) == 20:
1550 if len(id) == 20:
1517 # possibly a binary node
1551 # possibly a binary node
1518 # odds of a binary node being all hex in ASCII are 1 in 10**25
1552 # odds of a binary node being all hex in ASCII are 1 in 10**25
1519 try:
1553 try:
1520 node = id
1554 node = id
1521 self.rev(node) # quick search the index
1555 self.rev(node) # quick search the index
1522 return node
1556 return node
1523 except LookupError:
1557 except LookupError:
1524 pass # may be partial hex id
1558 pass # may be partial hex id
1525 try:
1559 try:
1526 # str(rev)
1560 # str(rev)
1527 rev = int(id)
1561 rev = int(id)
1528 if "%d" % rev != id:
1562 if "%d" % rev != id:
1529 raise ValueError
1563 raise ValueError
1530 if rev < 0:
1564 if rev < 0:
1531 rev = len(self) + rev
1565 rev = len(self) + rev
1532 if rev < 0 or rev >= len(self):
1566 if rev < 0 or rev >= len(self):
1533 raise ValueError
1567 raise ValueError
1534 return self.node(rev)
1568 return self.node(rev)
1535 except (ValueError, OverflowError):
1569 except (ValueError, OverflowError):
1536 pass
1570 pass
1537 if len(id) == 40:
1571 if len(id) == 40:
1538 try:
1572 try:
1539 # a full hex nodeid?
1573 # a full hex nodeid?
1540 node = bin(id)
1574 node = bin(id)
1541 self.rev(node)
1575 self.rev(node)
1542 return node
1576 return node
1543 except (TypeError, LookupError):
1577 except (TypeError, LookupError):
1544 pass
1578 pass
1545
1579
def _partialmatch(self, id):
    """Resolve a hex-prefix ``id`` to a node, or None when nothing matches.

    Raises RevlogError/LookupError on ambiguity and error.WdirUnsupported
    when the prefix can only denote the working directory pseudo-node.
    """
    # we don't care wdirfilenodeids as they should be always full hash
    maybewdir = wdirhex.startswith(id)
    try:
        # radix-tree lookup provided by the C index
        partial = self.index.partialmatch(id)
        if partial and self.hasnode(partial):
            if maybewdir:
                # single 'ff...' match in radix tree, ambiguous with wdir
                raise RevlogError
            return partial
        if maybewdir:
            # no 'ff...' match in radix tree, wdir identified
            raise error.WdirUnsupported
        return None
    except RevlogError:
        # parsers.c radix tree lookup gave multiple matches
        # fast path: for unfiltered changelog, radix tree is accurate
        if not getattr(self, 'filteredrevs', None):
            raise LookupError(id, self.indexfile,
                              _('ambiguous identifier'))
        # fall through to slow path that filters hidden revisions
    except (AttributeError, ValueError):
        # we are pure python, or key was too short to search radix tree
        pass

    if id in self._pcache:
        return self._pcache[id]

    if len(id) <= 40:
        try:
            # hex(node)[:...]
            half = len(id) // 2 # grab an even number of digits
            binprefix = bin(id[:half * 2])
            # scan the whole index for nodes sharing the binary prefix,
            # then narrow to those whose hex form starts with ``id``
            candidates = [e[7] for e in self.index
                          if e[7].startswith(binprefix)]
            candidates = [n for n in candidates
                          if hex(n).startswith(id) and self.hasnode(n)]
            if len(candidates) > 0:
                if len(candidates) == 1 and not maybewdir:
                    self._pcache[id] = candidates[0]
                    return candidates[0]
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            if maybewdir:
                raise error.WdirUnsupported
            return None
        except TypeError:
            pass
1593
1627
def lookup(self, id):
    """locate a node based on:
    - revision number or str(revision number)
    - nodeid or subset of hex nodeid
    """
    # exact interpretations first, then hex-prefix matching
    exact = self._match(id)
    if exact is not None:
        return exact
    partial = self._partialmatch(id)
    if partial:
        return partial

    raise LookupError(id, self.indexfile, _('no match found'))
1607
1641
def shortest(self, node, minlength=1):
    """Find the shortest unambiguous prefix that matches node."""
    def isvalid(prefix):
        # A prefix is valid when it resolves to exactly one node.
        try:
            node = self._partialmatch(prefix)
        except error.RevlogError:
            return False
        except error.WdirUnsupported:
            # single 'ff...' match
            return True
        if node is None:
            raise LookupError(node, self.indexfile, _('no node'))
        return True

    def maybewdir(prefix):
        # all-'f' prefixes could also denote the wdir pseudo-node
        return all(c == 'f' for c in prefix)

    hexnode = hex(node)

    def disambiguate(hexnode, minlength):
        """Disambiguate against wdirid."""
        # Grow the prefix until it can no longer be confused with wdir.
        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if not maybewdir(prefix):
                return prefix

    if not getattr(self, 'filteredrevs', None):
        try:
            # C index fast path: it knows the shortest unique length
            length = max(self.index.shortest(node), minlength)
            return disambiguate(hexnode, length)
        except RevlogError:
            if node != wdirid:
                raise LookupError(node, self.indexfile, _('no node'))
        except AttributeError:
            # Fall through to pure code
            pass

    if node == wdirid:
        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return prefix

    for length in range(minlength, 41):
        prefix = hexnode[:length]
        if isvalid(prefix):
            return disambiguate(hexnode, length)
1655
1689
def cmp(self, node, text):
    """compare text with a given file revision

    returns True if text is different than what is stored.
    """
    # Hashing the candidate text with the stored parents is cheaper than
    # reconstructing the stored revision text.
    parent1, parent2 = self.parents(node)
    return hash(text, parent1, parent2) != node
1663
1697
1664 def _cachesegment(self, offset, data):
1698 def _cachesegment(self, offset, data):
1665 """Add a segment to the revlog cache.
1699 """Add a segment to the revlog cache.
1666
1700
1667 Accepts an absolute offset and the data that is at that location.
1701 Accepts an absolute offset and the data that is at that location.
1668 """
1702 """
1669 o, d = self._chunkcache
1703 o, d = self._chunkcache
1670 # try to add to existing cache
1704 # try to add to existing cache
1671 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1705 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1672 self._chunkcache = o, d + data
1706 self._chunkcache = o, d + data
1673 else:
1707 else:
1674 self._chunkcache = offset, data
1708 self._chunkcache = offset, data
1675
1709
def _readsegment(self, offset, length, df=None):
    """Load a segment of raw data from the revlog.

    Accepts an absolute offset, length to read, and an optional existing
    file handle to read from.

    If an existing file handle is passed, it will be seeked and the
    original seek position will NOT be restored.

    Returns a str or buffer of raw byte data.
    """
    # Cache data both forward and backward around the requested
    # data, in a fixed size window. This helps speed up operations
    # involving reading the revlog backwards.
    cachesize = self._chunkcachesize
    # round the start down and the end up to cachesize boundaries
    realoffset = offset & ~(cachesize - 1)
    realend = (offset + length + cachesize) & ~(cachesize - 1)
    reallength = realend - realoffset
    with self._datareadfp(df) as df:
        df.seek(realoffset)
        d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            # hand back only the slice the caller asked for, zero-copy
            return util.buffer(d, offset - realoffset, length)
    return d
1701
1735
1702 def _getsegment(self, offset, length, df=None):
1736 def _getsegment(self, offset, length, df=None):
1703 """Obtain a segment of raw data from the revlog.
1737 """Obtain a segment of raw data from the revlog.
1704
1738
1705 Accepts an absolute offset, length of bytes to obtain, and an
1739 Accepts an absolute offset, length of bytes to obtain, and an
1706 optional file handle to the already-opened revlog. If the file
1740 optional file handle to the already-opened revlog. If the file
1707 handle is used, it's original seek position will not be preserved.
1741 handle is used, it's original seek position will not be preserved.
1708
1742
1709 Requests for data may be returned from a cache.
1743 Requests for data may be returned from a cache.
1710
1744
1711 Returns a str or a buffer instance of raw byte data.
1745 Returns a str or a buffer instance of raw byte data.
1712 """
1746 """
1713 o, d = self._chunkcache
1747 o, d = self._chunkcache
1714 l = len(d)
1748 l = len(d)
1715
1749
1716 # is it in the cache?
1750 # is it in the cache?
1717 cachestart = offset - o
1751 cachestart = offset - o
1718 cacheend = cachestart + length
1752 cacheend = cachestart + length
1719 if cachestart >= 0 and cacheend <= l:
1753 if cachestart >= 0 and cacheend <= l:
1720 if cachestart == 0 and cacheend == l:
1754 if cachestart == 0 and cacheend == l:
1721 return d # avoid a copy
1755 return d # avoid a copy
1722 return util.buffer(d, cachestart, cacheend - cachestart)
1756 return util.buffer(d, cachestart, cacheend - cachestart)
1723
1757
1724 return self._readsegment(offset, length, df=df)
1758 return self._readsegment(offset, length, df=df)
1725
1759
1726 def _getsegmentforrevs(self, startrev, endrev, df=None):
1760 def _getsegmentforrevs(self, startrev, endrev, df=None):
1727 """Obtain a segment of raw data corresponding to a range of revisions.
1761 """Obtain a segment of raw data corresponding to a range of revisions.
1728
1762
1729 Accepts the start and end revisions and an optional already-open
1763 Accepts the start and end revisions and an optional already-open
1730 file handle to be used for reading. If the file handle is read, its
1764 file handle to be used for reading. If the file handle is read, its
1731 seek position will not be preserved.
1765 seek position will not be preserved.
1732
1766
1733 Requests for data may be satisfied by a cache.
1767 Requests for data may be satisfied by a cache.
1734
1768
1735 Returns a 2-tuple of (offset, data) for the requested range of
1769 Returns a 2-tuple of (offset, data) for the requested range of
1736 revisions. Offset is the integer offset from the beginning of the
1770 revisions. Offset is the integer offset from the beginning of the
1737 revlog and data is a str or buffer of the raw byte data.
1771 revlog and data is a str or buffer of the raw byte data.
1738
1772
1739 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1773 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1740 to determine where each revision's data begins and ends.
1774 to determine where each revision's data begins and ends.
1741 """
1775 """
1742 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1776 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1743 # (functions are expensive).
1777 # (functions are expensive).
1744 index = self.index
1778 index = self.index
1745 istart = index[startrev]
1779 istart = index[startrev]
1746 start = int(istart[0] >> 16)
1780 start = int(istart[0] >> 16)
1747 if startrev == endrev:
1781 if startrev == endrev:
1748 end = start + istart[1]
1782 end = start + istart[1]
1749 else:
1783 else:
1750 iend = index[endrev]
1784 iend = index[endrev]
1751 end = int(iend[0] >> 16) + iend[1]
1785 end = int(iend[0] >> 16) + iend[1]
1752
1786
1753 if self._inline:
1787 if self._inline:
1754 start += (startrev + 1) * self._io.size
1788 start += (startrev + 1) * self._io.size
1755 end += (endrev + 1) * self._io.size
1789 end += (endrev + 1) * self._io.size
1756 length = end - start
1790 length = end - start
1757
1791
1758 return start, self._getsegment(start, length, df=df)
1792 return start, self._getsegment(start, length, df=df)
1759
1793
1760 def _chunk(self, rev, df=None):
1794 def _chunk(self, rev, df=None):
1761 """Obtain a single decompressed chunk for a revision.
1795 """Obtain a single decompressed chunk for a revision.
1762
1796
1763 Accepts an integer revision and an optional already-open file handle
1797 Accepts an integer revision and an optional already-open file handle
1764 to be used for reading. If used, the seek position of the file will not
1798 to be used for reading. If used, the seek position of the file will not
1765 be preserved.
1799 be preserved.
1766
1800
1767 Returns a str holding uncompressed data for the requested revision.
1801 Returns a str holding uncompressed data for the requested revision.
1768 """
1802 """
1769 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1803 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1770
1804
def _chunks(self, revs, df=None):
    """Obtain decompressed chunks for the specified revisions.

    Accepts an iterable of numeric revisions that are assumed to be in
    ascending order. Also accepts an optional already-open file handle
    to be used for reading. If used, the seek position of the file will
    not be preserved.

    This function is similar to calling ``self._chunk()`` multiple times,
    but is faster.

    Returns a list with decompressed data for each requested revision.
    """
    if not revs:
        return []
    # hoist hot attribute lookups out of the loops
    start = self.start
    length = self.length
    inline = self._inline
    iosize = self._io.size
    buffer = util.buffer

    chunks = []
    addchunk = chunks.append

    # With sparse reads enabled, split the revision run into densely
    # populated slices so we do not read large unused gaps from disk.
    if not self._withsparseread:
        slicedchunks = (revs,)
    else:
        slicedchunks = _slicechunk(self, revs)

    for revschunk in slicedchunks:
        firstrev = revschunk[0]
        # Skip trailing revisions with empty diff
        for lastrev in revschunk[::-1]:
            if length(lastrev) != 0:
                break

        try:
            offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revschunk]

        decomp = self.decompress
        for rev in revschunk:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            addchunk(decomp(buffer(data, chunkstart - offset, chunklength)))

    return chunks
1823
1857
1824 def _chunkclear(self):
1858 def _chunkclear(self):
1825 """Clear the raw chunk cache."""
1859 """Clear the raw chunk cache."""
1826 self._chunkcache = (0, '')
1860 self._chunkcache = (0, '')
1827
1861
def deltaparent(self, rev):
    """return deltaparent of the given revision"""
    base = self.index[rev][3]
    if base == rev:
        # a full snapshot has no delta parent
        return nullrev
    if self._generaldelta:
        # generaldelta stores the actual delta base in the index
        return base
    # legacy layout: deltas are always against the previous revision
    return rev - 1
1837
1871
def revdiff(self, rev1, rev2):
    """return or calculate a delta between two revisions

    The delta calculated is in binary form and is intended to be written to
    revlog data directly. So this function needs raw revision data.
    """
    if rev1 != nullrev and self.deltaparent(rev2) == rev1:
        # the stored delta for rev2 is already against rev1 — reuse it
        return bytes(self._chunk(rev2))

    # otherwise compute a fresh binary diff between the raw texts
    return mdiff.textdiff(self.revision(rev1, raw=True),
                          self.revision(rev2, raw=True))
1849
1883
def revision(self, nodeorrev, _df=None, raw=False):
    """return an uncompressed revision of a given node or revision
    number.

    _df - an existing file handle to read from. (internal-only)
    raw - an optional argument specifying if the revision data is to be
    treated as raw data when applying flag transforms. 'raw' should be set
    to True when generating changegroups or in debug commands.
    """
    # Accept either a revision number or a node id.
    if isinstance(nodeorrev, int):
        rev = nodeorrev
        node = self.node(rev)
    else:
        node = nodeorrev
        rev = None

    cachedrev = None
    flags = None
    rawtext = None
    if node == nullid:
        return ""
    if self._cache:
        if self._cache[0] == node:
            # _cache only stores rawtext
            if raw:
                return self._cache[2]
            # duplicated, but good for perf
            if rev is None:
                rev = self.rev(node)
            if flags is None:
                flags = self.flags(rev)
            # no extra flags set, no flag processor runs, text = rawtext
            if flags == REVIDX_DEFAULT_FLAGS:
                return self._cache[2]
            # rawtext is reusable. need to run flag processor
            rawtext = self._cache[2]

        cachedrev = self._cache[1]

    # look up what we need to read
    if rawtext is None:
        if rev is None:
            rev = self.rev(node)

        # walk the delta chain, stopping early if the cached revision
        # is part of it (its rawtext replaces the chain base)
        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            rawtext = self._cache[2]

        # drop cache to save memory
        self._cache = None

        deltas = self._chunks(chain, df=_df)
        if rawtext is None:
            # first chunk is the full-text base, the rest are deltas
            rawtext = bytes(deltas[0])
            deltas = deltas[1:]

        rawtext = mdiff.patches(rawtext, deltas)
        self._cache = (node, rev, rawtext)

    if flags is None:
        if rev is None:
            rev = self.rev(node)
        flags = self.flags(rev)

    # run registered flag processors (e.g. censorship, LFS) on the rawtext
    text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
    if validatehash:
        self.checkhash(text, node, rev=rev)

    return text
1919
1953
def hash(self, text, p1, p2):
    """Compute a node hash.

    Available as a function so that subclasses can replace the hash
    as needed.
    """
    # delegates to the module-level hash helper (not the builtin hash)
    return hash(text, p1, p2)
1927
1961
1928 def _processflags(self, text, flags, operation, raw=False):
1962 def _processflags(self, text, flags, operation, raw=False):
1929 """Inspect revision data flags and applies transforms defined by
1963 """Inspect revision data flags and applies transforms defined by
1930 registered flag processors.
1964 registered flag processors.
1931
1965
1932 ``text`` - the revision data to process
1966 ``text`` - the revision data to process
1933 ``flags`` - the revision flags
1967 ``flags`` - the revision flags
1934 ``operation`` - the operation being performed (read or write)
1968 ``operation`` - the operation being performed (read or write)
1935 ``raw`` - an optional argument describing if the raw transform should be
1969 ``raw`` - an optional argument describing if the raw transform should be
1936 applied.
1970 applied.
1937
1971
1938 This method processes the flags in the order (or reverse order if
1972 This method processes the flags in the order (or reverse order if
1939 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1973 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1940 flag processors registered for present flags. The order of flags defined
1974 flag processors registered for present flags. The order of flags defined
1941 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1975 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1942
1976
1943 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1977 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1944 processed text and ``validatehash`` is a bool indicating whether the
1978 processed text and ``validatehash`` is a bool indicating whether the
1945 returned text should be checked for hash integrity.
1979 returned text should be checked for hash integrity.
1946
1980
1947 Note: If the ``raw`` argument is set, it has precedence over the
1981 Note: If the ``raw`` argument is set, it has precedence over the
1948 operation and will only update the value of ``validatehash``.
1982 operation and will only update the value of ``validatehash``.
1949 """
1983 """
1950 # fast path: no flag processors will run
1984 # fast path: no flag processors will run
1951 if flags == 0:
1985 if flags == 0:
1952 return text, True
1986 return text, True
1953 if not operation in ('read', 'write'):
1987 if not operation in ('read', 'write'):
1954 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1988 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1955 # Check all flags are known.
1989 # Check all flags are known.
1956 if flags & ~REVIDX_KNOWN_FLAGS:
1990 if flags & ~REVIDX_KNOWN_FLAGS:
1957 raise RevlogError(_("incompatible revision flag '%#x'") %
1991 raise RevlogError(_("incompatible revision flag '%#x'") %
1958 (flags & ~REVIDX_KNOWN_FLAGS))
1992 (flags & ~REVIDX_KNOWN_FLAGS))
1959 validatehash = True
1993 validatehash = True
1960 # Depending on the operation (read or write), the order might be
1994 # Depending on the operation (read or write), the order might be
1961 # reversed due to non-commutative transforms.
1995 # reversed due to non-commutative transforms.
1962 orderedflags = REVIDX_FLAGS_ORDER
1996 orderedflags = REVIDX_FLAGS_ORDER
1963 if operation == 'write':
1997 if operation == 'write':
1964 orderedflags = reversed(orderedflags)
1998 orderedflags = reversed(orderedflags)
1965
1999
1966 for flag in orderedflags:
2000 for flag in orderedflags:
1967 # If a flagprocessor has been registered for a known flag, apply the
2001 # If a flagprocessor has been registered for a known flag, apply the
1968 # related operation transform and update result tuple.
2002 # related operation transform and update result tuple.
1969 if flag & flags:
2003 if flag & flags:
1970 vhash = True
2004 vhash = True
1971
2005
1972 if flag not in _flagprocessors:
2006 if flag not in _flagprocessors:
1973 message = _("missing processor for flag '%#x'") % (flag)
2007 message = _("missing processor for flag '%#x'") % (flag)
1974 raise RevlogError(message)
2008 raise RevlogError(message)
1975
2009
1976 processor = _flagprocessors[flag]
2010 processor = _flagprocessors[flag]
1977 if processor is not None:
2011 if processor is not None:
1978 readtransform, writetransform, rawtransform = processor
2012 readtransform, writetransform, rawtransform = processor
1979
2013
1980 if raw:
2014 if raw:
1981 vhash = rawtransform(self, text)
2015 vhash = rawtransform(self, text)
1982 elif operation == 'read':
2016 elif operation == 'read':
1983 text, vhash = readtransform(self, text)
2017 text, vhash = readtransform(self, text)
1984 else: # write operation
2018 else: # write operation
1985 text, vhash = writetransform(self, text)
2019 text, vhash = writetransform(self, text)
1986 validatehash = validatehash and vhash
2020 validatehash = validatehash and vhash
1987
2021
1988 return text, validatehash
2022 return text, validatehash
1989
2023
def checkhash(self, text, node, p1=None, p2=None, rev=None):
    """Check node hash integrity.

    Available as a function so that subclasses can extend hash mismatch
    behaviors as needed.
    """
    try:
        if p1 is None and p2 is None:
            # parents not supplied by the caller; look them up
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            # prefer reporting the revision number; fall back to a short
            # node when the caller did not supply one
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                % (self.indexfile, pycompat.bytestr(revornode)))
    except RevlogError:
        # a mismatch on a censorable revlog may just be censored content
        if self._censorable and _censoredtext(text):
            raise error.CensoredNodeError(self.indexfile, node, text)
        raise
2009
2043
2010 def _enforceinlinesize(self, tr, fp=None):
2044 def _enforceinlinesize(self, tr, fp=None):
2011 """Check if the revlog is too big for inline and convert if so.
2045 """Check if the revlog is too big for inline and convert if so.
2012
2046
2013 This should be called after revisions are added to the revlog. If the
2047 This should be called after revisions are added to the revlog. If the
2014 revlog has grown too large to be an inline revlog, it will convert it
2048 revlog has grown too large to be an inline revlog, it will convert it
2015 to use multiple index and data files.
2049 to use multiple index and data files.
2016 """
2050 """
2017 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
2051 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
2018 return
2052 return
2019
2053
2020 trinfo = tr.find(self.indexfile)
2054 trinfo = tr.find(self.indexfile)
2021 if trinfo is None:
2055 if trinfo is None:
2022 raise RevlogError(_("%s not found in the transaction")
2056 raise RevlogError(_("%s not found in the transaction")
2023 % self.indexfile)
2057 % self.indexfile)
2024
2058
2025 trindex = trinfo[2]
2059 trindex = trinfo[2]
2026 if trindex is not None:
2060 if trindex is not None:
2027 dataoff = self.start(trindex)
2061 dataoff = self.start(trindex)
2028 else:
2062 else:
2029 # revlog was stripped at start of transaction, use all leftover data
2063 # revlog was stripped at start of transaction, use all leftover data
2030 trindex = len(self) - 1
2064 trindex = len(self) - 1
2031 dataoff = self.end(-2)
2065 dataoff = self.end(-2)
2032
2066
2033 tr.add(self.datafile, dataoff)
2067 tr.add(self.datafile, dataoff)
2034
2068
2035 if fp:
2069 if fp:
2036 fp.flush()
2070 fp.flush()
2037 fp.close()
2071 fp.close()
2038
2072
2039 with self._datafp('w') as df:
2073 with self._datafp('w') as df:
2040 for r in self:
2074 for r in self:
2041 df.write(self._getsegmentforrevs(r, r)[1])
2075 df.write(self._getsegmentforrevs(r, r)[1])
2042
2076
2043 with self._indexfp('w') as fp:
2077 with self._indexfp('w') as fp:
2044 self.version &= ~FLAG_INLINE_DATA
2078 self.version &= ~FLAG_INLINE_DATA
2045 self._inline = False
2079 self._inline = False
2046 io = self._io
2080 io = self._io
2047 for i in self:
2081 for i in self:
2048 e = io.packentry(self.index[i], self.node, self.version, i)
2082 e = io.packentry(self.index[i], self.node, self.version, i)
2049 fp.write(e)
2083 fp.write(e)
2050
2084
2051 # the temp file replace the real index when we exit the context
2085 # the temp file replace the real index when we exit the context
2052 # manager
2086 # manager
2053
2087
2054 tr.replace(self.indexfile, trindex * self._io.size)
2088 tr.replace(self.indexfile, trindex * self._io.size)
2055 self._chunkclear()
2089 self._chunkclear()
2056
2090
def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
    """Add a revision to the log.

    text - the revision data to add
    transaction - the transaction object used for rollback
    link - the linkrev data to add
    p1, p2 - the parent nodeids of the revision
    cachedelta - an optional precomputed delta
    node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use different hashing method (and override checkhash() in such case)
    flags - the known flags to set on the revision
    deltacomputer - an optional _deltacomputer instance shared between
        multiple calls
    """
    if link == nullrev:
        raise RevlogError(_("attempted to add linkrev -1 to %s")
                          % self.indexfile)

    if flags:
        node = node or self.hash(text, p1, p2)

    rawtext, validatehash = self._processflags(text, flags, 'write')

    # a flag processor that altered the stored text invalidates any
    # precomputed delta, which was computed against the unprocessed text
    if rawtext != text:
        cachedelta = None

    if len(rawtext) > _maxentrysize:
        raise RevlogError(
            _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
            % (self.indexfile, len(rawtext)))

    node = node or self.hash(rawtext, p1, p2)
    if node in self.nodemap:
        # already stored; adding again would be a no-op
        return node

    if validatehash:
        self.checkhash(rawtext, node, p1=p1, p2=p2)

    return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                               flags, cachedelta=cachedelta,
                               deltacomputer=deltacomputer)
2102
2136
def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                   cachedelta=None, deltacomputer=None):
    """Add a raw revision with known flags, node and parents.

    Useful when reusing a revision not stored in this revlog (ex: received
    over wire, or read from an external bundle).  Opens the index (and,
    for non-inline revlogs, data) file handles, delegates to
    _addrevision(), and always closes the handles afterwards.
    """
    datafh = None
    if not self._inline:
        datafh = self._datafp("a+")
    indexfh = self._indexfp("a+")
    try:
        return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                 flags, cachedelta, indexfh, datafh,
                                 deltacomputer=deltacomputer)
    finally:
        if datafh:
            datafh.close()
        indexfh.close()
2121
2155
def compress(self, data):
    """Generate a possibly-compressed representation of data.

    Returns a (header, payload) pair.  An empty header means the payload
    is self-describing: it either carries the compressor's own header or
    is literal data starting with a NUL byte.
    """
    if not data:
        return '', data

    blob = self._compressor.compress(data)
    if blob:
        # The revlog compressor added the header in the returned data.
        return '', blob

    # compressor declined (e.g. incompressible input): store literally.
    # A leading NUL already marks literal data, so no 'u' prefix needed.
    if data[0:1] == '\0':
        return '', data
    return 'u', data
2136
2170
def decompress(self, data):
    """Decompress a revlog chunk.

    The chunk is expected to begin with a header identifying the
    format type so it can be routed to an appropriate decompressor.
    """
    if not data:
        return data

    # Revlogs are read much more frequently than they are written, and
    # many chunks take only microseconds to decompress, so performance
    # matters here.  We assume that:
    #   1) most chunks are compressed (rather than stored literally),
    #   2) decompressing anything is at least 10x slower than returning
    #      raw inline data, and
    #   3) common, officially supported compression engines deserve
    #      priority.
    # Hence the inline header dispatch below favors "compressed with a
    # common engine" over literal data, with the generic engine lookup
    # last.  Per `hg perfrevlogchunks` this ordering is ~0.5% faster for
    # zlib chunks, which matters for changelog and manifest reads.
    header = data[0:1]

    if header == 'x':
        try:
            return _zlibdecompress(data)
        except zlib.error as e:
            raise RevlogError(_('revlog decompress error: %s') %
                              stringutil.forcebytestr(e))
    # '\0' is more common than 'u' so it goes first.
    elif header == '\0':
        return data
    elif header == 'u':
        return util.buffer(data, 1)

    # uncommon engine: resolve (and memoize) a decompressor by header
    try:
        decompressor = self._decompressors[header]
    except KeyError:
        try:
            engine = util.compengines.forrevlogheader(header)
            decompressor = engine.revlogcompressor()
            self._decompressors[header] = decompressor
        except KeyError:
            raise RevlogError(_('unknown compression type %r') % header)

    return decompressor.decompress(data)
2192
2226
2193 def _isgooddeltainfo(self, deltainfo, revinfo):
2227 def _isgooddeltainfo(self, deltainfo, revinfo):
2194 """Returns True if the given delta is good. Good means that it is within
2228 """Returns True if the given delta is good. Good means that it is within
2195 the disk span, disk size, and chain length bounds that we know to be
2229 the disk span, disk size, and chain length bounds that we know to be
2196 performant."""
2230 performant."""
2197 if deltainfo is None:
2231 if deltainfo is None:
2198 return False
2232 return False
2199
2233
2200 # - 'deltainfo.distance' is the distance from the base revision --
2234 # - 'deltainfo.distance' is the distance from the base revision --
2201 # bounding it limits the amount of I/O we need to do.
2235 # bounding it limits the amount of I/O we need to do.
2202 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2236 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2203 # deltas we need to apply -- bounding it limits the amount of CPU
2237 # deltas we need to apply -- bounding it limits the amount of CPU
2204 # we consume.
2238 # we consume.
2205
2239
2206 textlen = revinfo.textlen
2240 textlen = revinfo.textlen
2207 defaultmax = textlen * 4
2241 defaultmax = textlen * 4
2208 maxdist = self._maxdeltachainspan
2242 maxdist = self._maxdeltachainspan
2209 if not maxdist:
2243 if not maxdist:
2210 maxdist = deltainfo.distance # ensure the conditional pass
2244 maxdist = deltainfo.distance # ensure the conditional pass
2211 maxdist = max(maxdist, defaultmax)
2245 maxdist = max(maxdist, defaultmax)
2212 if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or
2246 if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or
2213 deltainfo.compresseddeltalen > textlen * 2 or
2247 deltainfo.compresseddeltalen > textlen * 2 or
2214 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2248 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2215 return False
2249 return False
2216
2250
2217 return True
2251 return True
2218
2252
def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                 cachedelta, ifh, dfh, alwayscache=False,
                 deltacomputer=None):
    """internal function to add revisions to the log

    see addrevision for argument descriptions.

    note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

    if "deltacomputer" is not provided or None, a defaultdeltacomputer will
    be used.

    invariants:
    - rawtext is optional (can be None); if not set, cachedelta must be set.
      if both are set, they must correspond to each other.
    """
    if node == nullid:
        raise RevlogError(_("%s: attempt to add null revision") %
                          (self.indexfile))
    if node == wdirid or node in wdirfilenodeids:
        raise RevlogError(_("%s: attempt to add wdir revision") %
                          (self.indexfile))

    # inline revlogs interleave data with the index
    fh = ifh if self._inline else dfh

    btext = [rawtext]

    curr = len(self)
    offset = self.end(curr - 1)
    p1r, p2r = self.rev(p1), self.rev(p2)

    # full versions are inserted when the needed deltas
    # become comparable to the uncompressed text
    if rawtext is None:
        # need rawtext size, before changed by flag processors, which is
        # the non-raw size. use revlog explicitly to avoid filelog's extra
        # logic that might remove metadata size.
        textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                    cachedelta[1])
    else:
        textlen = len(rawtext)

    if deltacomputer is None:
        deltacomputer = _deltacomputer(self)

    revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

    # no delta for flag processor revision (see "candelta" for why)
    # not calling candelta since only one revision needs test, also to
    # avoid overhead fetching flags again.
    if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
        deltainfo = None
    else:
        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

    if deltainfo is not None:
        base = deltainfo.base
        chainbase = deltainfo.chainbase
        data = deltainfo.data
        deltalen = deltainfo.deltalen
    else:
        # store a full snapshot instead of a delta
        rawtext = deltacomputer.buildtext(revinfo, fh)
        data = self.compress(rawtext)
        deltalen = len(data[1]) + len(data[0])
        base = chainbase = curr

    e = (offset_type(offset, flags), deltalen, textlen,
         base, link, p1r, p2r, node)
    self.index.insert(-1, e)
    self.nodemap[node] = curr

    entry = self._io.packentry(e, self.node, self.version, curr)
    self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

    if alwayscache and rawtext is None:
        rawtext = deltacomputer._buildtext(revinfo, fh)

    if type(rawtext) == bytes: # only accept immutable objects
        self._cache = (node, curr, rawtext)
    self._chainbasecache[curr] = chainbase
    return node
2304
2338
2305 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2339 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2306 # Files opened in a+ mode have inconsistent behavior on various
2340 # Files opened in a+ mode have inconsistent behavior on various
2307 # platforms. Windows requires that a file positioning call be made
2341 # platforms. Windows requires that a file positioning call be made
2308 # when the file handle transitions between reads and writes. See
2342 # when the file handle transitions between reads and writes. See
2309 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2343 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2310 # platforms, Python or the platform itself can be buggy. Some versions
2344 # platforms, Python or the platform itself can be buggy. Some versions
2311 # of Solaris have been observed to not append at the end of the file
2345 # of Solaris have been observed to not append at the end of the file
2312 # if the file was seeked to before the end. See issue4943 for more.
2346 # if the file was seeked to before the end. See issue4943 for more.
2313 #
2347 #
2314 # We work around this issue by inserting a seek() before writing.
2348 # We work around this issue by inserting a seek() before writing.
2315 # Note: This is likely not necessary on Python 3.
2349 # Note: This is likely not necessary on Python 3.
2316 ifh.seek(0, os.SEEK_END)
2350 ifh.seek(0, os.SEEK_END)
2317 if dfh:
2351 if dfh:
2318 dfh.seek(0, os.SEEK_END)
2352 dfh.seek(0, os.SEEK_END)
2319
2353
2320 curr = len(self) - 1
2354 curr = len(self) - 1
2321 if not self._inline:
2355 if not self._inline:
2322 transaction.add(self.datafile, offset)
2356 transaction.add(self.datafile, offset)
2323 transaction.add(self.indexfile, curr * len(entry))
2357 transaction.add(self.indexfile, curr * len(entry))
2324 if data[0]:
2358 if data[0]:
2325 dfh.write(data[0])
2359 dfh.write(data[0])
2326 dfh.write(data[1])
2360 dfh.write(data[1])
2327 ifh.write(entry)
2361 ifh.write(entry)
2328 else:
2362 else:
2329 offset += curr * self._io.size
2363 offset += curr * self._io.size
2330 transaction.add(self.indexfile, offset, curr)
2364 transaction.add(self.indexfile, offset, curr)
2331 ifh.write(entry)
2365 ifh.write(entry)
2332 ifh.write(data[0])
2366 ifh.write(data[0])
2333 ifh.write(data[1])
2367 ifh.write(data[1])
2334 self._enforceinlinesize(transaction, ifh)
2368 self._enforceinlinesize(transaction, ifh)
2335
2369
def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
    """
    add a delta group

    given a set of deltas, add them to the revision log. the
    first delta is against its parent, which should be in our
    log, the rest are against the previous delta.

    If ``addrevisioncb`` is defined, it will be called with arguments of
    this revlog and the node that was added.
    """

    nodes = []

    r = len(self)
    end = self.end(r - 1) if r else 0
    ifh = self._indexfp("a+")
    isize = r * self._io.size
    if self._inline:
        transaction.add(self.indexfile, end + isize, r)
        dfh = None
    else:
        transaction.add(self.indexfile, isize, r)
        transaction.add(self.datafile, end)
        dfh = self._datafp("a+")

    def flush():
        # flush pending writes so _peek_iscensored can read through them
        if dfh:
            dfh.flush()
        ifh.flush()

    try:
        deltacomputer = _deltacomputer(self)
        # loop through our set of deltas
        for chunk in deltas:
            node, p1, p2, linknode, deltabase, delta, flags = chunk
            link = linkmapper(linknode)
            flags = flags or REVIDX_DEFAULT_FLAGS

            nodes.append(node)

            if node in self.nodemap:
                # this can happen if two branches make the same change
                continue

            for p in (p1, p2):
                if p not in self.nodemap:
                    raise LookupError(p, self.indexfile,
                                      _('unknown parent'))

            if deltabase not in self.nodemap:
                raise LookupError(deltabase, self.indexfile,
                                  _('unknown delta base'))

            baserev = self.rev(deltabase)

            if baserev != nullrev and self.iscensored(baserev):
                # if base is censored, delta must be full replacement in a
                # single patch operation
                hlen = struct.calcsize(">lll")
                oldlen = self.rawsize(baserev)
                newlen = len(delta) - hlen
                if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                    raise error.CensoredBaseError(self.indexfile,
                                                  self.node(baserev))

            if not flags and self._peek_iscensored(baserev, delta, flush):
                flags |= REVIDX_ISCENSORED

            # We assume consumers of addrevisioncb will want to retrieve
            # the added revision, which will require a call to
            # revision(). revision() will fast path if there is a cache
            # hit. So, we tell _addrevision() to always cache in this case.
            # We're only using addgroup() in the context of changegroup
            # generation so the revision data can always be handled as raw
            # by the flagprocessor.
            self._addrevision(node, None, transaction, link,
                              p1, p2, flags, (baserev, delta),
                              ifh, dfh,
                              alwayscache=bool(addrevisioncb),
                              deltacomputer=deltacomputer)

            if addrevisioncb:
                addrevisioncb(self, node)

            if not dfh and not self._inline:
                # addrevision switched from inline to conventional
                # reopen the index
                ifh.close()
                dfh = self._datafp("a+")
                ifh = self._indexfp("a+")
    finally:
        if dfh:
            dfh.close()
        ifh.close()

    return nodes
2433
2467
def iscensored(self, rev):
    """Check if a file revision is censored."""
    if not self._censorable:
        # censorship does not apply to this kind of revlog
        return False
    return self.flags(rev) & REVIDX_ISCENSORED
2440
2474
2441 def _peek_iscensored(self, baserev, delta, flush):
2475 def _peek_iscensored(self, baserev, delta, flush):
2442 """Quickly check if a delta produces a censored revision."""
2476 """Quickly check if a delta produces a censored revision."""
2443 if not self._censorable:
2477 if not self._censorable:
2444 return False
2478 return False
2445
2479
2446 # Fragile heuristic: unless new file meta keys are added alphabetically
2480 # Fragile heuristic: unless new file meta keys are added alphabetically
2447 # preceding "censored", all censored revisions are prefixed by
2481 # preceding "censored", all censored revisions are prefixed by
2448 # "\1\ncensored:". A delta producing such a censored revision must be a
2482 # "\1\ncensored:". A delta producing such a censored revision must be a
2449 # full-replacement delta, so we inspect the first and only patch in the
2483 # full-replacement delta, so we inspect the first and only patch in the
2450 # delta for this prefix.
2484 # delta for this prefix.
2451 hlen = struct.calcsize(">lll")
2485 hlen = struct.calcsize(">lll")
2452 if len(delta) <= hlen:
2486 if len(delta) <= hlen:
2453 return False
2487 return False
2454
2488
2455 oldlen = self.rawsize(baserev)
2489 oldlen = self.rawsize(baserev)
2456 newlen = len(delta) - hlen
2490 newlen = len(delta) - hlen
2457 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2491 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2458 return False
2492 return False
2459
2493
2460 add = "\1\ncensored:"
2494 add = "\1\ncensored:"
2461 addlen = len(add)
2495 addlen = len(add)
2462 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2496 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2463
2497
def getstrippoint(self, minlink):
    """find the minimum rev that must be stripped to strip the linkrev

    Returns a tuple containing the minimum rev and a set of all revs that
    have linkrevs that will be broken by this strip.
    """
    brokenrevs = set()
    strippoint = len(self)

    heads = {}
    pendinglinkrevs = set()
    for head in self.headrevs():
        headlinkrev = self.linkrev(head)
        heads[head] = headlinkrev
        if headlinkrev >= minlink:
            pendinglinkrevs.add(headlinkrev)

    # This algorithm involves walking down the rev graph, starting at the
    # heads. Since the revs are topologically sorted according to linkrev,
    # once all head linkrevs are below the minlink, we know there are
    # no more revs that could have a linkrev greater than minlink.
    # So we can stop walking.
    while pendinglinkrevs:
        strippoint -= 1
        linkrev = heads.pop(strippoint)

        if linkrev < minlink:
            brokenrevs.add(strippoint)
        else:
            pendinglinkrevs.remove(linkrev)

        # the parents become the new frontier of the walk
        for p in self.parentrevs(strippoint):
            if p != nullrev:
                plinkrev = self.linkrev(p)
                heads[p] = plinkrev
                if plinkrev >= minlink:
                    pendinglinkrevs.add(plinkrev)

    return strippoint, brokenrevs
2503
2537
def strip(self, minlink, transaction):
    """truncate the revlog on the first revision with a linkrev >= minlink

    This function is called when we're stripping revision minlink and
    its descendants from the repository.

    We have to remove all revisions with linkrev >= minlink, because
    the equivalent changelog revisions will be renumbered after the
    strip.

    So we truncate the revlog on the first of these revisions, and
    trust that the caller has saved the revisions that shouldn't be
    removed and that it'll re-add them after this truncation.
    """
    if len(self) == 0:
        return

    # NOTE(review): '_' here locally shadows the i18n gettext alias for
    # the rest of this method; only the strip point itself is used.
    rev, _ = self.getstrippoint(minlink)
    if rev == len(self):
        # nothing to strip: every remaining rev has linkrev < minlink
        return

    # first truncate the files on disk
    end = self.start(rev)
    if not self._inline:
        transaction.add(self.datafile, end)
        # separate index file: keep exactly 'rev' fixed-size entries
        end = rev * self._io.size
    else:
        # inline revlog: index entries and data chunks share one file, so
        # the index offset is the surviving data size plus 'rev' entries
        end += rev * self._io.size

    transaction.add(self.indexfile, end)

    # then reset internal state in memory to forget those revisions
    self._cache = None
    self._chaininfocache = {}
    self._chunkclear()
    for x in xrange(rev, len(self)):
        del self.nodemap[self.node(x)]

    # the -1 bound presumably preserves a trailing sentinel entry in the
    # index — TODO confirm against the index implementation
    del self.index[rev:-1]
    self._nodepos = None
2544
2578
def checksize(self):
    """Check on-disk file sizes against what the index implies.

    Returns a ``(dd, di)`` pair: ``dd`` is the number of surplus bytes in
    the data file, ``di`` the number of surplus bytes in the index file.
    Both are zero for an internally consistent revlog; a missing file is
    treated as empty rather than an error.
    """
    # Where the data for the last revision should end, per the index.
    expected = max(0, self.end(len(self) - 1)) if len(self) else 0

    # Compare the data file's real size with the expected end offset.
    try:
        with self._datafp() as datafh:
            datafh.seek(0, 2)
            dd = datafh.tell() - expected
    except IOError as err:
        if err.errno != errno.ENOENT:
            raise
        dd = 0  # no data file: treat as empty

    # Compare the index file's real size with a whole number of entries.
    try:
        indexfh = self.opener(self.indexfile)
        indexfh.seek(0, 2)
        indexsize = indexfh.tell()
        indexfh.close()
        entrysize = self._io.size
        wholeentries = max(0, indexsize // entrysize)
        di = indexsize - wholeentries * entrysize
        if self._inline:
            # Inline revlogs interleave data chunks with index entries in
            # a single file, so account for the chunk payloads too.
            databytes = sum(max(0, self.length(r)) for r in self)
            dd = 0
            di = indexsize - len(self) * entrysize - databytes
    except IOError as err:
        if err.errno != errno.ENOENT:
            raise
        di = 0  # no index file: treat as empty

    return (dd, di)
2580
2614
def files(self):
    """Return the names of the files backing this revlog.

    Always includes the index file; the data file is included only when
    the revlog is not inline (inline revlogs keep data inside the index
    file).
    """
    if self._inline:
        return [self.indexfile]
    return [self.indexfile, self.datafile]
2586
2620
# Delta reuse policies accepted by clone()'s 'deltareuse' argument; see the
# clone() docstring for the exact semantics of each mode.
DELTAREUSEALWAYS = 'always'
DELTAREUSESAMEREVS = 'samerevs'
DELTAREUSENEVER = 'never'

# Re-add each revision through the full addrevision() path instead of the
# lower-level _addrevision() storage path (see the branch in clone()).
DELTAREUSEFULLADD = 'fulladd'

# All valid policy values; clone() validates its argument against this set.
DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2594
2628
def clone(self, tr, destrevlog, addrevisioncb=None,
          deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
    """Copy this revlog to another, possibly with format changes.

    The destination revlog will contain the same revisions and nodes.
    However, it may not be bit-for-bit identical due to e.g. delta encoding
    differences.

    The ``deltareuse`` argument control how deltas from the existing revlog
    are preserved in the destination revlog. The argument can have the
    following values:

    DELTAREUSEALWAYS
      Deltas will always be reused (if possible), even if the destination
      revlog would not select the same revisions for the delta. This is the
      fastest mode of operation.
    DELTAREUSESAMEREVS
      Deltas will be reused if the destination revlog would pick the same
      revisions for the delta. This mode strikes a balance between speed
      and optimization.
    DELTAREUSENEVER
      Deltas will never be reused. This is the slowest mode of execution.
      This mode can be used to recompute deltas (e.g. if the diff/delta
      algorithm changes).

    Delta computation can be slow, so the choice of delta reuse policy can
    significantly affect run time.

    The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
    two extremes. Deltas will be reused if they are appropriate. But if the
    delta could choose a better revision, it will do so. This means if you
    are converting a non-generaldelta revlog to a generaldelta revlog,
    deltas will be recomputed if the delta's parent isn't a parent of the
    revision.

    In addition to the delta policy, the ``aggressivemergedeltas`` argument
    controls whether to compute deltas against both parents for merges.
    By default, the current default is used.

    Raises ValueError for an unknown ``deltareuse`` value, a non-empty
    destination, or when either revlog has filtered revisions.
    """
    if deltareuse not in self.DELTAREUSEALL:
        raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

    if len(destrevlog):
        raise ValueError(_('destination revlog is not empty'))

    if getattr(self, 'filteredrevs', None):
        raise ValueError(_('source revlog has filtered revisions'))
    if getattr(destrevlog, 'filteredrevs', None):
        raise ValueError(_('destination revlog has filtered revisions'))

    # lazydeltabase controls whether to reuse a cached delta, if possible.
    # Save the destination's settings so they can be restored in the
    # finally block below, even if the copy fails partway.
    oldlazydeltabase = destrevlog._lazydeltabase
    oldamd = destrevlog._aggressivemergedeltas

    try:
        if deltareuse == self.DELTAREUSEALWAYS:
            destrevlog._lazydeltabase = True
        elif deltareuse == self.DELTAREUSESAMEREVS:
            destrevlog._lazydeltabase = False

        destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

        populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                            self.DELTAREUSESAMEREVS)

        deltacomputer = _deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xffff
            linkrev = entry[4]
            # entry[5]/entry[6] are parent revs; field 7 is the node hash.
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if populatecachedelta:
                dp = self.deltaparent(rev)
                if dp != nullrev:
                    cachedelta = (dp, bytes(self._chunk(rev)))

            if not cachedelta:
                # No reusable delta: fall back to the full raw text.
                rawtext = self.revision(rev, raw=True)


            if deltareuse == self.DELTAREUSEFULLADD:
                destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                       cachedelta=cachedelta,
                                       node=node, flags=flags,
                                       deltacomputer=deltacomputer)
            else:
                ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                        checkambig=False)
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                try:
                    destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                            p2, flags, cachedelta, ifh, dfh,
                                            deltacomputer=deltacomputer)
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)
    finally:
        # Restore the destination revlog's original delta settings.
        destrevlog._lazydeltabase = oldlazydeltabase
        destrevlog._aggressivemergedeltas = oldamd
General Comments 0
You need to be logged in to leave comments. Login now