revlog: add a doctest to _trimchunk
Boris Feld
r38657:740f7d44 default
@@ -1,2656 +1,2691 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import heapq
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .utils import (
    stringutil,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
# bitmask for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)

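# Illustrative sketch (editor's addition, not part of revlog.py): packmeta
# and parsemeta are inverses; the returned size counts both '\1\n' markers
# plus the metadata text, i.e. it is the offset at which the payload starts.
# A doctest-style session, assuming Python 2 bytes-as-str semantics:
#
# >>> packmeta({'censored': 'reason'}, 'data')
# '\x01\ncensored: reason\n\x01\ndata'
# >>> parsemeta(packmeta({'censored': 'reason'}, 'data'))
# ({'censored': 'reason'}, 21)
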
def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
    "text" is presented to the user. "rawtext" is stored in revlog data, not
    directly visible to the user.
    The boolean returned by these transforms is used to determine whether
    the returned text can be used for hash integrity checking. For example,
    if "write" returns False, then "text" is used to generate the hash. If
    "write" returns True, that basically means "rawtext" returned by "write"
    should be used to generate the hash. Usually, "write" and "read" return
    different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

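# Illustrative sketch (editor's addition): offset_type packs a byte offset
# into the upper 48 bits of the first index field and the flag bits into the
# lower 16; getoffset and gettype recover the two halves.
#
# >>> getoffset(offset_type(100, REVIDX_ISCENSORED))
# 100
# >>> gettype(offset_type(100, REVIDX_ISCENSORED)) == REVIDX_ISCENSORED
# True
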
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()

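# Illustrative sketch (editor's addition): the node hash is
# sha1(a + b + text) where (a, b) are the two parent nodes sorted bytewise;
# reusing the precomputed null hash when p2 is nullid is equivalent, since
# nullid (twenty zero bytes) always sorts first.
#
# >>> hash('data', nullid, nullid) == hashlib.sha1(nullid * 2 + 'data').digest()
# True
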
class _testrevlog(object):
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0):
        """data is a list of revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap

    def start(self, rev):
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

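# Illustrative sketch (editor's addition): with payload boundaries
# [5, 10, 12, 12, 17, 21, 21], revision 3 starts and ends at offset 12,
# i.e. it is one of the "empty" revisions the helpers below trim and skip.
#
# >>> revlog = _testrevlog([5, 10, 12, 12, 17, 21, 21])
# >>> revlog.start(3), revlog.end(3), revlog.length(3)
# (12, 12, 0)
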
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # Trim empty revs at the end, but never the very first revision of a chain
    while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
        endidx -= 1

    return revs[startidx:endidx]

def _segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers
    """
    if not revs:
        return 0
    return revlog.end(revs[-1]) - revlog.start(revs[0])

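# Illustrative sketch (editor's addition): _segmentspan on the sample revlog
# used in the _trimchunk doctest; the span covers everything from the start
# of the first rev to the end of the last one, holes included.
#
# >>> revlog = _testrevlog([5, 10, 12, 12, 17, 21, 21])
# >>> _segmentspan(revlog, [0, 1, 2, 3, 4, 5, 6])
# 21
# >>> _segmentspan(revlog, [0, 4])
# 17
# >>> _segmentspan(revlog, [])
# 0
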
def _slicechunk(revlog, revs):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in a single pass.
    Assume that revs are sorted.
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    readdata = deltachainspan = _segmentspan(revlog, revs)

    if deltachainspan < revlog._srmingapsize:
        yield revs
        return

    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= revlog._srdensitythreshold:
        yield revs
        return

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > revlog._srmingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < revlog._srdensitythreshold:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

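# Illustrative sketch (editor's addition): with a 0.9 density threshold, the
# 7-byte hole between revisions 0 and 4 (payload 14 bytes over a 21-byte
# span, density ~0.67) is cut out and the read is split into two groups.
#
# >>> revlog = _testrevlog([5, 10, 12, 12, 17, 21, 21], density=0.9)
# >>> list(_slicechunk(revlog, [0, 4, 5]))
# [[0], [4, 5]]
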
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()

class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides revisions that are interesting to diff against, grouped by
        level of easiness.
        """
        revlog = self.revlog
        gdelta = revlog._generaldelta
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and gdelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if gdelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]

                if not revlog._aggressivemergedeltas and len(parents) == 2:
                    parents.sort()
                    # To minimize the chance of having to build a fulltext,
                    # pick first whichever parent is closest to us (max rev)
                    yield (parents[1],)
                    # then the other one (min rev) if the first did not fit
                    yield (parents[0],)
                    tested.update(parents)
                elif len(parents) > 0:
                    # Test all parents (1 or 2), and keep the best candidate
                    yield parents
                    tested.update(parents)

            if prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                yield (prev,)
                tested.add(prev)

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated with the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

561
527 class revlogoldio(object):
562 class revlogoldio(object):
528 def __init__(self):
563 def __init__(self):
529 self.size = indexformatv0.size
564 self.size = indexformatv0.size
530
565
531 def parseindex(self, data, inline):
566 def parseindex(self, data, inline):
532 s = self.size
567 s = self.size
533 index = []
568 index = []
534 nodemap = {nullid: nullrev}
569 nodemap = {nullid: nullrev}
535 n = off = 0
570 n = off = 0
536 l = len(data)
571 l = len(data)
537 while off + s <= l:
572 while off + s <= l:
538 cur = data[off:off + s]
573 cur = data[off:off + s]
539 off += s
574 off += s
540 e = indexformatv0_unpack(cur)
575 e = indexformatv0_unpack(cur)
541 # transform to revlogv1 format
576 # transform to revlogv1 format
542 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
577 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
543 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
578 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
544 index.append(e2)
579 index.append(e2)
545 nodemap[e[6]] = n
580 nodemap[e[6]] = n
546 n += 1
581 n += 1
547
582
548 # add the magic null revision at -1
583 # add the magic null revision at -1
549 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
584 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
550
585
551 return index, nodemap, None
586 return index, nodemap, None
552
587
553 def packentry(self, entry, node, version, rev):
588 def packentry(self, entry, node, version, rev):
554 if gettype(entry[0]):
589 if gettype(entry[0]):
555 raise RevlogError(_('index entry flags need revlog version 1'))
590 raise RevlogError(_('index entry flags need revlog version 1'))
556 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
591 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
557 node(entry[5]), node(entry[6]), entry[7])
592 node(entry[5]), node(entry[6]), entry[7])
558 return indexformatv0_pack(*e2)
593 return indexformatv0_pack(*e2)
559
594
560 # index ng:
595 # index ng:
561 # 6 bytes: offset
596 # 6 bytes: offset
562 # 2 bytes: flags
597 # 2 bytes: flags
563 # 4 bytes: compressed length
598 # 4 bytes: compressed length
564 # 4 bytes: uncompressed length
599 # 4 bytes: uncompressed length
565 # 4 bytes: base rev
600 # 4 bytes: base rev
566 # 4 bytes: link rev
601 # 4 bytes: link rev
567 # 4 bytes: parent 1 rev
602 # 4 bytes: parent 1 rev
568 # 4 bytes: parent 2 rev
603 # 4 bytes: parent 2 rev
569 # 32 bytes: nodeid
604 # 32 bytes: nodeid
570 indexformatng = struct.Struct(">Qiiiiii20s12x")
605 indexformatng = struct.Struct(">Qiiiiii20s12x")
571 indexformatng_pack = indexformatng.pack
606 indexformatng_pack = indexformatng.pack
572 versionformat = struct.Struct(">I")
607 versionformat = struct.Struct(">I")
573 versionformat_pack = versionformat.pack
608 versionformat_pack = versionformat.pack
574 versionformat_unpack = versionformat.unpack
609 versionformat_unpack = versionformat.unpack
575
610
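# Illustrative sketch (editor's addition): the "ng" record folds the 6-byte
# offset and 2-byte flags into one ">Q" (see offset_type above), followed by
# six 4-byte integers, a 20-byte nodeid and 12 pad bytes reserving room for
# 32-byte hashes, 64 bytes in all.
#
# >>> struct.calcsize(">Qiiiiii20s12x")
# 64
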
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._withsparseread = bool(opts.get('with-sparse-read', False))
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                        self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

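# Illustrative sketch (editor's addition, assuming a hypothetical revlog
# ``r`` with at least four revisions): revs() iterates forward by default
# and backward when start > stop, with stop included in both directions.
#
# >>> list(r.revs(1, 3))
# [1, 2, 3]
# >>> list(r.revs(3, 1))
# [3, 2, 1]
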
812 @util.propertycache
847 @util.propertycache
813 def nodemap(self):
848 def nodemap(self):
814 self.rev(self.node(0))
849 self.rev(self.node(0))
815 return self._nodecache
850 return self._nodecache
816
851
817 def hasnode(self, node):
852 def hasnode(self, node):
818 try:
853 try:
819 self.rev(node)
854 self.rev(node)
820 return True
855 return True
821 except KeyError:
856 except KeyError:
822 return False
857 return False
823
858
824 def candelta(self, baserev, rev):
859 def candelta(self, baserev, rev):
825 """whether two revisions (baserev, rev) can be delta-ed or not"""
860 """whether two revisions (baserev, rev) can be delta-ed or not"""
826 # Disable delta if either rev requires a content-changing flag
861 # Disable delta if either rev requires a content-changing flag
827 # processor (ex. LFS). This is because such flag processor can alter
862 # processor (ex. LFS). This is because such flag processor can alter
828 # the rawtext content that the delta will be based on, and two clients
863 # the rawtext content that the delta will be based on, and two clients
829 # could have a same revlog node with different flags (i.e. different
864 # could have a same revlog node with different flags (i.e. different
830 # rawtext contents) and the delta could be incompatible.
865 # rawtext contents) and the delta could be incompatible.
831 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
866 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
832 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
867 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
833 return False
868 return False
834 return True
869 return True
835
870
836 def clearcaches(self):
871 def clearcaches(self):
837 self._cache = None
872 self._cache = None
838 self._chainbasecache.clear()
873 self._chainbasecache.clear()
839 self._chunkcache = (0, '')
874 self._chunkcache = (0, '')
840 self._pcache = {}
875 self._pcache = {}
841
876
842 try:
877 try:
843 self._nodecache.clearcaches()
878 self._nodecache.clearcaches()
844 except AttributeError:
879 except AttributeError:
845 self._nodecache = {nullid: nullrev}
880 self._nodecache = {nullid: nullrev}
846 self._nodepos = None
881 self._nodepos = None
847
882
848 def rev(self, node):
883 def rev(self, node):
849 try:
884 try:
850 return self._nodecache[node]
885 return self._nodecache[node]
851 except TypeError:
886 except TypeError:
852 raise
887 raise
853 except RevlogError:
888 except RevlogError:
854 # parsers.c radix tree lookup failed
889 # parsers.c radix tree lookup failed
855 if node == wdirid or node in wdirfilenodeids:
890 if node == wdirid or node in wdirfilenodeids:
856 raise error.WdirUnsupported
891 raise error.WdirUnsupported
857 raise LookupError(node, self.indexfile, _('no node'))
892 raise LookupError(node, self.indexfile, _('no node'))
858 except KeyError:
893 except KeyError:
859 # pure python cache lookup failed
894 # pure python cache lookup failed
860 n = self._nodecache
895 n = self._nodecache
861 i = self.index
896 i = self.index
862 p = self._nodepos
897 p = self._nodepos
863 if p is None:
898 if p is None:
864 p = len(i) - 2
899 p = len(i) - 2
865 else:
900 else:
866 assert p < len(i)
901 assert p < len(i)
867 for r in xrange(p, -1, -1):
902 for r in xrange(p, -1, -1):
868 v = i[r][7]
903 v = i[r][7]
869 n[v] = r
904 n[v] = r
870 if v == node:
905 if v == node:
871 self._nodepos = r - 1
906 self._nodepos = r - 1
872 return r
907 return r
873 if node == wdirid or node in wdirfilenodeids:
908 if node == wdirid or node in wdirfilenodeids:
874 raise error.WdirUnsupported
909 raise error.WdirUnsupported
875 raise LookupError(node, self.indexfile, _('no node'))
910 raise LookupError(node, self.indexfile, _('no node'))
876
911
    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

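    # Illustrative note (not in the original source): with the packing above,
    # a sketch value combining a data offset of 1024 and flag bit 0x1 would
    # round-trip through start() and flags() like this:
    #
    #   >>> offset_flags = (1024 << 16) | 0x1
    #   >>> int(offset_flags >> 16)   # what start() extracts
    #   1024
    #   >>> offset_flags & 0xFFFF     # what flags() extracts
    #   1
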
    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

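    # Illustrative sketch (not in the original source): for a hypothetical
    # revlog ``rl`` where rev 5 deltas against rev 3 and rev 3 against its
    # own base rev 0, the chain comes back base-first, and ``stoprev``
    # truncates it:
    #
    #   >>> rl._deltachain(5)             # doctest: +SKIP
    #   ([0, 3, 5], False)
    #   >>> rl._deltachain(5, stoprev=3)  # doctest: +SKIP
    #   ([5], True)
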
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself.  Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs.  If heads is
        not supplied, uses all of the revlog's heads.  If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

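    # Illustrative sketch (not in the original source): for a linear history
    # of revs 0..4 with common=[node(1)], the second element of the returned
    # tuple names the ancestors of the heads that are not ancestors of rev 1,
    # i.e. revs 2, 3 and 4. Hypothetical values against a revlog ``rl``:
    #
    #   >>> has, missing = rl.findcommonmissing([rl.node(1)])  # doctest: +SKIP
    #   >>> [rl.rev(n) for n in missing]                       # doctest: +SKIP
    #   [2, 3, 4]
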
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers.  If heads is
        not supplied, uses all of the revlog's heads.  If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs.  If heads is
        not supplied, uses all of the revlog's heads.  If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'.  Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs.  If 'roots' is
        unspecified, uses nullid as the only root.  If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants.  (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

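    # Illustrative sketch (not in the original source): for a two-headed
    # history with 0 <- 1 <- 2 plus 0 <- 3, every rev starts as a candidate
    # head and loses that status as soon as it appears as someone's parent,
    # so a hypothetical revlog ``rl`` would report:
    #
    #   >>> rl._headrevs()  # doctest: +SKIP
    #   [2, 3]
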
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        """True if revision 'end' is a descendant of revision 'start'

        A revision is considered a descendant of itself."""
        if start == nullrev:
            return True
        elif start == end:
            return True
        return start in self._commonancestorsheads(start, end)

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        a, b = self.rev(a), self.rev(b)
        return self.descendant(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

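    # Illustrative note (not in the original source): when several candidate
    # ancestors tie, min() over the 20-byte binary node values picks a stable
    # winner, so repeated calls agree regardless of set iteration order:
    #
    #   >>> min([b'\x02' * 20, b'\x01' * 20]) == b'\x01' * 20
    #   True
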
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

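    # Illustrative note (not in the original source): _match() tries, in
    # order, an integer rev, a 20-byte binary node, a decimal string rev
    # (negative values count back from the tip), and a 40-digit hex node;
    # anything else falls through to _partialmatch(). A hypothetical call
    # against a revlog ``rl``:
    #
    #   >>> rl._match('-1') == rl.node(len(rl) - 1)  # doctest: +SKIP
    #   True
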
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids here as they should always be
        # full hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

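    # Illustrative note (not in the original source): prefixes made entirely
    # of 'f' characters are ambiguous with the working-directory id, so
    # disambiguate() keeps extending the prefix until maybewdir() fails:
    #
    #   >>> all(c == 'f' for c in 'ffa'[:2]), all(c == 'f' for c in 'ffa'[:3])
    #   (True, False)
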
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

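    # Illustrative note (not in the original source): the read window is
    # aligned to the chunk cache size by masking off the low bits, so with a
    # sketch cachesize of 64 a request for 10 bytes at offset 100 reads the
    # aligned span [64, 128), which covers the requested [100, 110):
    #
    #   >>> cachesize = 64
    #   >>> realoffset = 100 & ~(cachesize - 1)
    #   >>> reallength = ((100 + 10 + cachesize) & ~(cachesize - 1)) - realoffset
    #   >>> realoffset, realoffset + reallength
    #   (64, 128)
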
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

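    # Illustrative note (not in the original source): for inline revlogs the
    # index entries are interleaved with the data, so each revision's data
    # offset is shifted by one index-entry size per preceding revision plus
    # its own. With a sketch entry size of 64 bytes, rev 2 at data offset 100
    # really begins at:
    #
    #   >>> 100 + (2 + 1) * 64
    #   292
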
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

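    # Illustrative sketch, not part of the original file: a toy version of
    # the base-field logic above. The index entry layout (base revision at
    # position 3) is taken from deltaparent(); the helper itself is
    # hypothetical.
    #
    # >>> def toydeltaparent(base, rev, generaldelta):
    # ...     if base == rev:
    # ...         return -1  # nullrev: the revision is stored as a full text
    # ...     return base if generaldelta else rev - 1
    # >>> toydeltaparent(2, 5, True)   # generaldelta: delta against rev 2
    # 2
    # >>> toydeltaparent(2, 5, False)  # legacy: deltas always chain to rev - 1
    # 4
    # >>> toydeltaparent(5, 5, True)   # base == rev marks a full snapshot
    # -1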
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

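    # Illustrative usage sketch, not part of the original file; ``rl`` is a
    # hypothetical, already-constructed revlog and ``n`` a node it contains.
    #
    # text = rl.revision(n)               # text with flag transforms applied
    # rawtext = rl.revision(n, raw=True)  # stored rawtext, e.g. for changegroups
    # assert rl.revision(rl.rev(n)) == text  # revision numbers work too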
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

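    # Illustrative sketch, not part of the original file: why write-time
    # processing runs over REVIDX_FLAGS_ORDER in reverse. With two
    # non-commutative transforms, writing must unwind in the opposite order
    # reading applied them.
    #
    # >>> reads = [lambda t: t + '-x', lambda t: t + '-y']  # flag order
    # >>> writes = [lambda t: t[:-2], lambda t: t[:-2]]     # their inverses
    # >>> t = 'text'
    # >>> for r in reads:
    # ...     t = r(t)
    # >>> t
    # 'text-x-y'
    # >>> for w in reversed(writes):
    # ...     t = w(t)
    # >>> t
    # 'text'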
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise RevlogError(_("integrity check failed on %s:%s")
                                  % (self.indexfile, pycompat.bytestr(revornode)))
        except RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2); however, subclasses
            might use a different hashing method (and override checkhash() in
            that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

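    # Illustrative usage sketch, not part of the original file; ``rl`` is a
    # hypothetical revlog, ``tr`` an open transaction, and ``p1n``/``p2n``
    # parent nodeids (nullid for a root revision).
    #
    # node = rl.addrevision(text, tr, linkrev, p1n, p2n)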
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

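    # Illustrative sketch, not part of the original file: the header
    # convention compress() returns, demonstrated with plain zlib. zlib
    # output starts with 'x', so compressed chunks need no extra prefix,
    # while 'u' explicitly marks data stored uncompressed.
    #
    # >>> import zlib
    # >>> zlib.compress('a' * 100)[0:1]
    # 'x'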
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') %
                                  stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

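    # Illustrative sketch, not part of the original file: the first-byte
    # dispatch performed above, reduced to a toy router.
    #
    # >>> def route(chunk):
    # ...     t = chunk[0:1]
    # ...     if t == 'x':
    # ...         return 'zlib'
    # ...     elif t == '\0':
    # ...         return 'raw'
    # ...     elif t == 'u':
    # ...         return 'uncompressed, strip 1-byte header'
    # ...     return 'look up engine for %r' % t
    # >>> route('x\x9c...')
    # 'zlib'
    # >>> route('uhello')
    # 'uncompressed, strip 1-byte header'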
    def _isgooddeltainfo(self, deltainfo, revinfo):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if deltainfo is None:
            return False

        # - 'deltainfo.distance' is the distance from the base revision --
        #   bounding it limits the amount of I/O we need to do.
        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
        #   deltas we need to apply -- bounding it limits the amount of CPU
        #   we consume.

        textlen = revinfo.textlen
        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = deltainfo.distance # ensure the conditional passes
        maxdist = max(maxdist, defaultmax)
        if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or
            deltainfo.compresseddeltalen > textlen * 2 or
                (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
            return False

        return True

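    # Illustrative numbers, not from the original change: with textlen = 1000
    # and no _maxdeltachainspan configured, maxdist becomes
    # max(distance, 4 * textlen), so the distance check cannot fire and only
    # the size and chain-length bounds can reject the delta.
    #
    # >>> textlen = 1000
    # >>> deltalen, compresseddeltalen = 300, 1500
    # >>> deltalen > textlen or compresseddeltalen > textlen * 2
    # False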
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid or node in wdirfilenodeids:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = None
        else:
            deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer._buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

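    # Illustrative sketch, not part of the original file: the shape of one
    # item consumed from ``deltas``, matching the unpacking in the loop above.
    # The field values here are hypothetical placeholders.
    #
    # >>> # (node, p1, p2, linknode, deltabase, delta, flags)
    # >>> data = ('n1', 'p1', 'p2', 'cl1', 'base', '<binary delta>', 0)
    # >>> len(data)
    # 7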
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

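    # Illustrative sketch, not part of the original file: the header shape a
    # full-replacement delta is checked against. mdiff.replacediffheader is
    # assumed to pack (start=0, end=oldlen, newlen) as ">lll", matching the
    # comparison above; the sample lengths are made up.
    #
    # >>> import struct
    # >>> hlen = struct.calcsize(">lll")
    # >>> delta = struct.pack(">lll", 0, 10, 25) + "\1\ncensored:..."
    # >>> delta[hlen:hlen + len("\1\ncensored:")] == "\1\ncensored:"
    # True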
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

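    # Illustrative usage sketch, not part of the original file; ``rl`` is a
    # hypothetical revlog. Per strip() below, callers are expected to save
    # ``brokenrevs`` (revisions above the strip point whose linkrevs point at
    # changesets that survive the strip) and re-add them after truncating at
    # ``rev``.
    #
    # rev, brokenrevs = rl.getstrippoint(minlink)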
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

2541 def clone(self, tr, destrevlog, addrevisioncb=None,
2576 def clone(self, tr, destrevlog, addrevisioncb=None,
2542 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2577 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2543 """Copy this revlog to another, possibly with format changes.
2578 """Copy this revlog to another, possibly with format changes.
2544
2579
2545 The destination revlog will contain the same revisions and nodes.
2580 The destination revlog will contain the same revisions and nodes.
2546 However, it may not be bit-for-bit identical due to e.g. delta encoding
2581 However, it may not be bit-for-bit identical due to e.g. delta encoding
2547 differences.
2582 differences.
2548
2583
2549 The ``deltareuse`` argument control how deltas from the existing revlog
2584 The ``deltareuse`` argument control how deltas from the existing revlog
2550 are preserved in the destination revlog. The argument can have the
2585 are preserved in the destination revlog. The argument can have the
2551 following values:
2586 following values:
2552
2587
2553 DELTAREUSEALWAYS
2588 DELTAREUSEALWAYS
2554 Deltas will always be reused (if possible), even if the destination
2589 Deltas will always be reused (if possible), even if the destination
2555 revlog would not select the same revisions for the delta. This is the
2590 revlog would not select the same revisions for the delta. This is the
2556 fastest mode of operation.
2591 fastest mode of operation.
2557 DELTAREUSESAMEREVS
2592 DELTAREUSESAMEREVS
2558 Deltas will be reused if the destination revlog would pick the same
2593 Deltas will be reused if the destination revlog would pick the same
2559 revisions for the delta. This mode strikes a balance between speed
2594 revisions for the delta. This mode strikes a balance between speed
2560 and optimization.
2595 and optimization.
2561 DELTAREUSENEVER
2596 DELTAREUSENEVER
2562 Deltas will never be reused. This is the slowest mode of execution.
2597 Deltas will never be reused. This is the slowest mode of execution.
2563 This mode can be used to recompute deltas (e.g. if the diff/delta
2598 This mode can be used to recompute deltas (e.g. if the diff/delta
2564 algorithm changes).
2599 algorithm changes).
2565
2600
2566 Delta computation can be slow, so the choice of delta reuse policy can
2601 Delta computation can be slow, so the choice of delta reuse policy can
2567 significantly affect run time.
2602 significantly affect run time.
2568
2603
2569 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2604 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2570 two extremes. Deltas will be reused if they are appropriate. But if the
2605 two extremes. Deltas will be reused if they are appropriate. But if the
2571 delta could choose a better revision, it will do so. This means if you
2606 delta could choose a better revision, it will do so. This means if you
2572 are converting a non-generaldelta revlog to a generaldelta revlog,
2607 are converting a non-generaldelta revlog to a generaldelta revlog,
2573 deltas will be recomputed if the delta's parent isn't a parent of the
2608 deltas will be recomputed if the delta's parent isn't a parent of the
2574 revision.
2609 revision.
2575
2610
2576 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2611 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2577 controls whether to compute deltas against both parents for merges.
2612 controls whether to compute deltas against both parents for merges.
2578 By default, the current default is used.
2613 By default, the current default is used.
2579 """
2614 """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                # entry layout: (offset/flags, compressed size, raw size,
                # delta base, linkrev, p1 rev, p2 rev, node)
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd
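For orientation, the loop above is the body of this revlog's copy API
(``clone`` in revlog.py), which replays every revision into ``destrevlog``
under the chosen delta-reuse policy. A minimal sketch of how a caller might
drive it, assuming the enclosing method takes roughly ``(tr, destrevlog,
addrevisioncb=None, deltareuse=..., aggressivemergedeltas=None)``; the names
``srclog``, ``destlog``, ``tr``, and ``copyrevlog`` are illustrative, not
from this patch:

    # Illustrative sketch only: copy all revisions from srclog into an
    # empty destlog, reusing stored deltas where the source and
    # destination revisions match (DELTAREUSESAMEREVS).
    def copyrevlog(srclog, destlog, tr):
        def onrev(orig, rev, node):
            # invoked once per copied revision, mirroring the
            # addrevisioncb(self, rev, node) call above
            pass

        srclog.clone(tr, destlog,
                     addrevisioncb=onrev,
                     deltareuse=srclog.DELTAREUSESAMEREVS)

Leaving ``aggressivemergedeltas`` unset keeps the destination's existing
setting, per the ``aggressivemergedeltas or oldamd`` line above.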
@@ -1,82 +1,83 @@
# this is a hack to make sure no escape characters are inserted into the output

from __future__ import absolute_import

import doctest
import os
import re
import sys

ispy3 = (sys.version_info[0] >= 3)

if 'TERM' in os.environ:
    del os.environ['TERM']

class py3docchecker(doctest.OutputChecker):
    def check_output(self, want, got, optionflags):
        want2 = re.sub(r'''\bu(['"])(.*?)\1''', r'\1\2\1', want)  # py2: u''
        got2 = re.sub(r'''\bb(['"])(.*?)\1''', r'\1\2\1', got)  # py3: b''
        # py3: <exc.name>: b'<msg>' -> <name>: <msg>
        #      <exc.name>: <others> -> <name>: <others>
        # note: re.MULTILINE must be passed via the flags keyword;
        # positionally it would bind to re.sub's count argument.
        got2 = re.sub(r'''^mercurial\.\w+\.(\w+): (['"])(.*?)\2''', r'\1: \3',
                      got2, flags=re.MULTILINE)
        got2 = re.sub(r'^mercurial\.\w+\.(\w+): ', r'\1: ', got2,
                      flags=re.MULTILINE)
        return any(doctest.OutputChecker.check_output(self, w, g, optionflags)
                   for w, g in [(want, got), (want2, got2)])
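To make the normalization concrete, here is a small sketch (not part of the
patch) of what the first substitution does to Python 3 doctest output:

    # Sketch: Python 3 prints bytes as b'...'; stripping the prefix lets
    # expected output written in Python 2 style still compare equal.
    import re

    got = "b'hello'"
    got2 = re.sub(r'''\bb(['"])(.*?)\1''', r'\1\2\1', got)
    assert got2 == "'hello'"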

def testmod(name, optionflags=0, testtarget=None):
    __import__(name)
    mod = sys.modules[name]
    if testtarget is not None:
        mod = getattr(mod, testtarget)

    # minimal copy of doctest.testmod()
    finder = doctest.DocTestFinder()
    checker = None
    if ispy3:
        checker = py3docchecker()
    runner = doctest.DocTestRunner(checker=checker, optionflags=optionflags)
    for test in finder.find(mod, name):
        runner.run(test)
    runner.summarize()

testmod('mercurial.changegroup')
testmod('mercurial.changelog')
testmod('mercurial.cmdutil')
testmod('mercurial.color')
testmod('mercurial.config')
testmod('mercurial.context')
testmod('mercurial.dagparser', optionflags=doctest.NORMALIZE_WHITESPACE)
testmod('mercurial.dispatch')
testmod('mercurial.encoding')
testmod('mercurial.fancyopts')
testmod('mercurial.formatter')
testmod('mercurial.hg')
testmod('mercurial.hgweb.hgwebdir_mod')
testmod('mercurial.match')
testmod('mercurial.mdiff')
testmod('mercurial.minirst')
testmod('mercurial.patch')
testmod('mercurial.pathutil')
testmod('mercurial.parser')
testmod('mercurial.pycompat')
+testmod('mercurial.revlog')
testmod('mercurial.revsetlang')
testmod('mercurial.smartset')
testmod('mercurial.store')
testmod('mercurial.subrepo')
testmod('mercurial.templatefilters')
testmod('mercurial.templater')
testmod('mercurial.ui')
testmod('mercurial.url')
testmod('mercurial.util')
testmod('mercurial.util', testtarget='platform')
testmod('mercurial.utils.stringutil')
testmod('hgext.convert.convcmd')
testmod('hgext.convert.cvsps')
testmod('hgext.convert.filemap')
testmod('hgext.convert.p4')
testmod('hgext.convert.subversion')
testmod('hgext.fix')
testmod('hgext.mq')
# Helper scripts in tests/ that have doctests:
testmod('drawdag')
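The single added line, ``+testmod('mercurial.revlog')``, is what makes the
new ``_trimchunk`` doctest run as part of this script: the module-level
``testmod(...)`` calls import each listed module, collect its doctests with
``doctest.DocTestFinder``, and report failures via ``runner.summarize()``.
The real doctest lives in revlog.py and is not reproduced here; as an
illustration of the shape of docstring that ``testmod`` discovers and
executes (``_example`` is a hypothetical helper, not the actual
``_trimchunk`` signature):

    def _example(buf, start, end):
        """Return the slice of buf between start and end.

        >>> _example(bytearray(b'abcdef'), 1, 4)
        bytearray(b'bcd')
        """
        return buf[start:end]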