##// END OF EJS Templates
revlog: early return in _slicechunk when span is already small enough...
Boris Feld -
r38654:d083ae26 default
parent child Browse files
Show More
@@ -1,2625 +1,2629
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import hashlib
19 import hashlib
20 import heapq
20 import heapq
21 import os
21 import os
22 import re
22 import re
23 import struct
23 import struct
24 import zlib
24 import zlib
25
25
26 # import stuff from node for others to import from revlog
26 # import stuff from node for others to import from revlog
27 from .node import (
27 from .node import (
28 bin,
28 bin,
29 hex,
29 hex,
30 nullid,
30 nullid,
31 nullrev,
31 nullrev,
32 wdirfilenodeids,
32 wdirfilenodeids,
33 wdirhex,
33 wdirhex,
34 wdirid,
34 wdirid,
35 wdirrev,
35 wdirrev,
36 )
36 )
37 from .i18n import _
37 from .i18n import _
38 from .thirdparty import (
38 from .thirdparty import (
39 attr,
39 attr,
40 )
40 )
41 from . import (
41 from . import (
42 ancestor,
42 ancestor,
43 error,
43 error,
44 mdiff,
44 mdiff,
45 policy,
45 policy,
46 pycompat,
46 pycompat,
47 templatefilters,
47 templatefilters,
48 util,
48 util,
49 )
49 )
50 from .utils import (
50 from .utils import (
51 stringutil,
51 stringutil,
52 )
52 )
53
53
54 parsers = policy.importmod(r'parsers')
54 parsers = policy.importmod(r'parsers')
55
55
56 # Aliased for performance.
56 # Aliased for performance.
57 _zlibdecompress = zlib.decompress
57 _zlibdecompress = zlib.decompress
58
58
59 # revlog header flags
59 # revlog header flags
60 REVLOGV0 = 0
60 REVLOGV0 = 0
61 REVLOGV1 = 1
61 REVLOGV1 = 1
62 # Dummy value until file format is finalized.
62 # Dummy value until file format is finalized.
63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 REVLOGV2 = 0xDEAD
64 REVLOGV2 = 0xDEAD
65 FLAG_INLINE_DATA = (1 << 16)
65 FLAG_INLINE_DATA = (1 << 16)
66 FLAG_GENERALDELTA = (1 << 17)
66 FLAG_GENERALDELTA = (1 << 17)
67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 REVLOG_DEFAULT_FORMAT = REVLOGV1
68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72
72
73 # revlog index flags
73 # revlog index flags
74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 REVIDX_DEFAULT_FLAGS = 0
77 REVIDX_DEFAULT_FLAGS = 0
78 # stable order in which flags need to be processed and their processors applied
78 # stable order in which flags need to be processed and their processors applied
79 REVIDX_FLAGS_ORDER = [
79 REVIDX_FLAGS_ORDER = [
80 REVIDX_ISCENSORED,
80 REVIDX_ISCENSORED,
81 REVIDX_ELLIPSIS,
81 REVIDX_ELLIPSIS,
82 REVIDX_EXTSTORED,
82 REVIDX_EXTSTORED,
83 ]
83 ]
84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 # bitmark for flags that could cause rawdata content change
85 # bitmark for flags that could cause rawdata content change
86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87
87
88 # max size of revlog with inline data
88 # max size of revlog with inline data
89 _maxinline = 131072
89 _maxinline = 131072
90 _chunksize = 1048576
90 _chunksize = 1048576
91
91
92 RevlogError = error.RevlogError
92 RevlogError = error.RevlogError
93 LookupError = error.LookupError
93 LookupError = error.LookupError
94 CensoredNodeError = error.CensoredNodeError
94 CensoredNodeError = error.CensoredNodeError
95 ProgrammingError = error.ProgrammingError
95 ProgrammingError = error.ProgrammingError
96
96
97 # Store flag processors (cf. 'addflagprocessor()' to register)
97 # Store flag processors (cf. 'addflagprocessor()' to register)
98 _flagprocessors = {
98 _flagprocessors = {
99 REVIDX_ISCENSORED: None,
99 REVIDX_ISCENSORED: None,
100 }
100 }
101
101
102 _mdre = re.compile('\1\n')
102 _mdre = re.compile('\1\n')
103 def parsemeta(text):
103 def parsemeta(text):
104 """return (metadatadict, metadatasize)"""
104 """return (metadatadict, metadatasize)"""
105 # text can be buffer, so we can't use .startswith or .index
105 # text can be buffer, so we can't use .startswith or .index
106 if text[:2] != '\1\n':
106 if text[:2] != '\1\n':
107 return None, None
107 return None, None
108 s = _mdre.search(text, 2).start()
108 s = _mdre.search(text, 2).start()
109 mtext = text[2:s]
109 mtext = text[2:s]
110 meta = {}
110 meta = {}
111 for l in mtext.splitlines():
111 for l in mtext.splitlines():
112 k, v = l.split(": ", 1)
112 k, v = l.split(": ", 1)
113 meta[k] = v
113 meta[k] = v
114 return meta, (s + 2)
114 return meta, (s + 2)
115
115
116 def packmeta(meta, text):
116 def packmeta(meta, text):
117 keys = sorted(meta)
117 keys = sorted(meta)
118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
119 return "\1\n%s\1\n%s" % (metatext, text)
119 return "\1\n%s\1\n%s" % (metatext, text)
120
120
121 def _censoredtext(text):
121 def _censoredtext(text):
122 m, offs = parsemeta(text)
122 m, offs = parsemeta(text)
123 return m and "censored" in m
123 return m and "censored" in m
124
124
125 def addflagprocessor(flag, processor):
125 def addflagprocessor(flag, processor):
126 """Register a flag processor on a revision data flag.
126 """Register a flag processor on a revision data flag.
127
127
128 Invariant:
128 Invariant:
129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
131 - Only one flag processor can be registered on a specific flag.
131 - Only one flag processor can be registered on a specific flag.
132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
133 following signatures:
133 following signatures:
134 - (read) f(self, rawtext) -> text, bool
134 - (read) f(self, rawtext) -> text, bool
135 - (write) f(self, text) -> rawtext, bool
135 - (write) f(self, text) -> rawtext, bool
136 - (raw) f(self, rawtext) -> bool
136 - (raw) f(self, rawtext) -> bool
137 "text" is presented to the user. "rawtext" is stored in revlog data, not
137 "text" is presented to the user. "rawtext" is stored in revlog data, not
138 directly visible to the user.
138 directly visible to the user.
139 The boolean returned by these transforms is used to determine whether
139 The boolean returned by these transforms is used to determine whether
140 the returned text can be used for hash integrity checking. For example,
140 the returned text can be used for hash integrity checking. For example,
141 if "write" returns False, then "text" is used to generate hash. If
141 if "write" returns False, then "text" is used to generate hash. If
142 "write" returns True, that basically means "rawtext" returned by "write"
142 "write" returns True, that basically means "rawtext" returned by "write"
143 should be used to generate hash. Usually, "write" and "read" return
143 should be used to generate hash. Usually, "write" and "read" return
144 different booleans. And "raw" returns a same boolean as "write".
144 different booleans. And "raw" returns a same boolean as "write".
145
145
146 Note: The 'raw' transform is used for changegroup generation and in some
146 Note: The 'raw' transform is used for changegroup generation and in some
147 debug commands. In this case the transform only indicates whether the
147 debug commands. In this case the transform only indicates whether the
148 contents can be used for hash integrity checks.
148 contents can be used for hash integrity checks.
149 """
149 """
150 if not flag & REVIDX_KNOWN_FLAGS:
150 if not flag & REVIDX_KNOWN_FLAGS:
151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
152 raise ProgrammingError(msg)
152 raise ProgrammingError(msg)
153 if flag not in REVIDX_FLAGS_ORDER:
153 if flag not in REVIDX_FLAGS_ORDER:
154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
155 raise ProgrammingError(msg)
155 raise ProgrammingError(msg)
156 if flag in _flagprocessors:
156 if flag in _flagprocessors:
157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
158 raise error.Abort(msg)
158 raise error.Abort(msg)
159 _flagprocessors[flag] = processor
159 _flagprocessors[flag] = processor
160
160
161 def getoffset(q):
161 def getoffset(q):
162 return int(q >> 16)
162 return int(q >> 16)
163
163
164 def gettype(q):
164 def gettype(q):
165 return int(q & 0xFFFF)
165 return int(q & 0xFFFF)
166
166
167 def offset_type(offset, type):
167 def offset_type(offset, type):
168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
169 raise ValueError('unknown revlog index flags')
169 raise ValueError('unknown revlog index flags')
170 return int(int(offset) << 16 | type)
170 return int(int(offset) << 16 | type)
171
171
172 _nullhash = hashlib.sha1(nullid)
172 _nullhash = hashlib.sha1(nullid)
173
173
174 def hash(text, p1, p2):
174 def hash(text, p1, p2):
175 """generate a hash from the given text and its parent hashes
175 """generate a hash from the given text and its parent hashes
176
176
177 This hash combines both the current file contents and its history
177 This hash combines both the current file contents and its history
178 in a manner that makes it easy to distinguish nodes with the same
178 in a manner that makes it easy to distinguish nodes with the same
179 content in the revision graph.
179 content in the revision graph.
180 """
180 """
181 # As of now, if one of the parent node is null, p2 is null
181 # As of now, if one of the parent node is null, p2 is null
182 if p2 == nullid:
182 if p2 == nullid:
183 # deep copy of a hash is faster than creating one
183 # deep copy of a hash is faster than creating one
184 s = _nullhash.copy()
184 s = _nullhash.copy()
185 s.update(p1)
185 s.update(p1)
186 else:
186 else:
187 # none of the parent nodes are nullid
187 # none of the parent nodes are nullid
188 if p1 < p2:
188 if p1 < p2:
189 a = p1
189 a = p1
190 b = p2
190 b = p2
191 else:
191 else:
192 a = p2
192 a = p2
193 b = p1
193 b = p1
194 s = hashlib.sha1(a)
194 s = hashlib.sha1(a)
195 s.update(b)
195 s.update(b)
196 s.update(text)
196 s.update(text)
197 return s.digest()
197 return s.digest()
198
198
199 def _trimchunk(revlog, revs, startidx, endidx=None):
199 def _trimchunk(revlog, revs, startidx, endidx=None):
200 """returns revs[startidx:endidx] without empty trailing revs
200 """returns revs[startidx:endidx] without empty trailing revs
201 """
201 """
202 length = revlog.length
202 length = revlog.length
203
203
204 if endidx is None:
204 if endidx is None:
205 endidx = len(revs)
205 endidx = len(revs)
206
206
207 # Trim empty revs at the end, but never the very first revision of a chain
207 # Trim empty revs at the end, but never the very first revision of a chain
208 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
208 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
209 endidx -= 1
209 endidx -= 1
210
210
211 return revs[startidx:endidx]
211 return revs[startidx:endidx]
212
212
213 def _slicechunk(revlog, revs):
213 def _slicechunk(revlog, revs):
214 """slice revs to reduce the amount of unrelated data to be read from disk.
214 """slice revs to reduce the amount of unrelated data to be read from disk.
215
215
216 ``revs`` is sliced into groups that should be read in one time.
216 ``revs`` is sliced into groups that should be read in one time.
217 Assume that revs are sorted.
217 Assume that revs are sorted.
218 """
218 """
219 start = revlog.start
219 start = revlog.start
220 length = revlog.length
220 length = revlog.length
221
221
222 if len(revs) <= 1:
222 if len(revs) <= 1:
223 yield revs
223 yield revs
224 return
224 return
225
225
226 startbyte = start(revs[0])
226 startbyte = start(revs[0])
227 endbyte = start(revs[-1]) + length(revs[-1])
227 endbyte = start(revs[-1]) + length(revs[-1])
228 readdata = deltachainspan = endbyte - startbyte
228 readdata = deltachainspan = endbyte - startbyte
229
229
230 if deltachainspan < revlog._srmingapsize:
231 yield revs
232 return
233
230 chainpayload = sum(length(r) for r in revs)
234 chainpayload = sum(length(r) for r in revs)
231
235
232 if deltachainspan:
236 if deltachainspan:
233 density = chainpayload / float(deltachainspan)
237 density = chainpayload / float(deltachainspan)
234 else:
238 else:
235 density = 1.0
239 density = 1.0
236
240
237 if density >= revlog._srdensitythreshold:
241 if density >= revlog._srdensitythreshold:
238 yield revs
242 yield revs
239 return
243 return
240
244
241 # Store the gaps in a heap to have them sorted by decreasing size
245 # Store the gaps in a heap to have them sorted by decreasing size
242 gapsheap = []
246 gapsheap = []
243 heapq.heapify(gapsheap)
247 heapq.heapify(gapsheap)
244 prevend = None
248 prevend = None
245 for i, rev in enumerate(revs):
249 for i, rev in enumerate(revs):
246 revstart = start(rev)
250 revstart = start(rev)
247 revlen = length(rev)
251 revlen = length(rev)
248
252
249 # Skip empty revisions to form larger holes
253 # Skip empty revisions to form larger holes
250 if revlen == 0:
254 if revlen == 0:
251 continue
255 continue
252
256
253 if prevend is not None:
257 if prevend is not None:
254 gapsize = revstart - prevend
258 gapsize = revstart - prevend
255 # only consider holes that are large enough
259 # only consider holes that are large enough
256 if gapsize > revlog._srmingapsize:
260 if gapsize > revlog._srmingapsize:
257 heapq.heappush(gapsheap, (-gapsize, i))
261 heapq.heappush(gapsheap, (-gapsize, i))
258
262
259 prevend = revstart + revlen
263 prevend = revstart + revlen
260
264
261 # Collect the indices of the largest holes until the density is acceptable
265 # Collect the indices of the largest holes until the density is acceptable
262 indicesheap = []
266 indicesheap = []
263 heapq.heapify(indicesheap)
267 heapq.heapify(indicesheap)
264 while gapsheap and density < revlog._srdensitythreshold:
268 while gapsheap and density < revlog._srdensitythreshold:
265 oppgapsize, gapidx = heapq.heappop(gapsheap)
269 oppgapsize, gapidx = heapq.heappop(gapsheap)
266
270
267 heapq.heappush(indicesheap, gapidx)
271 heapq.heappush(indicesheap, gapidx)
268
272
269 # the gap sizes are stored as negatives to be sorted decreasingly
273 # the gap sizes are stored as negatives to be sorted decreasingly
270 # by the heap
274 # by the heap
271 readdata -= (-oppgapsize)
275 readdata -= (-oppgapsize)
272 if readdata > 0:
276 if readdata > 0:
273 density = chainpayload / float(readdata)
277 density = chainpayload / float(readdata)
274 else:
278 else:
275 density = 1.0
279 density = 1.0
276
280
277 # Cut the revs at collected indices
281 # Cut the revs at collected indices
278 previdx = 0
282 previdx = 0
279 while indicesheap:
283 while indicesheap:
280 idx = heapq.heappop(indicesheap)
284 idx = heapq.heappop(indicesheap)
281
285
282 chunk = _trimchunk(revlog, revs, previdx, idx)
286 chunk = _trimchunk(revlog, revs, previdx, idx)
283 if chunk:
287 if chunk:
284 yield chunk
288 yield chunk
285
289
286 previdx = idx
290 previdx = idx
287
291
288 chunk = _trimchunk(revlog, revs, previdx)
292 chunk = _trimchunk(revlog, revs, previdx)
289 if chunk:
293 if chunk:
290 yield chunk
294 yield chunk
291
295
292 @attr.s(slots=True, frozen=True)
296 @attr.s(slots=True, frozen=True)
293 class _deltainfo(object):
297 class _deltainfo(object):
294 distance = attr.ib()
298 distance = attr.ib()
295 deltalen = attr.ib()
299 deltalen = attr.ib()
296 data = attr.ib()
300 data = attr.ib()
297 base = attr.ib()
301 base = attr.ib()
298 chainbase = attr.ib()
302 chainbase = attr.ib()
299 chainlen = attr.ib()
303 chainlen = attr.ib()
300 compresseddeltalen = attr.ib()
304 compresseddeltalen = attr.ib()
301
305
302 class _deltacomputer(object):
306 class _deltacomputer(object):
303 def __init__(self, revlog):
307 def __init__(self, revlog):
304 self.revlog = revlog
308 self.revlog = revlog
305
309
306 def _getcandidaterevs(self, p1, p2, cachedelta):
310 def _getcandidaterevs(self, p1, p2, cachedelta):
307 """
311 """
308 Provides revisions that present an interest to be diffed against,
312 Provides revisions that present an interest to be diffed against,
309 grouped by level of easiness.
313 grouped by level of easiness.
310 """
314 """
311 revlog = self.revlog
315 revlog = self.revlog
312 gdelta = revlog._generaldelta
316 gdelta = revlog._generaldelta
313 curr = len(revlog)
317 curr = len(revlog)
314 prev = curr - 1
318 prev = curr - 1
315 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
319 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
316
320
317 # should we try to build a delta?
321 # should we try to build a delta?
318 if prev != nullrev and revlog.storedeltachains:
322 if prev != nullrev and revlog.storedeltachains:
319 tested = set()
323 tested = set()
320 # This condition is true most of the time when processing
324 # This condition is true most of the time when processing
321 # changegroup data into a generaldelta repo. The only time it
325 # changegroup data into a generaldelta repo. The only time it
322 # isn't true is if this is the first revision in a delta chain
326 # isn't true is if this is the first revision in a delta chain
323 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
327 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
324 if cachedelta and gdelta and revlog._lazydeltabase:
328 if cachedelta and gdelta and revlog._lazydeltabase:
325 # Assume what we received from the server is a good choice
329 # Assume what we received from the server is a good choice
326 # build delta will reuse the cache
330 # build delta will reuse the cache
327 yield (cachedelta[0],)
331 yield (cachedelta[0],)
328 tested.add(cachedelta[0])
332 tested.add(cachedelta[0])
329
333
330 if gdelta:
334 if gdelta:
331 # exclude already lazy tested base if any
335 # exclude already lazy tested base if any
332 parents = [p for p in (p1r, p2r)
336 parents = [p for p in (p1r, p2r)
333 if p != nullrev and p not in tested]
337 if p != nullrev and p not in tested]
334
338
335 if not revlog._aggressivemergedeltas and len(parents) == 2:
339 if not revlog._aggressivemergedeltas and len(parents) == 2:
336 parents.sort()
340 parents.sort()
337 # To minimize the chance of having to build a fulltext,
341 # To minimize the chance of having to build a fulltext,
338 # pick first whichever parent is closest to us (max rev)
342 # pick first whichever parent is closest to us (max rev)
339 yield (parents[1],)
343 yield (parents[1],)
340 # then the other one (min rev) if the first did not fit
344 # then the other one (min rev) if the first did not fit
341 yield (parents[0],)
345 yield (parents[0],)
342 tested.update(parents)
346 tested.update(parents)
343 elif len(parents) > 0:
347 elif len(parents) > 0:
344 # Test all parents (1 or 2), and keep the best candidate
348 # Test all parents (1 or 2), and keep the best candidate
345 yield parents
349 yield parents
346 tested.update(parents)
350 tested.update(parents)
347
351
348 if prev not in tested:
352 if prev not in tested:
349 # other approach failed try against prev to hopefully save us a
353 # other approach failed try against prev to hopefully save us a
350 # fulltext.
354 # fulltext.
351 yield (prev,)
355 yield (prev,)
352 tested.add(prev)
356 tested.add(prev)
353
357
354 def buildtext(self, revinfo, fh):
358 def buildtext(self, revinfo, fh):
355 """Builds a fulltext version of a revision
359 """Builds a fulltext version of a revision
356
360
357 revinfo: _revisioninfo instance that contains all needed info
361 revinfo: _revisioninfo instance that contains all needed info
358 fh: file handle to either the .i or the .d revlog file,
362 fh: file handle to either the .i or the .d revlog file,
359 depending on whether it is inlined or not
363 depending on whether it is inlined or not
360 """
364 """
361 btext = revinfo.btext
365 btext = revinfo.btext
362 if btext[0] is not None:
366 if btext[0] is not None:
363 return btext[0]
367 return btext[0]
364
368
365 revlog = self.revlog
369 revlog = self.revlog
366 cachedelta = revinfo.cachedelta
370 cachedelta = revinfo.cachedelta
367 flags = revinfo.flags
371 flags = revinfo.flags
368 node = revinfo.node
372 node = revinfo.node
369
373
370 baserev = cachedelta[0]
374 baserev = cachedelta[0]
371 delta = cachedelta[1]
375 delta = cachedelta[1]
372 # special case deltas which replace entire base; no need to decode
376 # special case deltas which replace entire base; no need to decode
373 # base revision. this neatly avoids censored bases, which throw when
377 # base revision. this neatly avoids censored bases, which throw when
374 # they're decoded.
378 # they're decoded.
375 hlen = struct.calcsize(">lll")
379 hlen = struct.calcsize(">lll")
376 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
380 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
377 len(delta) - hlen):
381 len(delta) - hlen):
378 btext[0] = delta[hlen:]
382 btext[0] = delta[hlen:]
379 else:
383 else:
380 # deltabase is rawtext before changed by flag processors, which is
384 # deltabase is rawtext before changed by flag processors, which is
381 # equivalent to non-raw text
385 # equivalent to non-raw text
382 basetext = revlog.revision(baserev, _df=fh, raw=False)
386 basetext = revlog.revision(baserev, _df=fh, raw=False)
383 btext[0] = mdiff.patch(basetext, delta)
387 btext[0] = mdiff.patch(basetext, delta)
384
388
385 try:
389 try:
386 res = revlog._processflags(btext[0], flags, 'read', raw=True)
390 res = revlog._processflags(btext[0], flags, 'read', raw=True)
387 btext[0], validatehash = res
391 btext[0], validatehash = res
388 if validatehash:
392 if validatehash:
389 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
393 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
390 if flags & REVIDX_ISCENSORED:
394 if flags & REVIDX_ISCENSORED:
391 raise RevlogError(_('node %s is not censored') % node)
395 raise RevlogError(_('node %s is not censored') % node)
392 except CensoredNodeError:
396 except CensoredNodeError:
393 # must pass the censored index flag to add censored revisions
397 # must pass the censored index flag to add censored revisions
394 if not flags & REVIDX_ISCENSORED:
398 if not flags & REVIDX_ISCENSORED:
395 raise
399 raise
396 return btext[0]
400 return btext[0]
397
401
398 def _builddeltadiff(self, base, revinfo, fh):
402 def _builddeltadiff(self, base, revinfo, fh):
399 revlog = self.revlog
403 revlog = self.revlog
400 t = self.buildtext(revinfo, fh)
404 t = self.buildtext(revinfo, fh)
401 if revlog.iscensored(base):
405 if revlog.iscensored(base):
402 # deltas based on a censored revision must replace the
406 # deltas based on a censored revision must replace the
403 # full content in one patch, so delta works everywhere
407 # full content in one patch, so delta works everywhere
404 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
408 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
405 delta = header + t
409 delta = header + t
406 else:
410 else:
407 ptext = revlog.revision(base, _df=fh, raw=True)
411 ptext = revlog.revision(base, _df=fh, raw=True)
408 delta = mdiff.textdiff(ptext, t)
412 delta = mdiff.textdiff(ptext, t)
409
413
410 return delta
414 return delta
411
415
412 def _builddeltainfo(self, revinfo, base, fh):
416 def _builddeltainfo(self, revinfo, base, fh):
413 # can we use the cached delta?
417 # can we use the cached delta?
414 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
418 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
415 delta = revinfo.cachedelta[1]
419 delta = revinfo.cachedelta[1]
416 else:
420 else:
417 delta = self._builddeltadiff(base, revinfo, fh)
421 delta = self._builddeltadiff(base, revinfo, fh)
418 revlog = self.revlog
422 revlog = self.revlog
419 header, data = revlog.compress(delta)
423 header, data = revlog.compress(delta)
420 deltalen = len(header) + len(data)
424 deltalen = len(header) + len(data)
421 chainbase = revlog.chainbase(base)
425 chainbase = revlog.chainbase(base)
422 offset = revlog.end(len(revlog) - 1)
426 offset = revlog.end(len(revlog) - 1)
423 dist = deltalen + offset - revlog.start(chainbase)
427 dist = deltalen + offset - revlog.start(chainbase)
424 if revlog._generaldelta:
428 if revlog._generaldelta:
425 deltabase = base
429 deltabase = base
426 else:
430 else:
427 deltabase = chainbase
431 deltabase = chainbase
428 chainlen, compresseddeltalen = revlog._chaininfo(base)
432 chainlen, compresseddeltalen = revlog._chaininfo(base)
429 chainlen += 1
433 chainlen += 1
430 compresseddeltalen += deltalen
434 compresseddeltalen += deltalen
431 return _deltainfo(dist, deltalen, (header, data), deltabase,
435 return _deltainfo(dist, deltalen, (header, data), deltabase,
432 chainbase, chainlen, compresseddeltalen)
436 chainbase, chainlen, compresseddeltalen)
433
437
434 def finddeltainfo(self, revinfo, fh):
438 def finddeltainfo(self, revinfo, fh):
435 """Find an acceptable delta against a candidate revision
439 """Find an acceptable delta against a candidate revision
436
440
437 revinfo: information about the revision (instance of _revisioninfo)
441 revinfo: information about the revision (instance of _revisioninfo)
438 fh: file handle to either the .i or the .d revlog file,
442 fh: file handle to either the .i or the .d revlog file,
439 depending on whether it is inlined or not
443 depending on whether it is inlined or not
440
444
441 Returns the first acceptable candidate revision, as ordered by
445 Returns the first acceptable candidate revision, as ordered by
442 _getcandidaterevs
446 _getcandidaterevs
443 """
447 """
444 cachedelta = revinfo.cachedelta
448 cachedelta = revinfo.cachedelta
445 p1 = revinfo.p1
449 p1 = revinfo.p1
446 p2 = revinfo.p2
450 p2 = revinfo.p2
447 revlog = self.revlog
451 revlog = self.revlog
448
452
449 deltainfo = None
453 deltainfo = None
450 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
454 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
451 nominateddeltas = []
455 nominateddeltas = []
452 for candidaterev in candidaterevs:
456 for candidaterev in candidaterevs:
453 # no delta for rawtext-changing revs (see "candelta" for why)
457 # no delta for rawtext-changing revs (see "candelta" for why)
454 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
458 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
455 continue
459 continue
456 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
460 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
457 if revlog._isgooddeltainfo(candidatedelta, revinfo):
461 if revlog._isgooddeltainfo(candidatedelta, revinfo):
458 nominateddeltas.append(candidatedelta)
462 nominateddeltas.append(candidatedelta)
459 if nominateddeltas:
463 if nominateddeltas:
460 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
464 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
461 break
465 break
462
466
463 return deltainfo
467 return deltainfo
464
468
465 @attr.s(slots=True, frozen=True)
469 @attr.s(slots=True, frozen=True)
466 class _revisioninfo(object):
470 class _revisioninfo(object):
467 """Information about a revision that allows building its fulltext
471 """Information about a revision that allows building its fulltext
468 node: expected hash of the revision
472 node: expected hash of the revision
469 p1, p2: parent revs of the revision
473 p1, p2: parent revs of the revision
470 btext: built text cache consisting of a one-element list
474 btext: built text cache consisting of a one-element list
471 cachedelta: (baserev, uncompressed_delta) or None
475 cachedelta: (baserev, uncompressed_delta) or None
472 flags: flags associated to the revision storage
476 flags: flags associated to the revision storage
473
477
474 One of btext[0] or cachedelta must be set.
478 One of btext[0] or cachedelta must be set.
475 """
479 """
476 node = attr.ib()
480 node = attr.ib()
477 p1 = attr.ib()
481 p1 = attr.ib()
478 p2 = attr.ib()
482 p2 = attr.ib()
479 btext = attr.ib()
483 btext = attr.ib()
480 textlen = attr.ib()
484 textlen = attr.ib()
481 cachedelta = attr.ib()
485 cachedelta = attr.ib()
482 flags = attr.ib()
486 flags = attr.ib()
483
487
484 # index v0:
488 # index v0:
485 # 4 bytes: offset
489 # 4 bytes: offset
486 # 4 bytes: compressed length
490 # 4 bytes: compressed length
487 # 4 bytes: base rev
491 # 4 bytes: base rev
488 # 4 bytes: link rev
492 # 4 bytes: link rev
489 # 20 bytes: parent 1 nodeid
493 # 20 bytes: parent 1 nodeid
490 # 20 bytes: parent 2 nodeid
494 # 20 bytes: parent 2 nodeid
491 # 20 bytes: nodeid
495 # 20 bytes: nodeid
492 indexformatv0 = struct.Struct(">4l20s20s20s")
496 indexformatv0 = struct.Struct(">4l20s20s20s")
493 indexformatv0_pack = indexformatv0.pack
497 indexformatv0_pack = indexformatv0.pack
494 indexformatv0_unpack = indexformatv0.unpack
498 indexformatv0_unpack = indexformatv0.unpack
495
499
496 class revlogoldio(object):
500 class revlogoldio(object):
497 def __init__(self):
501 def __init__(self):
498 self.size = indexformatv0.size
502 self.size = indexformatv0.size
499
503
500 def parseindex(self, data, inline):
504 def parseindex(self, data, inline):
501 s = self.size
505 s = self.size
502 index = []
506 index = []
503 nodemap = {nullid: nullrev}
507 nodemap = {nullid: nullrev}
504 n = off = 0
508 n = off = 0
505 l = len(data)
509 l = len(data)
506 while off + s <= l:
510 while off + s <= l:
507 cur = data[off:off + s]
511 cur = data[off:off + s]
508 off += s
512 off += s
509 e = indexformatv0_unpack(cur)
513 e = indexformatv0_unpack(cur)
510 # transform to revlogv1 format
514 # transform to revlogv1 format
511 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
515 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
512 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
516 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
513 index.append(e2)
517 index.append(e2)
514 nodemap[e[6]] = n
518 nodemap[e[6]] = n
515 n += 1
519 n += 1
516
520
517 # add the magic null revision at -1
521 # add the magic null revision at -1
518 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
522 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
519
523
520 return index, nodemap, None
524 return index, nodemap, None
521
525
522 def packentry(self, entry, node, version, rev):
526 def packentry(self, entry, node, version, rev):
523 if gettype(entry[0]):
527 if gettype(entry[0]):
524 raise RevlogError(_('index entry flags need revlog version 1'))
528 raise RevlogError(_('index entry flags need revlog version 1'))
525 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
529 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
526 node(entry[5]), node(entry[6]), entry[7])
530 node(entry[5]), node(entry[6]), entry[7])
527 return indexformatv0_pack(*e2)
531 return indexformatv0_pack(*e2)
528
532
529 # index ng:
533 # index ng:
530 # 6 bytes: offset
534 # 6 bytes: offset
531 # 2 bytes: flags
535 # 2 bytes: flags
532 # 4 bytes: compressed length
536 # 4 bytes: compressed length
533 # 4 bytes: uncompressed length
537 # 4 bytes: uncompressed length
534 # 4 bytes: base rev
538 # 4 bytes: base rev
535 # 4 bytes: link rev
539 # 4 bytes: link rev
536 # 4 bytes: parent 1 rev
540 # 4 bytes: parent 1 rev
537 # 4 bytes: parent 2 rev
541 # 4 bytes: parent 2 rev
538 # 32 bytes: nodeid
542 # 32 bytes: nodeid
539 indexformatng = struct.Struct(">Qiiiiii20s12x")
543 indexformatng = struct.Struct(">Qiiiiii20s12x")
540 indexformatng_pack = indexformatng.pack
544 indexformatng_pack = indexformatng.pack
541 versionformat = struct.Struct(">I")
545 versionformat = struct.Struct(">I")
542 versionformat_pack = versionformat.pack
546 versionformat_pack = versionformat.pack
543 versionformat_unpack = versionformat.unpack
547 versionformat_unpack = versionformat.unpack
544
548
545 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
549 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
546 # signed integer)
550 # signed integer)
547 _maxentrysize = 0x7fffffff
551 _maxentrysize = 0x7fffffff
548
552
549 class revlogio(object):
553 class revlogio(object):
550 def __init__(self):
554 def __init__(self):
551 self.size = indexformatng.size
555 self.size = indexformatng.size
552
556
553 def parseindex(self, data, inline):
557 def parseindex(self, data, inline):
554 # call the C implementation to parse the index data
558 # call the C implementation to parse the index data
555 index, cache = parsers.parse_index2(data, inline)
559 index, cache = parsers.parse_index2(data, inline)
556 return index, getattr(index, 'nodemap', None), cache
560 return index, getattr(index, 'nodemap', None), cache
557
561
558 def packentry(self, entry, node, version, rev):
562 def packentry(self, entry, node, version, rev):
559 p = indexformatng_pack(*entry)
563 p = indexformatng_pack(*entry)
560 if rev == 0:
564 if rev == 0:
561 p = versionformat_pack(version) + p[4:]
565 p = versionformat_pack(version) + p[4:]
562 return p
566 return p
563
567
564 class revlog(object):
568 class revlog(object):
565 """
569 """
566 the underlying revision storage object
570 the underlying revision storage object
567
571
568 A revlog consists of two parts, an index and the revision data.
572 A revlog consists of two parts, an index and the revision data.
569
573
570 The index is a file with a fixed record size containing
574 The index is a file with a fixed record size containing
571 information on each revision, including its nodeid (hash), the
575 information on each revision, including its nodeid (hash), the
572 nodeids of its parents, the position and offset of its data within
576 nodeids of its parents, the position and offset of its data within
573 the data file, and the revision it's based on. Finally, each entry
577 the data file, and the revision it's based on. Finally, each entry
574 contains a linkrev entry that can serve as a pointer to external
578 contains a linkrev entry that can serve as a pointer to external
575 data.
579 data.
576
580
577 The revision data itself is a linear collection of data chunks.
581 The revision data itself is a linear collection of data chunks.
578 Each chunk represents a revision and is usually represented as a
582 Each chunk represents a revision and is usually represented as a
579 delta against the previous chunk. To bound lookup time, runs of
583 delta against the previous chunk. To bound lookup time, runs of
580 deltas are limited to about 2 times the length of the original
584 deltas are limited to about 2 times the length of the original
581 version data. This makes retrieval of a version proportional to
585 version data. This makes retrieval of a version proportional to
582 its size, or O(1) relative to the number of revisions.
586 its size, or O(1) relative to the number of revisions.
583
587
584 Both pieces of the revlog are written to in an append-only
588 Both pieces of the revlog are written to in an append-only
585 fashion, which means we never need to rewrite a file to insert or
589 fashion, which means we never need to rewrite a file to insert or
586 remove data, and can use some simple techniques to avoid the need
590 remove data, and can use some simple techniques to avoid the need
587 for locking while reading.
591 for locking while reading.
588
592
589 If checkambig, indexfile is opened with checkambig=True at
593 If checkambig, indexfile is opened with checkambig=True at
590 writing, to avoid file stat ambiguity.
594 writing, to avoid file stat ambiguity.
591
595
592 If mmaplargeindex is True, and an mmapindexthreshold is set, the
596 If mmaplargeindex is True, and an mmapindexthreshold is set, the
593 index will be mmapped rather than read if it is larger than the
597 index will be mmapped rather than read if it is larger than the
594 configured threshold.
598 configured threshold.
595
599
596 If censorable is True, the revlog can have censored revisions.
600 If censorable is True, the revlog can have censored revisions.
597 """
601 """
598 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
602 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
599 mmaplargeindex=False, censorable=False):
603 mmaplargeindex=False, censorable=False):
600 """
604 """
601 create a revlog object
605 create a revlog object
602
606
603 opener is a function that abstracts the file opening operation
607 opener is a function that abstracts the file opening operation
604 and can be used to implement COW semantics or the like.
608 and can be used to implement COW semantics or the like.
605 """
609 """
606 self.indexfile = indexfile
610 self.indexfile = indexfile
607 self.datafile = datafile or (indexfile[:-2] + ".d")
611 self.datafile = datafile or (indexfile[:-2] + ".d")
608 self.opener = opener
612 self.opener = opener
609 # When True, indexfile is opened with checkambig=True at writing, to
613 # When True, indexfile is opened with checkambig=True at writing, to
610 # avoid file stat ambiguity.
614 # avoid file stat ambiguity.
611 self._checkambig = checkambig
615 self._checkambig = checkambig
612 self._censorable = censorable
616 self._censorable = censorable
613 # 3-tuple of (node, rev, text) for a raw revision.
617 # 3-tuple of (node, rev, text) for a raw revision.
614 self._cache = None
618 self._cache = None
615 # Maps rev to chain base rev.
619 # Maps rev to chain base rev.
616 self._chainbasecache = util.lrucachedict(100)
620 self._chainbasecache = util.lrucachedict(100)
617 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
621 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
618 self._chunkcache = (0, '')
622 self._chunkcache = (0, '')
619 # How much data to read and cache into the raw revlog data cache.
623 # How much data to read and cache into the raw revlog data cache.
620 self._chunkcachesize = 65536
624 self._chunkcachesize = 65536
621 self._maxchainlen = None
625 self._maxchainlen = None
622 self._aggressivemergedeltas = True
626 self._aggressivemergedeltas = True
623 self.index = []
627 self.index = []
624 # Mapping of partial identifiers to full nodes.
628 # Mapping of partial identifiers to full nodes.
625 self._pcache = {}
629 self._pcache = {}
626 # Mapping of revision integer to full node.
630 # Mapping of revision integer to full node.
627 self._nodecache = {nullid: nullrev}
631 self._nodecache = {nullid: nullrev}
628 self._nodepos = None
632 self._nodepos = None
629 self._compengine = 'zlib'
633 self._compengine = 'zlib'
630 self._maxdeltachainspan = -1
634 self._maxdeltachainspan = -1
631 self._withsparseread = False
635 self._withsparseread = False
632 self._srdensitythreshold = 0.50
636 self._srdensitythreshold = 0.50
633 self._srmingapsize = 262144
637 self._srmingapsize = 262144
634
638
635 mmapindexthreshold = None
639 mmapindexthreshold = None
636 v = REVLOG_DEFAULT_VERSION
640 v = REVLOG_DEFAULT_VERSION
637 opts = getattr(opener, 'options', None)
641 opts = getattr(opener, 'options', None)
638 if opts is not None:
642 if opts is not None:
639 if 'revlogv2' in opts:
643 if 'revlogv2' in opts:
640 # version 2 revlogs always use generaldelta.
644 # version 2 revlogs always use generaldelta.
641 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
645 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
642 elif 'revlogv1' in opts:
646 elif 'revlogv1' in opts:
643 if 'generaldelta' in opts:
647 if 'generaldelta' in opts:
644 v |= FLAG_GENERALDELTA
648 v |= FLAG_GENERALDELTA
645 else:
649 else:
646 v = 0
650 v = 0
647 if 'chunkcachesize' in opts:
651 if 'chunkcachesize' in opts:
648 self._chunkcachesize = opts['chunkcachesize']
652 self._chunkcachesize = opts['chunkcachesize']
649 if 'maxchainlen' in opts:
653 if 'maxchainlen' in opts:
650 self._maxchainlen = opts['maxchainlen']
654 self._maxchainlen = opts['maxchainlen']
651 if 'aggressivemergedeltas' in opts:
655 if 'aggressivemergedeltas' in opts:
652 self._aggressivemergedeltas = opts['aggressivemergedeltas']
656 self._aggressivemergedeltas = opts['aggressivemergedeltas']
653 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
657 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
654 if 'compengine' in opts:
658 if 'compengine' in opts:
655 self._compengine = opts['compengine']
659 self._compengine = opts['compengine']
656 if 'maxdeltachainspan' in opts:
660 if 'maxdeltachainspan' in opts:
657 self._maxdeltachainspan = opts['maxdeltachainspan']
661 self._maxdeltachainspan = opts['maxdeltachainspan']
658 if mmaplargeindex and 'mmapindexthreshold' in opts:
662 if mmaplargeindex and 'mmapindexthreshold' in opts:
659 mmapindexthreshold = opts['mmapindexthreshold']
663 mmapindexthreshold = opts['mmapindexthreshold']
660 self._withsparseread = bool(opts.get('with-sparse-read', False))
664 self._withsparseread = bool(opts.get('with-sparse-read', False))
661 if 'sparse-read-density-threshold' in opts:
665 if 'sparse-read-density-threshold' in opts:
662 self._srdensitythreshold = opts['sparse-read-density-threshold']
666 self._srdensitythreshold = opts['sparse-read-density-threshold']
663 if 'sparse-read-min-gap-size' in opts:
667 if 'sparse-read-min-gap-size' in opts:
664 self._srmingapsize = opts['sparse-read-min-gap-size']
668 self._srmingapsize = opts['sparse-read-min-gap-size']
665
669
666 if self._chunkcachesize <= 0:
670 if self._chunkcachesize <= 0:
667 raise RevlogError(_('revlog chunk cache size %r is not greater '
671 raise RevlogError(_('revlog chunk cache size %r is not greater '
668 'than 0') % self._chunkcachesize)
672 'than 0') % self._chunkcachesize)
669 elif self._chunkcachesize & (self._chunkcachesize - 1):
673 elif self._chunkcachesize & (self._chunkcachesize - 1):
670 raise RevlogError(_('revlog chunk cache size %r is not a power '
674 raise RevlogError(_('revlog chunk cache size %r is not a power '
671 'of 2') % self._chunkcachesize)
675 'of 2') % self._chunkcachesize)
672
676
673 indexdata = ''
677 indexdata = ''
674 self._initempty = True
678 self._initempty = True
675 try:
679 try:
676 with self._indexfp() as f:
680 with self._indexfp() as f:
677 if (mmapindexthreshold is not None and
681 if (mmapindexthreshold is not None and
678 self.opener.fstat(f).st_size >= mmapindexthreshold):
682 self.opener.fstat(f).st_size >= mmapindexthreshold):
679 indexdata = util.buffer(util.mmapread(f))
683 indexdata = util.buffer(util.mmapread(f))
680 else:
684 else:
681 indexdata = f.read()
685 indexdata = f.read()
682 if len(indexdata) > 0:
686 if len(indexdata) > 0:
683 v = versionformat_unpack(indexdata[:4])[0]
687 v = versionformat_unpack(indexdata[:4])[0]
684 self._initempty = False
688 self._initempty = False
685 except IOError as inst:
689 except IOError as inst:
686 if inst.errno != errno.ENOENT:
690 if inst.errno != errno.ENOENT:
687 raise
691 raise
688
692
689 self.version = v
693 self.version = v
690 self._inline = v & FLAG_INLINE_DATA
694 self._inline = v & FLAG_INLINE_DATA
691 self._generaldelta = v & FLAG_GENERALDELTA
695 self._generaldelta = v & FLAG_GENERALDELTA
692 flags = v & ~0xFFFF
696 flags = v & ~0xFFFF
693 fmt = v & 0xFFFF
697 fmt = v & 0xFFFF
694 if fmt == REVLOGV0:
698 if fmt == REVLOGV0:
695 if flags:
699 if flags:
696 raise RevlogError(_('unknown flags (%#04x) in version %d '
700 raise RevlogError(_('unknown flags (%#04x) in version %d '
697 'revlog %s') %
701 'revlog %s') %
698 (flags >> 16, fmt, self.indexfile))
702 (flags >> 16, fmt, self.indexfile))
699 elif fmt == REVLOGV1:
703 elif fmt == REVLOGV1:
700 if flags & ~REVLOGV1_FLAGS:
704 if flags & ~REVLOGV1_FLAGS:
701 raise RevlogError(_('unknown flags (%#04x) in version %d '
705 raise RevlogError(_('unknown flags (%#04x) in version %d '
702 'revlog %s') %
706 'revlog %s') %
703 (flags >> 16, fmt, self.indexfile))
707 (flags >> 16, fmt, self.indexfile))
704 elif fmt == REVLOGV2:
708 elif fmt == REVLOGV2:
705 if flags & ~REVLOGV2_FLAGS:
709 if flags & ~REVLOGV2_FLAGS:
706 raise RevlogError(_('unknown flags (%#04x) in version %d '
710 raise RevlogError(_('unknown flags (%#04x) in version %d '
707 'revlog %s') %
711 'revlog %s') %
708 (flags >> 16, fmt, self.indexfile))
712 (flags >> 16, fmt, self.indexfile))
709 else:
713 else:
710 raise RevlogError(_('unknown version (%d) in revlog %s') %
714 raise RevlogError(_('unknown version (%d) in revlog %s') %
711 (fmt, self.indexfile))
715 (fmt, self.indexfile))
712
716
713 self.storedeltachains = True
717 self.storedeltachains = True
714
718
715 self._io = revlogio()
719 self._io = revlogio()
716 if self.version == REVLOGV0:
720 if self.version == REVLOGV0:
717 self._io = revlogoldio()
721 self._io = revlogoldio()
718 try:
722 try:
719 d = self._io.parseindex(indexdata, self._inline)
723 d = self._io.parseindex(indexdata, self._inline)
720 except (ValueError, IndexError):
724 except (ValueError, IndexError):
721 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
725 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
722 self.index, nodemap, self._chunkcache = d
726 self.index, nodemap, self._chunkcache = d
723 if nodemap is not None:
727 if nodemap is not None:
724 self.nodemap = self._nodecache = nodemap
728 self.nodemap = self._nodecache = nodemap
725 if not self._chunkcache:
729 if not self._chunkcache:
726 self._chunkclear()
730 self._chunkclear()
727 # revnum -> (chain-length, sum-delta-length)
731 # revnum -> (chain-length, sum-delta-length)
728 self._chaininfocache = {}
732 self._chaininfocache = {}
729 # revlog header -> revlog compressor
733 # revlog header -> revlog compressor
730 self._decompressors = {}
734 self._decompressors = {}
731
735
732 @util.propertycache
736 @util.propertycache
733 def _compressor(self):
737 def _compressor(self):
734 return util.compengines[self._compengine].revlogcompressor()
738 return util.compengines[self._compengine].revlogcompressor()
735
739
736 def _indexfp(self, mode='r'):
740 def _indexfp(self, mode='r'):
737 """file object for the revlog's index file"""
741 """file object for the revlog's index file"""
738 args = {r'mode': mode}
742 args = {r'mode': mode}
739 if mode != 'r':
743 if mode != 'r':
740 args[r'checkambig'] = self._checkambig
744 args[r'checkambig'] = self._checkambig
741 if mode == 'w':
745 if mode == 'w':
742 args[r'atomictemp'] = True
746 args[r'atomictemp'] = True
743 return self.opener(self.indexfile, **args)
747 return self.opener(self.indexfile, **args)
744
748
745 def _datafp(self, mode='r'):
749 def _datafp(self, mode='r'):
746 """file object for the revlog's data file"""
750 """file object for the revlog's data file"""
747 return self.opener(self.datafile, mode=mode)
751 return self.opener(self.datafile, mode=mode)
748
752
749 @contextlib.contextmanager
753 @contextlib.contextmanager
750 def _datareadfp(self, existingfp=None):
754 def _datareadfp(self, existingfp=None):
751 """file object suitable to read data"""
755 """file object suitable to read data"""
752 if existingfp is not None:
756 if existingfp is not None:
753 yield existingfp
757 yield existingfp
754 else:
758 else:
755 if self._inline:
759 if self._inline:
756 func = self._indexfp
760 func = self._indexfp
757 else:
761 else:
758 func = self._datafp
762 func = self._datafp
759 with func() as fp:
763 with func() as fp:
760 yield fp
764 yield fp
761
765
762 def tip(self):
766 def tip(self):
763 return self.node(len(self.index) - 2)
767 return self.node(len(self.index) - 2)
764 def __contains__(self, rev):
768 def __contains__(self, rev):
765 return 0 <= rev < len(self)
769 return 0 <= rev < len(self)
766 def __len__(self):
770 def __len__(self):
767 return len(self.index) - 1
771 return len(self.index) - 1
768 def __iter__(self):
772 def __iter__(self):
769 return iter(xrange(len(self)))
773 return iter(xrange(len(self)))
770 def revs(self, start=0, stop=None):
774 def revs(self, start=0, stop=None):
771 """iterate over all rev in this revlog (from start to stop)"""
775 """iterate over all rev in this revlog (from start to stop)"""
772 step = 1
776 step = 1
773 if stop is not None:
777 if stop is not None:
774 if start > stop:
778 if start > stop:
775 step = -1
779 step = -1
776 stop += step
780 stop += step
777 else:
781 else:
778 stop = len(self)
782 stop = len(self)
779 return xrange(start, stop, step)
783 return xrange(start, stop, step)
780
784
781 @util.propertycache
785 @util.propertycache
782 def nodemap(self):
786 def nodemap(self):
783 self.rev(self.node(0))
787 self.rev(self.node(0))
784 return self._nodecache
788 return self._nodecache
785
789
786 def hasnode(self, node):
790 def hasnode(self, node):
787 try:
791 try:
788 self.rev(node)
792 self.rev(node)
789 return True
793 return True
790 except KeyError:
794 except KeyError:
791 return False
795 return False
792
796
793 def candelta(self, baserev, rev):
797 def candelta(self, baserev, rev):
794 """whether two revisions (baserev, rev) can be delta-ed or not"""
798 """whether two revisions (baserev, rev) can be delta-ed or not"""
795 # Disable delta if either rev requires a content-changing flag
799 # Disable delta if either rev requires a content-changing flag
796 # processor (ex. LFS). This is because such flag processor can alter
800 # processor (ex. LFS). This is because such flag processor can alter
797 # the rawtext content that the delta will be based on, and two clients
801 # the rawtext content that the delta will be based on, and two clients
798 # could have a same revlog node with different flags (i.e. different
802 # could have a same revlog node with different flags (i.e. different
799 # rawtext contents) and the delta could be incompatible.
803 # rawtext contents) and the delta could be incompatible.
800 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
804 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
801 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
805 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
802 return False
806 return False
803 return True
807 return True
804
808
805 def clearcaches(self):
809 def clearcaches(self):
806 self._cache = None
810 self._cache = None
807 self._chainbasecache.clear()
811 self._chainbasecache.clear()
808 self._chunkcache = (0, '')
812 self._chunkcache = (0, '')
809 self._pcache = {}
813 self._pcache = {}
810
814
811 try:
815 try:
812 self._nodecache.clearcaches()
816 self._nodecache.clearcaches()
813 except AttributeError:
817 except AttributeError:
814 self._nodecache = {nullid: nullrev}
818 self._nodecache = {nullid: nullrev}
815 self._nodepos = None
819 self._nodepos = None
816
820
817 def rev(self, node):
821 def rev(self, node):
818 try:
822 try:
819 return self._nodecache[node]
823 return self._nodecache[node]
820 except TypeError:
824 except TypeError:
821 raise
825 raise
822 except RevlogError:
826 except RevlogError:
823 # parsers.c radix tree lookup failed
827 # parsers.c radix tree lookup failed
824 if node == wdirid or node in wdirfilenodeids:
828 if node == wdirid or node in wdirfilenodeids:
825 raise error.WdirUnsupported
829 raise error.WdirUnsupported
826 raise LookupError(node, self.indexfile, _('no node'))
830 raise LookupError(node, self.indexfile, _('no node'))
827 except KeyError:
831 except KeyError:
828 # pure python cache lookup failed
832 # pure python cache lookup failed
829 n = self._nodecache
833 n = self._nodecache
830 i = self.index
834 i = self.index
831 p = self._nodepos
835 p = self._nodepos
832 if p is None:
836 if p is None:
833 p = len(i) - 2
837 p = len(i) - 2
834 else:
838 else:
835 assert p < len(i)
839 assert p < len(i)
836 for r in xrange(p, -1, -1):
840 for r in xrange(p, -1, -1):
837 v = i[r][7]
841 v = i[r][7]
838 n[v] = r
842 n[v] = r
839 if v == node:
843 if v == node:
840 self._nodepos = r - 1
844 self._nodepos = r - 1
841 return r
845 return r
842 if node == wdirid or node in wdirfilenodeids:
846 if node == wdirid or node in wdirfilenodeids:
843 raise error.WdirUnsupported
847 raise error.WdirUnsupported
844 raise LookupError(node, self.indexfile, _('no node'))
848 raise LookupError(node, self.indexfile, _('no node'))
845
849
846 # Accessors for index entries.
850 # Accessors for index entries.
847
851
848 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
852 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
849 # are flags.
853 # are flags.
850 def start(self, rev):
854 def start(self, rev):
851 return int(self.index[rev][0] >> 16)
855 return int(self.index[rev][0] >> 16)
852
856
853 def flags(self, rev):
857 def flags(self, rev):
854 return self.index[rev][0] & 0xFFFF
858 return self.index[rev][0] & 0xFFFF
855
859
856 def length(self, rev):
860 def length(self, rev):
857 return self.index[rev][1]
861 return self.index[rev][1]
858
862
859 def rawsize(self, rev):
863 def rawsize(self, rev):
860 """return the length of the uncompressed text for a given revision"""
864 """return the length of the uncompressed text for a given revision"""
861 l = self.index[rev][2]
865 l = self.index[rev][2]
862 if l >= 0:
866 if l >= 0:
863 return l
867 return l
864
868
865 t = self.revision(rev, raw=True)
869 t = self.revision(rev, raw=True)
866 return len(t)
870 return len(t)
867
871
868 def size(self, rev):
872 def size(self, rev):
869 """length of non-raw text (processed by a "read" flag processor)"""
873 """length of non-raw text (processed by a "read" flag processor)"""
870 # fast path: if no "read" flag processor could change the content,
874 # fast path: if no "read" flag processor could change the content,
871 # size is rawsize. note: ELLIPSIS is known to not change the content.
875 # size is rawsize. note: ELLIPSIS is known to not change the content.
872 flags = self.flags(rev)
876 flags = self.flags(rev)
873 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
877 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
874 return self.rawsize(rev)
878 return self.rawsize(rev)
875
879
876 return len(self.revision(rev, raw=False))
880 return len(self.revision(rev, raw=False))
877
881
878 def chainbase(self, rev):
882 def chainbase(self, rev):
879 base = self._chainbasecache.get(rev)
883 base = self._chainbasecache.get(rev)
880 if base is not None:
884 if base is not None:
881 return base
885 return base
882
886
883 index = self.index
887 index = self.index
884 iterrev = rev
888 iterrev = rev
885 base = index[iterrev][3]
889 base = index[iterrev][3]
886 while base != iterrev:
890 while base != iterrev:
887 iterrev = base
891 iterrev = base
888 base = index[iterrev][3]
892 base = index[iterrev][3]
889
893
890 self._chainbasecache[rev] = base
894 self._chainbasecache[rev] = base
891 return base
895 return base
892
896
893 def linkrev(self, rev):
897 def linkrev(self, rev):
894 return self.index[rev][4]
898 return self.index[rev][4]
895
899
896 def parentrevs(self, rev):
900 def parentrevs(self, rev):
897 try:
901 try:
898 entry = self.index[rev]
902 entry = self.index[rev]
899 except IndexError:
903 except IndexError:
900 if rev == wdirrev:
904 if rev == wdirrev:
901 raise error.WdirUnsupported
905 raise error.WdirUnsupported
902 raise
906 raise
903
907
904 return entry[5], entry[6]
908 return entry[5], entry[6]
905
909
906 def node(self, rev):
910 def node(self, rev):
907 try:
911 try:
908 return self.index[rev][7]
912 return self.index[rev][7]
909 except IndexError:
913 except IndexError:
910 if rev == wdirrev:
914 if rev == wdirrev:
911 raise error.WdirUnsupported
915 raise error.WdirUnsupported
912 raise
916 raise
913
917
914 # Derived from index values.
918 # Derived from index values.
915
919
916 def end(self, rev):
920 def end(self, rev):
917 return self.start(rev) + self.length(rev)
921 return self.start(rev) + self.length(rev)
918
922
919 def parents(self, node):
923 def parents(self, node):
920 i = self.index
924 i = self.index
921 d = i[self.rev(node)]
925 d = i[self.rev(node)]
922 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
926 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
923
927
924 def chainlen(self, rev):
928 def chainlen(self, rev):
925 return self._chaininfo(rev)[0]
929 return self._chaininfo(rev)[0]
926
930
927 def _chaininfo(self, rev):
931 def _chaininfo(self, rev):
928 chaininfocache = self._chaininfocache
932 chaininfocache = self._chaininfocache
929 if rev in chaininfocache:
933 if rev in chaininfocache:
930 return chaininfocache[rev]
934 return chaininfocache[rev]
931 index = self.index
935 index = self.index
932 generaldelta = self._generaldelta
936 generaldelta = self._generaldelta
933 iterrev = rev
937 iterrev = rev
934 e = index[iterrev]
938 e = index[iterrev]
935 clen = 0
939 clen = 0
936 compresseddeltalen = 0
940 compresseddeltalen = 0
937 while iterrev != e[3]:
941 while iterrev != e[3]:
938 clen += 1
942 clen += 1
939 compresseddeltalen += e[1]
943 compresseddeltalen += e[1]
940 if generaldelta:
944 if generaldelta:
941 iterrev = e[3]
945 iterrev = e[3]
942 else:
946 else:
943 iterrev -= 1
947 iterrev -= 1
944 if iterrev in chaininfocache:
948 if iterrev in chaininfocache:
945 t = chaininfocache[iterrev]
949 t = chaininfocache[iterrev]
946 clen += t[0]
950 clen += t[0]
947 compresseddeltalen += t[1]
951 compresseddeltalen += t[1]
948 break
952 break
949 e = index[iterrev]
953 e = index[iterrev]
950 else:
954 else:
951 # Add text length of base since decompressing that also takes
955 # Add text length of base since decompressing that also takes
952 # work. For cache hits the length is already included.
956 # work. For cache hits the length is already included.
953 compresseddeltalen += e[1]
957 compresseddeltalen += e[1]
954 r = (clen, compresseddeltalen)
958 r = (clen, compresseddeltalen)
955 chaininfocache[rev] = r
959 chaininfocache[rev] = r
956 return r
960 return r
957
961
958 def _deltachain(self, rev, stoprev=None):
962 def _deltachain(self, rev, stoprev=None):
959 """Obtain the delta chain for a revision.
963 """Obtain the delta chain for a revision.
960
964
961 ``stoprev`` specifies a revision to stop at. If not specified, we
965 ``stoprev`` specifies a revision to stop at. If not specified, we
962 stop at the base of the chain.
966 stop at the base of the chain.
963
967
964 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
968 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
965 revs in ascending order and ``stopped`` is a bool indicating whether
969 revs in ascending order and ``stopped`` is a bool indicating whether
966 ``stoprev`` was hit.
970 ``stoprev`` was hit.
967 """
971 """
968 # Try C implementation.
972 # Try C implementation.
969 try:
973 try:
970 return self.index.deltachain(rev, stoprev, self._generaldelta)
974 return self.index.deltachain(rev, stoprev, self._generaldelta)
971 except AttributeError:
975 except AttributeError:
972 pass
976 pass
973
977
974 chain = []
978 chain = []
975
979
976 # Alias to prevent attribute lookup in tight loop.
980 # Alias to prevent attribute lookup in tight loop.
977 index = self.index
981 index = self.index
978 generaldelta = self._generaldelta
982 generaldelta = self._generaldelta
979
983
980 iterrev = rev
984 iterrev = rev
981 e = index[iterrev]
985 e = index[iterrev]
982 while iterrev != e[3] and iterrev != stoprev:
986 while iterrev != e[3] and iterrev != stoprev:
983 chain.append(iterrev)
987 chain.append(iterrev)
984 if generaldelta:
988 if generaldelta:
985 iterrev = e[3]
989 iterrev = e[3]
986 else:
990 else:
987 iterrev -= 1
991 iterrev -= 1
988 e = index[iterrev]
992 e = index[iterrev]
989
993
990 if iterrev == stoprev:
994 if iterrev == stoprev:
991 stopped = True
995 stopped = True
992 else:
996 else:
993 chain.append(iterrev)
997 chain.append(iterrev)
994 stopped = False
998 stopped = False
995
999
996 chain.reverse()
1000 chain.reverse()
997 return chain, stopped
1001 return chain, stopped
998
1002
999 def ancestors(self, revs, stoprev=0, inclusive=False):
1003 def ancestors(self, revs, stoprev=0, inclusive=False):
1000 """Generate the ancestors of 'revs' in reverse topological order.
1004 """Generate the ancestors of 'revs' in reverse topological order.
1001 Does not generate revs lower than stoprev.
1005 Does not generate revs lower than stoprev.
1002
1006
1003 See the documentation for ancestor.lazyancestors for more details."""
1007 See the documentation for ancestor.lazyancestors for more details."""
1004
1008
1005 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1009 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1006 inclusive=inclusive)
1010 inclusive=inclusive)
1007
1011
1008 def descendants(self, revs):
1012 def descendants(self, revs):
1009 """Generate the descendants of 'revs' in revision order.
1013 """Generate the descendants of 'revs' in revision order.
1010
1014
1011 Yield a sequence of revision numbers starting with a child of
1015 Yield a sequence of revision numbers starting with a child of
1012 some rev in revs, i.e., each revision is *not* considered a
1016 some rev in revs, i.e., each revision is *not* considered a
1013 descendant of itself. Results are ordered by revision number (a
1017 descendant of itself. Results are ordered by revision number (a
1014 topological sort)."""
1018 topological sort)."""
1015 first = min(revs)
1019 first = min(revs)
1016 if first == nullrev:
1020 if first == nullrev:
1017 for i in self:
1021 for i in self:
1018 yield i
1022 yield i
1019 return
1023 return
1020
1024
1021 seen = set(revs)
1025 seen = set(revs)
1022 for i in self.revs(start=first + 1):
1026 for i in self.revs(start=first + 1):
1023 for x in self.parentrevs(i):
1027 for x in self.parentrevs(i):
1024 if x != nullrev and x in seen:
1028 if x != nullrev and x in seen:
1025 seen.add(i)
1029 seen.add(i)
1026 yield i
1030 yield i
1027 break
1031 break
1028
1032
1029 def findcommonmissing(self, common=None, heads=None):
1033 def findcommonmissing(self, common=None, heads=None):
1030 """Return a tuple of the ancestors of common and the ancestors of heads
1034 """Return a tuple of the ancestors of common and the ancestors of heads
1031 that are not ancestors of common. In revset terminology, we return the
1035 that are not ancestors of common. In revset terminology, we return the
1032 tuple:
1036 tuple:
1033
1037
1034 ::common, (::heads) - (::common)
1038 ::common, (::heads) - (::common)
1035
1039
1036 The list is sorted by revision number, meaning it is
1040 The list is sorted by revision number, meaning it is
1037 topologically sorted.
1041 topologically sorted.
1038
1042
1039 'heads' and 'common' are both lists of node IDs. If heads is
1043 'heads' and 'common' are both lists of node IDs. If heads is
1040 not supplied, uses all of the revlog's heads. If common is not
1044 not supplied, uses all of the revlog's heads. If common is not
1041 supplied, uses nullid."""
1045 supplied, uses nullid."""
1042 if common is None:
1046 if common is None:
1043 common = [nullid]
1047 common = [nullid]
1044 if heads is None:
1048 if heads is None:
1045 heads = self.heads()
1049 heads = self.heads()
1046
1050
1047 common = [self.rev(n) for n in common]
1051 common = [self.rev(n) for n in common]
1048 heads = [self.rev(n) for n in heads]
1052 heads = [self.rev(n) for n in heads]
1049
1053
1050 # we want the ancestors, but inclusive
1054 # we want the ancestors, but inclusive
1051 class lazyset(object):
1055 class lazyset(object):
1052 def __init__(self, lazyvalues):
1056 def __init__(self, lazyvalues):
1053 self.addedvalues = set()
1057 self.addedvalues = set()
1054 self.lazyvalues = lazyvalues
1058 self.lazyvalues = lazyvalues
1055
1059
1056 def __contains__(self, value):
1060 def __contains__(self, value):
1057 return value in self.addedvalues or value in self.lazyvalues
1061 return value in self.addedvalues or value in self.lazyvalues
1058
1062
1059 def __iter__(self):
1063 def __iter__(self):
1060 added = self.addedvalues
1064 added = self.addedvalues
1061 for r in added:
1065 for r in added:
1062 yield r
1066 yield r
1063 for r in self.lazyvalues:
1067 for r in self.lazyvalues:
1064 if not r in added:
1068 if not r in added:
1065 yield r
1069 yield r
1066
1070
1067 def add(self, value):
1071 def add(self, value):
1068 self.addedvalues.add(value)
1072 self.addedvalues.add(value)
1069
1073
1070 def update(self, values):
1074 def update(self, values):
1071 self.addedvalues.update(values)
1075 self.addedvalues.update(values)
1072
1076
1073 has = lazyset(self.ancestors(common))
1077 has = lazyset(self.ancestors(common))
1074 has.add(nullrev)
1078 has.add(nullrev)
1075 has.update(common)
1079 has.update(common)
1076
1080
1077 # take all ancestors from heads that aren't in has
1081 # take all ancestors from heads that aren't in has
1078 missing = set()
1082 missing = set()
1079 visit = collections.deque(r for r in heads if r not in has)
1083 visit = collections.deque(r for r in heads if r not in has)
1080 while visit:
1084 while visit:
1081 r = visit.popleft()
1085 r = visit.popleft()
1082 if r in missing:
1086 if r in missing:
1083 continue
1087 continue
1084 else:
1088 else:
1085 missing.add(r)
1089 missing.add(r)
1086 for p in self.parentrevs(r):
1090 for p in self.parentrevs(r):
1087 if p not in has:
1091 if p not in has:
1088 visit.append(p)
1092 visit.append(p)
1089 missing = list(missing)
1093 missing = list(missing)
1090 missing.sort()
1094 missing.sort()
1091 return has, [self.node(miss) for miss in missing]
1095 return has, [self.node(miss) for miss in missing]
1092
1096
1093 def incrementalmissingrevs(self, common=None):
1097 def incrementalmissingrevs(self, common=None):
1094 """Return an object that can be used to incrementally compute the
1098 """Return an object that can be used to incrementally compute the
1095 revision numbers of the ancestors of arbitrary sets that are not
1099 revision numbers of the ancestors of arbitrary sets that are not
1096 ancestors of common. This is an ancestor.incrementalmissingancestors
1100 ancestors of common. This is an ancestor.incrementalmissingancestors
1097 object.
1101 object.
1098
1102
1099 'common' is a list of revision numbers. If common is not supplied, uses
1103 'common' is a list of revision numbers. If common is not supplied, uses
1100 nullrev.
1104 nullrev.
1101 """
1105 """
1102 if common is None:
1106 if common is None:
1103 common = [nullrev]
1107 common = [nullrev]
1104
1108
1105 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1109 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1106
1110
1107 def findmissingrevs(self, common=None, heads=None):
1111 def findmissingrevs(self, common=None, heads=None):
1108 """Return the revision numbers of the ancestors of heads that
1112 """Return the revision numbers of the ancestors of heads that
1109 are not ancestors of common.
1113 are not ancestors of common.
1110
1114
1111 More specifically, return a list of revision numbers corresponding to
1115 More specifically, return a list of revision numbers corresponding to
1112 nodes N such that every N satisfies the following constraints:
1116 nodes N such that every N satisfies the following constraints:
1113
1117
1114 1. N is an ancestor of some node in 'heads'
1118 1. N is an ancestor of some node in 'heads'
1115 2. N is not an ancestor of any node in 'common'
1119 2. N is not an ancestor of any node in 'common'
1116
1120
1117 The list is sorted by revision number, meaning it is
1121 The list is sorted by revision number, meaning it is
1118 topologically sorted.
1122 topologically sorted.
1119
1123
1120 'heads' and 'common' are both lists of revision numbers. If heads is
1124 'heads' and 'common' are both lists of revision numbers. If heads is
1121 not supplied, uses all of the revlog's heads. If common is not
1125 not supplied, uses all of the revlog's heads. If common is not
1122 supplied, uses nullid."""
1126 supplied, uses nullid."""
1123 if common is None:
1127 if common is None:
1124 common = [nullrev]
1128 common = [nullrev]
1125 if heads is None:
1129 if heads is None:
1126 heads = self.headrevs()
1130 heads = self.headrevs()
1127
1131
1128 inc = self.incrementalmissingrevs(common=common)
1132 inc = self.incrementalmissingrevs(common=common)
1129 return inc.missingancestors(heads)
1133 return inc.missingancestors(heads)
1130
1134
1131 def findmissing(self, common=None, heads=None):
1135 def findmissing(self, common=None, heads=None):
1132 """Return the ancestors of heads that are not ancestors of common.
1136 """Return the ancestors of heads that are not ancestors of common.
1133
1137
1134 More specifically, return a list of nodes N such that every N
1138 More specifically, return a list of nodes N such that every N
1135 satisfies the following constraints:
1139 satisfies the following constraints:
1136
1140
1137 1. N is an ancestor of some node in 'heads'
1141 1. N is an ancestor of some node in 'heads'
1138 2. N is not an ancestor of any node in 'common'
1142 2. N is not an ancestor of any node in 'common'
1139
1143
1140 The list is sorted by revision number, meaning it is
1144 The list is sorted by revision number, meaning it is
1141 topologically sorted.
1145 topologically sorted.
1142
1146
1143 'heads' and 'common' are both lists of node IDs. If heads is
1147 'heads' and 'common' are both lists of node IDs. If heads is
1144 not supplied, uses all of the revlog's heads. If common is not
1148 not supplied, uses all of the revlog's heads. If common is not
1145 supplied, uses nullid."""
1149 supplied, uses nullid."""
1146 if common is None:
1150 if common is None:
1147 common = [nullid]
1151 common = [nullid]
1148 if heads is None:
1152 if heads is None:
1149 heads = self.heads()
1153 heads = self.heads()
1150
1154
1151 common = [self.rev(n) for n in common]
1155 common = [self.rev(n) for n in common]
1152 heads = [self.rev(n) for n in heads]
1156 heads = [self.rev(n) for n in heads]
1153
1157
1154 inc = self.incrementalmissingrevs(common=common)
1158 inc = self.incrementalmissingrevs(common=common)
1155 return [self.node(r) for r in inc.missingancestors(heads)]
1159 return [self.node(r) for r in inc.missingancestors(heads)]
1156
1160
1157 def nodesbetween(self, roots=None, heads=None):
1161 def nodesbetween(self, roots=None, heads=None):
1158 """Return a topological path from 'roots' to 'heads'.
1162 """Return a topological path from 'roots' to 'heads'.
1159
1163
1160 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1164 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1161 topologically sorted list of all nodes N that satisfy both of
1165 topologically sorted list of all nodes N that satisfy both of
1162 these constraints:
1166 these constraints:
1163
1167
1164 1. N is a descendant of some node in 'roots'
1168 1. N is a descendant of some node in 'roots'
1165 2. N is an ancestor of some node in 'heads'
1169 2. N is an ancestor of some node in 'heads'
1166
1170
1167 Every node is considered to be both a descendant and an ancestor
1171 Every node is considered to be both a descendant and an ancestor
1168 of itself, so every reachable node in 'roots' and 'heads' will be
1172 of itself, so every reachable node in 'roots' and 'heads' will be
1169 included in 'nodes'.
1173 included in 'nodes'.
1170
1174
1171 'outroots' is the list of reachable nodes in 'roots', i.e., the
1175 'outroots' is the list of reachable nodes in 'roots', i.e., the
1172 subset of 'roots' that is returned in 'nodes'. Likewise,
1176 subset of 'roots' that is returned in 'nodes'. Likewise,
1173 'outheads' is the subset of 'heads' that is also in 'nodes'.
1177 'outheads' is the subset of 'heads' that is also in 'nodes'.
1174
1178
1175 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1179 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1176 unspecified, uses nullid as the only root. If 'heads' is
1180 unspecified, uses nullid as the only root. If 'heads' is
1177 unspecified, uses list of all of the revlog's heads."""
1181 unspecified, uses list of all of the revlog's heads."""
1178 nonodes = ([], [], [])
1182 nonodes = ([], [], [])
1179 if roots is not None:
1183 if roots is not None:
1180 roots = list(roots)
1184 roots = list(roots)
1181 if not roots:
1185 if not roots:
1182 return nonodes
1186 return nonodes
1183 lowestrev = min([self.rev(n) for n in roots])
1187 lowestrev = min([self.rev(n) for n in roots])
1184 else:
1188 else:
1185 roots = [nullid] # Everybody's a descendant of nullid
1189 roots = [nullid] # Everybody's a descendant of nullid
1186 lowestrev = nullrev
1190 lowestrev = nullrev
1187 if (lowestrev == nullrev) and (heads is None):
1191 if (lowestrev == nullrev) and (heads is None):
1188 # We want _all_ the nodes!
1192 # We want _all_ the nodes!
1189 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1193 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1190 if heads is None:
1194 if heads is None:
1191 # All nodes are ancestors, so the latest ancestor is the last
1195 # All nodes are ancestors, so the latest ancestor is the last
1192 # node.
1196 # node.
1193 highestrev = len(self) - 1
1197 highestrev = len(self) - 1
1194 # Set ancestors to None to signal that every node is an ancestor.
1198 # Set ancestors to None to signal that every node is an ancestor.
1195 ancestors = None
1199 ancestors = None
1196 # Set heads to an empty dictionary for later discovery of heads
1200 # Set heads to an empty dictionary for later discovery of heads
1197 heads = {}
1201 heads = {}
1198 else:
1202 else:
1199 heads = list(heads)
1203 heads = list(heads)
1200 if not heads:
1204 if not heads:
1201 return nonodes
1205 return nonodes
1202 ancestors = set()
1206 ancestors = set()
1203 # Turn heads into a dictionary so we can remove 'fake' heads.
1207 # Turn heads into a dictionary so we can remove 'fake' heads.
1204 # Also, later we will be using it to filter out the heads we can't
1208 # Also, later we will be using it to filter out the heads we can't
1205 # find from roots.
1209 # find from roots.
1206 heads = dict.fromkeys(heads, False)
1210 heads = dict.fromkeys(heads, False)
1207 # Start at the top and keep marking parents until we're done.
1211 # Start at the top and keep marking parents until we're done.
1208 nodestotag = set(heads)
1212 nodestotag = set(heads)
1209 # Remember where the top was so we can use it as a limit later.
1213 # Remember where the top was so we can use it as a limit later.
1210 highestrev = max([self.rev(n) for n in nodestotag])
1214 highestrev = max([self.rev(n) for n in nodestotag])
1211 while nodestotag:
1215 while nodestotag:
1212 # grab a node to tag
1216 # grab a node to tag
1213 n = nodestotag.pop()
1217 n = nodestotag.pop()
1214 # Never tag nullid
1218 # Never tag nullid
1215 if n == nullid:
1219 if n == nullid:
1216 continue
1220 continue
1217 # A node's revision number represents its place in a
1221 # A node's revision number represents its place in a
1218 # topologically sorted list of nodes.
1222 # topologically sorted list of nodes.
1219 r = self.rev(n)
1223 r = self.rev(n)
1220 if r >= lowestrev:
1224 if r >= lowestrev:
1221 if n not in ancestors:
1225 if n not in ancestors:
1222 # If we are possibly a descendant of one of the roots
1226 # If we are possibly a descendant of one of the roots
1223 # and we haven't already been marked as an ancestor
1227 # and we haven't already been marked as an ancestor
1224 ancestors.add(n) # Mark as ancestor
1228 ancestors.add(n) # Mark as ancestor
1225 # Add non-nullid parents to list of nodes to tag.
1229 # Add non-nullid parents to list of nodes to tag.
1226 nodestotag.update([p for p in self.parents(n) if
1230 nodestotag.update([p for p in self.parents(n) if
1227 p != nullid])
1231 p != nullid])
1228 elif n in heads: # We've seen it before, is it a fake head?
1232 elif n in heads: # We've seen it before, is it a fake head?
1229 # So it is, real heads should not be the ancestors of
1233 # So it is, real heads should not be the ancestors of
1230 # any other heads.
1234 # any other heads.
1231 heads.pop(n)
1235 heads.pop(n)
1232 if not ancestors:
1236 if not ancestors:
1233 return nonodes
1237 return nonodes
1234 # Now that we have our set of ancestors, we want to remove any
1238 # Now that we have our set of ancestors, we want to remove any
1235 # roots that are not ancestors.
1239 # roots that are not ancestors.
1236
1240
1237 # If one of the roots was nullid, everything is included anyway.
1241 # If one of the roots was nullid, everything is included anyway.
1238 if lowestrev > nullrev:
1242 if lowestrev > nullrev:
1239 # But, since we weren't, let's recompute the lowest rev to not
1243 # But, since we weren't, let's recompute the lowest rev to not
1240 # include roots that aren't ancestors.
1244 # include roots that aren't ancestors.
1241
1245
1242 # Filter out roots that aren't ancestors of heads
1246 # Filter out roots that aren't ancestors of heads
1243 roots = [root for root in roots if root in ancestors]
1247 roots = [root for root in roots if root in ancestors]
1244 # Recompute the lowest revision
1248 # Recompute the lowest revision
1245 if roots:
1249 if roots:
1246 lowestrev = min([self.rev(root) for root in roots])
1250 lowestrev = min([self.rev(root) for root in roots])
1247 else:
1251 else:
1248 # No more roots? Return empty list
1252 # No more roots? Return empty list
1249 return nonodes
1253 return nonodes
1250 else:
1254 else:
1251 # We are descending from nullid, and don't need to care about
1255 # We are descending from nullid, and don't need to care about
1252 # any other roots.
1256 # any other roots.
1253 lowestrev = nullrev
1257 lowestrev = nullrev
1254 roots = [nullid]
1258 roots = [nullid]
1255 # Transform our roots list into a set.
1259 # Transform our roots list into a set.
1256 descendants = set(roots)
1260 descendants = set(roots)
1257 # Also, keep the original roots so we can filter out roots that aren't
1261 # Also, keep the original roots so we can filter out roots that aren't
1258 # 'real' roots (i.e. are descended from other roots).
1262 # 'real' roots (i.e. are descended from other roots).
1259 roots = descendants.copy()
1263 roots = descendants.copy()
1260 # Our topologically sorted list of output nodes.
1264 # Our topologically sorted list of output nodes.
1261 orderedout = []
1265 orderedout = []
1262 # Don't start at nullid since we don't want nullid in our output list,
1266 # Don't start at nullid since we don't want nullid in our output list,
1263 # and if nullid shows up in descendants, empty parents will look like
1267 # and if nullid shows up in descendants, empty parents will look like
1264 # they're descendants.
1268 # they're descendants.
1265 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1269 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1266 n = self.node(r)
1270 n = self.node(r)
1267 isdescendant = False
1271 isdescendant = False
1268 if lowestrev == nullrev: # Everybody is a descendant of nullid
1272 if lowestrev == nullrev: # Everybody is a descendant of nullid
1269 isdescendant = True
1273 isdescendant = True
1270 elif n in descendants:
1274 elif n in descendants:
1271 # n is already a descendant
1275 # n is already a descendant
1272 isdescendant = True
1276 isdescendant = True
1273 # This check only needs to be done here because all the roots
1277 # This check only needs to be done here because all the roots
1274 # will start being marked is descendants before the loop.
1278 # will start being marked is descendants before the loop.
1275 if n in roots:
1279 if n in roots:
1276 # If n was a root, check if it's a 'real' root.
1280 # If n was a root, check if it's a 'real' root.
1277 p = tuple(self.parents(n))
1281 p = tuple(self.parents(n))
1278 # If any of its parents are descendants, it's not a root.
1282 # If any of its parents are descendants, it's not a root.
1279 if (p[0] in descendants) or (p[1] in descendants):
1283 if (p[0] in descendants) or (p[1] in descendants):
1280 roots.remove(n)
1284 roots.remove(n)
1281 else:
1285 else:
1282 p = tuple(self.parents(n))
1286 p = tuple(self.parents(n))
1283 # A node is a descendant if either of its parents are
1287 # A node is a descendant if either of its parents are
1284 # descendants. (We seeded the dependents list with the roots
1288 # descendants. (We seeded the dependents list with the roots
1285 # up there, remember?)
1289 # up there, remember?)
1286 if (p[0] in descendants) or (p[1] in descendants):
1290 if (p[0] in descendants) or (p[1] in descendants):
1287 descendants.add(n)
1291 descendants.add(n)
1288 isdescendant = True
1292 isdescendant = True
1289 if isdescendant and ((ancestors is None) or (n in ancestors)):
1293 if isdescendant and ((ancestors is None) or (n in ancestors)):
1290 # Only include nodes that are both descendants and ancestors.
1294 # Only include nodes that are both descendants and ancestors.
1291 orderedout.append(n)
1295 orderedout.append(n)
1292 if (ancestors is not None) and (n in heads):
1296 if (ancestors is not None) and (n in heads):
1293 # We're trying to figure out which heads are reachable
1297 # We're trying to figure out which heads are reachable
1294 # from roots.
1298 # from roots.
1295 # Mark this head as having been reached
1299 # Mark this head as having been reached
1296 heads[n] = True
1300 heads[n] = True
1297 elif ancestors is None:
1301 elif ancestors is None:
1298 # Otherwise, we're trying to discover the heads.
1302 # Otherwise, we're trying to discover the heads.
1299 # Assume this is a head because if it isn't, the next step
1303 # Assume this is a head because if it isn't, the next step
1300 # will eventually remove it.
1304 # will eventually remove it.
1301 heads[n] = True
1305 heads[n] = True
1302 # But, obviously its parents aren't.
1306 # But, obviously its parents aren't.
1303 for p in self.parents(n):
1307 for p in self.parents(n):
1304 heads.pop(p, None)
1308 heads.pop(p, None)
1305 heads = [head for head, flag in heads.iteritems() if flag]
1309 heads = [head for head, flag in heads.iteritems() if flag]
1306 roots = list(roots)
1310 roots = list(roots)
1307 assert orderedout
1311 assert orderedout
1308 assert roots
1312 assert roots
1309 assert heads
1313 assert heads
1310 return (orderedout, roots, heads)
1314 return (orderedout, roots, heads)
1311
1315
1312 def headrevs(self):
1316 def headrevs(self):
1313 try:
1317 try:
1314 return self.index.headrevs()
1318 return self.index.headrevs()
1315 except AttributeError:
1319 except AttributeError:
1316 return self._headrevs()
1320 return self._headrevs()
1317
1321
1318 def computephases(self, roots):
1322 def computephases(self, roots):
1319 return self.index.computephasesmapsets(roots)
1323 return self.index.computephasesmapsets(roots)
1320
1324
1321 def _headrevs(self):
1325 def _headrevs(self):
1322 count = len(self)
1326 count = len(self)
1323 if not count:
1327 if not count:
1324 return [nullrev]
1328 return [nullrev]
1325 # we won't iter over filtered rev so nobody is a head at start
1329 # we won't iter over filtered rev so nobody is a head at start
1326 ishead = [0] * (count + 1)
1330 ishead = [0] * (count + 1)
1327 index = self.index
1331 index = self.index
1328 for r in self:
1332 for r in self:
1329 ishead[r] = 1 # I may be an head
1333 ishead[r] = 1 # I may be an head
1330 e = index[r]
1334 e = index[r]
1331 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1335 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1332 return [r for r, val in enumerate(ishead) if val]
1336 return [r for r, val in enumerate(ishead) if val]
1333
1337
1334 def heads(self, start=None, stop=None):
1338 def heads(self, start=None, stop=None):
1335 """return the list of all nodes that have no children
1339 """return the list of all nodes that have no children
1336
1340
1337 if start is specified, only heads that are descendants of
1341 if start is specified, only heads that are descendants of
1338 start will be returned
1342 start will be returned
1339 if stop is specified, it will consider all the revs from stop
1343 if stop is specified, it will consider all the revs from stop
1340 as if they had no children
1344 as if they had no children
1341 """
1345 """
1342 if start is None and stop is None:
1346 if start is None and stop is None:
1343 if not len(self):
1347 if not len(self):
1344 return [nullid]
1348 return [nullid]
1345 return [self.node(r) for r in self.headrevs()]
1349 return [self.node(r) for r in self.headrevs()]
1346
1350
1347 if start is None:
1351 if start is None:
1348 start = nullid
1352 start = nullid
1349 if stop is None:
1353 if stop is None:
1350 stop = []
1354 stop = []
1351 stoprevs = set([self.rev(n) for n in stop])
1355 stoprevs = set([self.rev(n) for n in stop])
1352 startrev = self.rev(start)
1356 startrev = self.rev(start)
1353 reachable = {startrev}
1357 reachable = {startrev}
1354 heads = {startrev}
1358 heads = {startrev}
1355
1359
1356 parentrevs = self.parentrevs
1360 parentrevs = self.parentrevs
1357 for r in self.revs(start=startrev + 1):
1361 for r in self.revs(start=startrev + 1):
1358 for p in parentrevs(r):
1362 for p in parentrevs(r):
1359 if p in reachable:
1363 if p in reachable:
1360 if r not in stoprevs:
1364 if r not in stoprevs:
1361 reachable.add(r)
1365 reachable.add(r)
1362 heads.add(r)
1366 heads.add(r)
1363 if p in heads and p not in stoprevs:
1367 if p in heads and p not in stoprevs:
1364 heads.remove(p)
1368 heads.remove(p)
1365
1369
1366 return [self.node(r) for r in heads]
1370 return [self.node(r) for r in heads]
1367
1371
1368 def children(self, node):
1372 def children(self, node):
1369 """find the children of a given node"""
1373 """find the children of a given node"""
1370 c = []
1374 c = []
1371 p = self.rev(node)
1375 p = self.rev(node)
1372 for r in self.revs(start=p + 1):
1376 for r in self.revs(start=p + 1):
1373 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1377 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1374 if prevs:
1378 if prevs:
1375 for pr in prevs:
1379 for pr in prevs:
1376 if pr == p:
1380 if pr == p:
1377 c.append(self.node(r))
1381 c.append(self.node(r))
1378 elif p == nullrev:
1382 elif p == nullrev:
1379 c.append(self.node(r))
1383 c.append(self.node(r))
1380 return c
1384 return c
1381
1385
1382 def descendant(self, start, end):
1386 def descendant(self, start, end):
1383 """True if revision 'end' is an descendant of revision 'start'
1387 """True if revision 'end' is an descendant of revision 'start'
1384
1388
1385 A revision is considered as a descendant of itself."""
1389 A revision is considered as a descendant of itself."""
1386 if start == nullrev:
1390 if start == nullrev:
1387 return True
1391 return True
1388 elif start == end:
1392 elif start == end:
1389 return True
1393 return True
1390 return start in self._commonancestorsheads(start, end)
1394 return start in self._commonancestorsheads(start, end)
1391
1395
1392 def commonancestorsheads(self, a, b):
1396 def commonancestorsheads(self, a, b):
1393 """calculate all the heads of the common ancestors of nodes a and b"""
1397 """calculate all the heads of the common ancestors of nodes a and b"""
1394 a, b = self.rev(a), self.rev(b)
1398 a, b = self.rev(a), self.rev(b)
1395 ancs = self._commonancestorsheads(a, b)
1399 ancs = self._commonancestorsheads(a, b)
1396 return pycompat.maplist(self.node, ancs)
1400 return pycompat.maplist(self.node, ancs)
1397
1401
1398 def _commonancestorsheads(self, *revs):
1402 def _commonancestorsheads(self, *revs):
1399 """calculate all the heads of the common ancestors of revs"""
1403 """calculate all the heads of the common ancestors of revs"""
1400 try:
1404 try:
1401 ancs = self.index.commonancestorsheads(*revs)
1405 ancs = self.index.commonancestorsheads(*revs)
1402 except (AttributeError, OverflowError): # C implementation failed
1406 except (AttributeError, OverflowError): # C implementation failed
1403 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1407 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1404 return ancs
1408 return ancs
1405
1409
1406 def isancestor(self, a, b):
1410 def isancestor(self, a, b):
1407 """return True if node a is an ancestor of node b
1411 """return True if node a is an ancestor of node b
1408
1412
1409 The implementation of this is trivial but the use of
1413 The implementation of this is trivial but the use of
1410 commonancestorsheads is not."""
1414 commonancestorsheads is not."""
1411 a, b = self.rev(a), self.rev(b)
1415 a, b = self.rev(a), self.rev(b)
1412 return self.descendant(a, b)
1416 return self.descendant(a, b)
1413
1417
1414 def ancestor(self, a, b):
1418 def ancestor(self, a, b):
1415 """calculate the "best" common ancestor of nodes a and b"""
1419 """calculate the "best" common ancestor of nodes a and b"""
1416
1420
1417 a, b = self.rev(a), self.rev(b)
1421 a, b = self.rev(a), self.rev(b)
1418 try:
1422 try:
1419 ancs = self.index.ancestors(a, b)
1423 ancs = self.index.ancestors(a, b)
1420 except (AttributeError, OverflowError):
1424 except (AttributeError, OverflowError):
1421 ancs = ancestor.ancestors(self.parentrevs, a, b)
1425 ancs = ancestor.ancestors(self.parentrevs, a, b)
1422 if ancs:
1426 if ancs:
1423 # choose a consistent winner when there's a tie
1427 # choose a consistent winner when there's a tie
1424 return min(map(self.node, ancs))
1428 return min(map(self.node, ancs))
1425 return nullid
1429 return nullid
1426
1430
1427 def _match(self, id):
1431 def _match(self, id):
1428 if isinstance(id, int):
1432 if isinstance(id, int):
1429 # rev
1433 # rev
1430 return self.node(id)
1434 return self.node(id)
1431 if len(id) == 20:
1435 if len(id) == 20:
1432 # possibly a binary node
1436 # possibly a binary node
1433 # odds of a binary node being all hex in ASCII are 1 in 10**25
1437 # odds of a binary node being all hex in ASCII are 1 in 10**25
1434 try:
1438 try:
1435 node = id
1439 node = id
1436 self.rev(node) # quick search the index
1440 self.rev(node) # quick search the index
1437 return node
1441 return node
1438 except LookupError:
1442 except LookupError:
1439 pass # may be partial hex id
1443 pass # may be partial hex id
1440 try:
1444 try:
1441 # str(rev)
1445 # str(rev)
1442 rev = int(id)
1446 rev = int(id)
1443 if "%d" % rev != id:
1447 if "%d" % rev != id:
1444 raise ValueError
1448 raise ValueError
1445 if rev < 0:
1449 if rev < 0:
1446 rev = len(self) + rev
1450 rev = len(self) + rev
1447 if rev < 0 or rev >= len(self):
1451 if rev < 0 or rev >= len(self):
1448 raise ValueError
1452 raise ValueError
1449 return self.node(rev)
1453 return self.node(rev)
1450 except (ValueError, OverflowError):
1454 except (ValueError, OverflowError):
1451 pass
1455 pass
1452 if len(id) == 40:
1456 if len(id) == 40:
1453 try:
1457 try:
1454 # a full hex nodeid?
1458 # a full hex nodeid?
1455 node = bin(id)
1459 node = bin(id)
1456 self.rev(node)
1460 self.rev(node)
1457 return node
1461 return node
1458 except (TypeError, LookupError):
1462 except (TypeError, LookupError):
1459 pass
1463 pass
1460
1464
1461 def _partialmatch(self, id):
1465 def _partialmatch(self, id):
1462 # we don't care wdirfilenodeids as they should be always full hash
1466 # we don't care wdirfilenodeids as they should be always full hash
1463 maybewdir = wdirhex.startswith(id)
1467 maybewdir = wdirhex.startswith(id)
1464 try:
1468 try:
1465 partial = self.index.partialmatch(id)
1469 partial = self.index.partialmatch(id)
1466 if partial and self.hasnode(partial):
1470 if partial and self.hasnode(partial):
1467 if maybewdir:
1471 if maybewdir:
1468 # single 'ff...' match in radix tree, ambiguous with wdir
1472 # single 'ff...' match in radix tree, ambiguous with wdir
1469 raise RevlogError
1473 raise RevlogError
1470 return partial
1474 return partial
1471 if maybewdir:
1475 if maybewdir:
1472 # no 'ff...' match in radix tree, wdir identified
1476 # no 'ff...' match in radix tree, wdir identified
1473 raise error.WdirUnsupported
1477 raise error.WdirUnsupported
1474 return None
1478 return None
1475 except RevlogError:
1479 except RevlogError:
1476 # parsers.c radix tree lookup gave multiple matches
1480 # parsers.c radix tree lookup gave multiple matches
1477 # fast path: for unfiltered changelog, radix tree is accurate
1481 # fast path: for unfiltered changelog, radix tree is accurate
1478 if not getattr(self, 'filteredrevs', None):
1482 if not getattr(self, 'filteredrevs', None):
1479 raise LookupError(id, self.indexfile,
1483 raise LookupError(id, self.indexfile,
1480 _('ambiguous identifier'))
1484 _('ambiguous identifier'))
1481 # fall through to slow path that filters hidden revisions
1485 # fall through to slow path that filters hidden revisions
1482 except (AttributeError, ValueError):
1486 except (AttributeError, ValueError):
1483 # we are pure python, or key was too short to search radix tree
1487 # we are pure python, or key was too short to search radix tree
1484 pass
1488 pass
1485
1489
1486 if id in self._pcache:
1490 if id in self._pcache:
1487 return self._pcache[id]
1491 return self._pcache[id]
1488
1492
1489 if len(id) <= 40:
1493 if len(id) <= 40:
1490 try:
1494 try:
1491 # hex(node)[:...]
1495 # hex(node)[:...]
1492 l = len(id) // 2 # grab an even number of digits
1496 l = len(id) // 2 # grab an even number of digits
1493 prefix = bin(id[:l * 2])
1497 prefix = bin(id[:l * 2])
1494 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1498 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1495 nl = [n for n in nl if hex(n).startswith(id) and
1499 nl = [n for n in nl if hex(n).startswith(id) and
1496 self.hasnode(n)]
1500 self.hasnode(n)]
1497 if len(nl) > 0:
1501 if len(nl) > 0:
1498 if len(nl) == 1 and not maybewdir:
1502 if len(nl) == 1 and not maybewdir:
1499 self._pcache[id] = nl[0]
1503 self._pcache[id] = nl[0]
1500 return nl[0]
1504 return nl[0]
1501 raise LookupError(id, self.indexfile,
1505 raise LookupError(id, self.indexfile,
1502 _('ambiguous identifier'))
1506 _('ambiguous identifier'))
1503 if maybewdir:
1507 if maybewdir:
1504 raise error.WdirUnsupported
1508 raise error.WdirUnsupported
1505 return None
1509 return None
1506 except TypeError:
1510 except TypeError:
1507 pass
1511 pass
1508
1512
1509 def lookup(self, id):
1513 def lookup(self, id):
1510 """locate a node based on:
1514 """locate a node based on:
1511 - revision number or str(revision number)
1515 - revision number or str(revision number)
1512 - nodeid or subset of hex nodeid
1516 - nodeid or subset of hex nodeid
1513 """
1517 """
1514 n = self._match(id)
1518 n = self._match(id)
1515 if n is not None:
1519 if n is not None:
1516 return n
1520 return n
1517 n = self._partialmatch(id)
1521 n = self._partialmatch(id)
1518 if n:
1522 if n:
1519 return n
1523 return n
1520
1524
1521 raise LookupError(id, self.indexfile, _('no match found'))
1525 raise LookupError(id, self.indexfile, _('no match found'))
1522
1526
1523 def shortest(self, node, minlength=1):
1527 def shortest(self, node, minlength=1):
1524 """Find the shortest unambiguous prefix that matches node."""
1528 """Find the shortest unambiguous prefix that matches node."""
1525 def isvalid(prefix):
1529 def isvalid(prefix):
1526 try:
1530 try:
1527 node = self._partialmatch(prefix)
1531 node = self._partialmatch(prefix)
1528 except error.RevlogError:
1532 except error.RevlogError:
1529 return False
1533 return False
1530 except error.WdirUnsupported:
1534 except error.WdirUnsupported:
1531 # single 'ff...' match
1535 # single 'ff...' match
1532 return True
1536 return True
1533 if node is None:
1537 if node is None:
1534 raise LookupError(node, self.indexfile, _('no node'))
1538 raise LookupError(node, self.indexfile, _('no node'))
1535 return True
1539 return True
1536
1540
1537 def maybewdir(prefix):
1541 def maybewdir(prefix):
1538 return all(c == 'f' for c in prefix)
1542 return all(c == 'f' for c in prefix)
1539
1543
1540 hexnode = hex(node)
1544 hexnode = hex(node)
1541
1545
1542 def disambiguate(hexnode, minlength):
1546 def disambiguate(hexnode, minlength):
1543 """Disambiguate against wdirid."""
1547 """Disambiguate against wdirid."""
1544 for length in range(minlength, 41):
1548 for length in range(minlength, 41):
1545 prefix = hexnode[:length]
1549 prefix = hexnode[:length]
1546 if not maybewdir(prefix):
1550 if not maybewdir(prefix):
1547 return prefix
1551 return prefix
1548
1552
1549 if not getattr(self, 'filteredrevs', None):
1553 if not getattr(self, 'filteredrevs', None):
1550 try:
1554 try:
1551 length = max(self.index.shortest(node), minlength)
1555 length = max(self.index.shortest(node), minlength)
1552 return disambiguate(hexnode, length)
1556 return disambiguate(hexnode, length)
1553 except RevlogError:
1557 except RevlogError:
1554 if node != wdirid:
1558 if node != wdirid:
1555 raise LookupError(node, self.indexfile, _('no node'))
1559 raise LookupError(node, self.indexfile, _('no node'))
1556 except AttributeError:
1560 except AttributeError:
1557 # Fall through to pure code
1561 # Fall through to pure code
1558 pass
1562 pass
1559
1563
1560 if node == wdirid:
1564 if node == wdirid:
1561 for length in range(minlength, 41):
1565 for length in range(minlength, 41):
1562 prefix = hexnode[:length]
1566 prefix = hexnode[:length]
1563 if isvalid(prefix):
1567 if isvalid(prefix):
1564 return prefix
1568 return prefix
1565
1569
1566 for length in range(minlength, 41):
1570 for length in range(minlength, 41):
1567 prefix = hexnode[:length]
1571 prefix = hexnode[:length]
1568 if isvalid(prefix):
1572 if isvalid(prefix):
1569 return disambiguate(hexnode, length)
1573 return disambiguate(hexnode, length)
1570
1574
1571 def cmp(self, node, text):
1575 def cmp(self, node, text):
1572 """compare text with a given file revision
1576 """compare text with a given file revision
1573
1577
1574 returns True if text is different than what is stored.
1578 returns True if text is different than what is stored.
1575 """
1579 """
1576 p1, p2 = self.parents(node)
1580 p1, p2 = self.parents(node)
1577 return hash(text, p1, p2) != node
1581 return hash(text, p1, p2) != node
1578
1582
1579 def _cachesegment(self, offset, data):
1583 def _cachesegment(self, offset, data):
1580 """Add a segment to the revlog cache.
1584 """Add a segment to the revlog cache.
1581
1585
1582 Accepts an absolute offset and the data that is at that location.
1586 Accepts an absolute offset and the data that is at that location.
1583 """
1587 """
1584 o, d = self._chunkcache
1588 o, d = self._chunkcache
1585 # try to add to existing cache
1589 # try to add to existing cache
1586 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1590 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1587 self._chunkcache = o, d + data
1591 self._chunkcache = o, d + data
1588 else:
1592 else:
1589 self._chunkcache = offset, data
1593 self._chunkcache = offset, data
1590
1594
1591 def _readsegment(self, offset, length, df=None):
1595 def _readsegment(self, offset, length, df=None):
1592 """Load a segment of raw data from the revlog.
1596 """Load a segment of raw data from the revlog.
1593
1597
1594 Accepts an absolute offset, length to read, and an optional existing
1598 Accepts an absolute offset, length to read, and an optional existing
1595 file handle to read from.
1599 file handle to read from.
1596
1600
1597 If an existing file handle is passed, it will be seeked and the
1601 If an existing file handle is passed, it will be seeked and the
1598 original seek position will NOT be restored.
1602 original seek position will NOT be restored.
1599
1603
1600 Returns a str or buffer of raw byte data.
1604 Returns a str or buffer of raw byte data.
1601 """
1605 """
1602 # Cache data both forward and backward around the requested
1606 # Cache data both forward and backward around the requested
1603 # data, in a fixed size window. This helps speed up operations
1607 # data, in a fixed size window. This helps speed up operations
1604 # involving reading the revlog backwards.
1608 # involving reading the revlog backwards.
1605 cachesize = self._chunkcachesize
1609 cachesize = self._chunkcachesize
1606 realoffset = offset & ~(cachesize - 1)
1610 realoffset = offset & ~(cachesize - 1)
1607 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1611 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1608 - realoffset)
1612 - realoffset)
1609 with self._datareadfp(df) as df:
1613 with self._datareadfp(df) as df:
1610 df.seek(realoffset)
1614 df.seek(realoffset)
1611 d = df.read(reallength)
1615 d = df.read(reallength)
1612 self._cachesegment(realoffset, d)
1616 self._cachesegment(realoffset, d)
1613 if offset != realoffset or reallength != length:
1617 if offset != realoffset or reallength != length:
1614 return util.buffer(d, offset - realoffset, length)
1618 return util.buffer(d, offset - realoffset, length)
1615 return d
1619 return d
1616
1620
1617 def _getsegment(self, offset, length, df=None):
1621 def _getsegment(self, offset, length, df=None):
1618 """Obtain a segment of raw data from the revlog.
1622 """Obtain a segment of raw data from the revlog.
1619
1623
1620 Accepts an absolute offset, length of bytes to obtain, and an
1624 Accepts an absolute offset, length of bytes to obtain, and an
1621 optional file handle to the already-opened revlog. If the file
1625 optional file handle to the already-opened revlog. If the file
1622 handle is used, it's original seek position will not be preserved.
1626 handle is used, it's original seek position will not be preserved.
1623
1627
1624 Requests for data may be returned from a cache.
1628 Requests for data may be returned from a cache.
1625
1629
1626 Returns a str or a buffer instance of raw byte data.
1630 Returns a str or a buffer instance of raw byte data.
1627 """
1631 """
1628 o, d = self._chunkcache
1632 o, d = self._chunkcache
1629 l = len(d)
1633 l = len(d)
1630
1634
1631 # is it in the cache?
1635 # is it in the cache?
1632 cachestart = offset - o
1636 cachestart = offset - o
1633 cacheend = cachestart + length
1637 cacheend = cachestart + length
1634 if cachestart >= 0 and cacheend <= l:
1638 if cachestart >= 0 and cacheend <= l:
1635 if cachestart == 0 and cacheend == l:
1639 if cachestart == 0 and cacheend == l:
1636 return d # avoid a copy
1640 return d # avoid a copy
1637 return util.buffer(d, cachestart, cacheend - cachestart)
1641 return util.buffer(d, cachestart, cacheend - cachestart)
1638
1642
1639 return self._readsegment(offset, length, df=df)
1643 return self._readsegment(offset, length, df=df)
1640
1644
1641 def _getsegmentforrevs(self, startrev, endrev, df=None):
1645 def _getsegmentforrevs(self, startrev, endrev, df=None):
1642 """Obtain a segment of raw data corresponding to a range of revisions.
1646 """Obtain a segment of raw data corresponding to a range of revisions.
1643
1647
1644 Accepts the start and end revisions and an optional already-open
1648 Accepts the start and end revisions and an optional already-open
1645 file handle to be used for reading. If the file handle is read, its
1649 file handle to be used for reading. If the file handle is read, its
1646 seek position will not be preserved.
1650 seek position will not be preserved.
1647
1651
1648 Requests for data may be satisfied by a cache.
1652 Requests for data may be satisfied by a cache.
1649
1653
1650 Returns a 2-tuple of (offset, data) for the requested range of
1654 Returns a 2-tuple of (offset, data) for the requested range of
1651 revisions. Offset is the integer offset from the beginning of the
1655 revisions. Offset is the integer offset from the beginning of the
1652 revlog and data is a str or buffer of the raw byte data.
1656 revlog and data is a str or buffer of the raw byte data.
1653
1657
1654 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1658 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1655 to determine where each revision's data begins and ends.
1659 to determine where each revision's data begins and ends.
1656 """
1660 """
1657 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1661 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1658 # (functions are expensive).
1662 # (functions are expensive).
1659 index = self.index
1663 index = self.index
1660 istart = index[startrev]
1664 istart = index[startrev]
1661 start = int(istart[0] >> 16)
1665 start = int(istart[0] >> 16)
1662 if startrev == endrev:
1666 if startrev == endrev:
1663 end = start + istart[1]
1667 end = start + istart[1]
1664 else:
1668 else:
1665 iend = index[endrev]
1669 iend = index[endrev]
1666 end = int(iend[0] >> 16) + iend[1]
1670 end = int(iend[0] >> 16) + iend[1]
1667
1671
1668 if self._inline:
1672 if self._inline:
1669 start += (startrev + 1) * self._io.size
1673 start += (startrev + 1) * self._io.size
1670 end += (endrev + 1) * self._io.size
1674 end += (endrev + 1) * self._io.size
1671 length = end - start
1675 length = end - start
1672
1676
1673 return start, self._getsegment(start, length, df=df)
1677 return start, self._getsegment(start, length, df=df)
1674
1678
1675 def _chunk(self, rev, df=None):
1679 def _chunk(self, rev, df=None):
1676 """Obtain a single decompressed chunk for a revision.
1680 """Obtain a single decompressed chunk for a revision.
1677
1681
1678 Accepts an integer revision and an optional already-open file handle
1682 Accepts an integer revision and an optional already-open file handle
1679 to be used for reading. If used, the seek position of the file will not
1683 to be used for reading. If used, the seek position of the file will not
1680 be preserved.
1684 be preserved.
1681
1685
1682 Returns a str holding uncompressed data for the requested revision.
1686 Returns a str holding uncompressed data for the requested revision.
1683 """
1687 """
1684 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1688 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1685
1689
1686 def _chunks(self, revs, df=None):
1690 def _chunks(self, revs, df=None):
1687 """Obtain decompressed chunks for the specified revisions.
1691 """Obtain decompressed chunks for the specified revisions.
1688
1692
1689 Accepts an iterable of numeric revisions that are assumed to be in
1693 Accepts an iterable of numeric revisions that are assumed to be in
1690 ascending order. Also accepts an optional already-open file handle
1694 ascending order. Also accepts an optional already-open file handle
1691 to be used for reading. If used, the seek position of the file will
1695 to be used for reading. If used, the seek position of the file will
1692 not be preserved.
1696 not be preserved.
1693
1697
1694 This function is similar to calling ``self._chunk()`` multiple times,
1698 This function is similar to calling ``self._chunk()`` multiple times,
1695 but is faster.
1699 but is faster.
1696
1700
1697 Returns a list with decompressed data for each requested revision.
1701 Returns a list with decompressed data for each requested revision.
1698 """
1702 """
1699 if not revs:
1703 if not revs:
1700 return []
1704 return []
1701 start = self.start
1705 start = self.start
1702 length = self.length
1706 length = self.length
1703 inline = self._inline
1707 inline = self._inline
1704 iosize = self._io.size
1708 iosize = self._io.size
1705 buffer = util.buffer
1709 buffer = util.buffer
1706
1710
1707 l = []
1711 l = []
1708 ladd = l.append
1712 ladd = l.append
1709
1713
1710 if not self._withsparseread:
1714 if not self._withsparseread:
1711 slicedchunks = (revs,)
1715 slicedchunks = (revs,)
1712 else:
1716 else:
1713 slicedchunks = _slicechunk(self, revs)
1717 slicedchunks = _slicechunk(self, revs)
1714
1718
1715 for revschunk in slicedchunks:
1719 for revschunk in slicedchunks:
1716 firstrev = revschunk[0]
1720 firstrev = revschunk[0]
1717 # Skip trailing revisions with empty diff
1721 # Skip trailing revisions with empty diff
1718 for lastrev in revschunk[::-1]:
1722 for lastrev in revschunk[::-1]:
1719 if length(lastrev) != 0:
1723 if length(lastrev) != 0:
1720 break
1724 break
1721
1725
1722 try:
1726 try:
1723 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1727 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1724 except OverflowError:
1728 except OverflowError:
1725 # issue4215 - we can't cache a run of chunks greater than
1729 # issue4215 - we can't cache a run of chunks greater than
1726 # 2G on Windows
1730 # 2G on Windows
1727 return [self._chunk(rev, df=df) for rev in revschunk]
1731 return [self._chunk(rev, df=df) for rev in revschunk]
1728
1732
1729 decomp = self.decompress
1733 decomp = self.decompress
1730 for rev in revschunk:
1734 for rev in revschunk:
1731 chunkstart = start(rev)
1735 chunkstart = start(rev)
1732 if inline:
1736 if inline:
1733 chunkstart += (rev + 1) * iosize
1737 chunkstart += (rev + 1) * iosize
1734 chunklength = length(rev)
1738 chunklength = length(rev)
1735 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1739 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1736
1740
1737 return l
1741 return l
1738
1742
1739 def _chunkclear(self):
1743 def _chunkclear(self):
1740 """Clear the raw chunk cache."""
1744 """Clear the raw chunk cache."""
1741 self._chunkcache = (0, '')
1745 self._chunkcache = (0, '')
1742
1746
1743 def deltaparent(self, rev):
1747 def deltaparent(self, rev):
1744 """return deltaparent of the given revision"""
1748 """return deltaparent of the given revision"""
1745 base = self.index[rev][3]
1749 base = self.index[rev][3]
1746 if base == rev:
1750 if base == rev:
1747 return nullrev
1751 return nullrev
1748 elif self._generaldelta:
1752 elif self._generaldelta:
1749 return base
1753 return base
1750 else:
1754 else:
1751 return rev - 1
1755 return rev - 1
1752
1756
1753 def revdiff(self, rev1, rev2):
1757 def revdiff(self, rev1, rev2):
1754 """return or calculate a delta between two revisions
1758 """return or calculate a delta between two revisions
1755
1759
1756 The delta calculated is in binary form and is intended to be written to
1760 The delta calculated is in binary form and is intended to be written to
1757 revlog data directly. So this function needs raw revision data.
1761 revlog data directly. So this function needs raw revision data.
1758 """
1762 """
1759 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1763 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1760 return bytes(self._chunk(rev2))
1764 return bytes(self._chunk(rev2))
1761
1765
1762 return mdiff.textdiff(self.revision(rev1, raw=True),
1766 return mdiff.textdiff(self.revision(rev1, raw=True),
1763 self.revision(rev2, raw=True))
1767 self.revision(rev2, raw=True))
1764
1768
1765 def revision(self, nodeorrev, _df=None, raw=False):
1769 def revision(self, nodeorrev, _df=None, raw=False):
1766 """return an uncompressed revision of a given node or revision
1770 """return an uncompressed revision of a given node or revision
1767 number.
1771 number.
1768
1772
1769 _df - an existing file handle to read from. (internal-only)
1773 _df - an existing file handle to read from. (internal-only)
1770 raw - an optional argument specifying if the revision data is to be
1774 raw - an optional argument specifying if the revision data is to be
1771 treated as raw data when applying flag transforms. 'raw' should be set
1775 treated as raw data when applying flag transforms. 'raw' should be set
1772 to True when generating changegroups or in debug commands.
1776 to True when generating changegroups or in debug commands.
1773 """
1777 """
1774 if isinstance(nodeorrev, int):
1778 if isinstance(nodeorrev, int):
1775 rev = nodeorrev
1779 rev = nodeorrev
1776 node = self.node(rev)
1780 node = self.node(rev)
1777 else:
1781 else:
1778 node = nodeorrev
1782 node = nodeorrev
1779 rev = None
1783 rev = None
1780
1784
1781 cachedrev = None
1785 cachedrev = None
1782 flags = None
1786 flags = None
1783 rawtext = None
1787 rawtext = None
1784 if node == nullid:
1788 if node == nullid:
1785 return ""
1789 return ""
1786 if self._cache:
1790 if self._cache:
1787 if self._cache[0] == node:
1791 if self._cache[0] == node:
1788 # _cache only stores rawtext
1792 # _cache only stores rawtext
1789 if raw:
1793 if raw:
1790 return self._cache[2]
1794 return self._cache[2]
1791 # duplicated, but good for perf
1795 # duplicated, but good for perf
1792 if rev is None:
1796 if rev is None:
1793 rev = self.rev(node)
1797 rev = self.rev(node)
1794 if flags is None:
1798 if flags is None:
1795 flags = self.flags(rev)
1799 flags = self.flags(rev)
1796 # no extra flags set, no flag processor runs, text = rawtext
1800 # no extra flags set, no flag processor runs, text = rawtext
1797 if flags == REVIDX_DEFAULT_FLAGS:
1801 if flags == REVIDX_DEFAULT_FLAGS:
1798 return self._cache[2]
1802 return self._cache[2]
1799 # rawtext is reusable. need to run flag processor
1803 # rawtext is reusable. need to run flag processor
1800 rawtext = self._cache[2]
1804 rawtext = self._cache[2]
1801
1805
1802 cachedrev = self._cache[1]
1806 cachedrev = self._cache[1]
1803
1807
1804 # look up what we need to read
1808 # look up what we need to read
1805 if rawtext is None:
1809 if rawtext is None:
1806 if rev is None:
1810 if rev is None:
1807 rev = self.rev(node)
1811 rev = self.rev(node)
1808
1812
1809 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1813 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1810 if stopped:
1814 if stopped:
1811 rawtext = self._cache[2]
1815 rawtext = self._cache[2]
1812
1816
1813 # drop cache to save memory
1817 # drop cache to save memory
1814 self._cache = None
1818 self._cache = None
1815
1819
1816 bins = self._chunks(chain, df=_df)
1820 bins = self._chunks(chain, df=_df)
1817 if rawtext is None:
1821 if rawtext is None:
1818 rawtext = bytes(bins[0])
1822 rawtext = bytes(bins[0])
1819 bins = bins[1:]
1823 bins = bins[1:]
1820
1824
1821 rawtext = mdiff.patches(rawtext, bins)
1825 rawtext = mdiff.patches(rawtext, bins)
1822 self._cache = (node, rev, rawtext)
1826 self._cache = (node, rev, rawtext)
1823
1827
1824 if flags is None:
1828 if flags is None:
1825 if rev is None:
1829 if rev is None:
1826 rev = self.rev(node)
1830 rev = self.rev(node)
1827 flags = self.flags(rev)
1831 flags = self.flags(rev)
1828
1832
1829 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1833 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1830 if validatehash:
1834 if validatehash:
1831 self.checkhash(text, node, rev=rev)
1835 self.checkhash(text, node, rev=rev)
1832
1836
1833 return text
1837 return text
1834
1838
1835 def hash(self, text, p1, p2):
1839 def hash(self, text, p1, p2):
1836 """Compute a node hash.
1840 """Compute a node hash.
1837
1841
1838 Available as a function so that subclasses can replace the hash
1842 Available as a function so that subclasses can replace the hash
1839 as needed.
1843 as needed.
1840 """
1844 """
1841 return hash(text, p1, p2)
1845 return hash(text, p1, p2)
1842
1846
1843 def _processflags(self, text, flags, operation, raw=False):
1847 def _processflags(self, text, flags, operation, raw=False):
1844 """Inspect revision data flags and applies transforms defined by
1848 """Inspect revision data flags and applies transforms defined by
1845 registered flag processors.
1849 registered flag processors.
1846
1850
1847 ``text`` - the revision data to process
1851 ``text`` - the revision data to process
1848 ``flags`` - the revision flags
1852 ``flags`` - the revision flags
1849 ``operation`` - the operation being performed (read or write)
1853 ``operation`` - the operation being performed (read or write)
1850 ``raw`` - an optional argument describing if the raw transform should be
1854 ``raw`` - an optional argument describing if the raw transform should be
1851 applied.
1855 applied.
1852
1856
1853 This method processes the flags in the order (or reverse order if
1857 This method processes the flags in the order (or reverse order if
1854 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1858 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1855 flag processors registered for present flags. The order of flags defined
1859 flag processors registered for present flags. The order of flags defined
1856 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1860 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1857
1861
1858 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1862 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1859 processed text and ``validatehash`` is a bool indicating whether the
1863 processed text and ``validatehash`` is a bool indicating whether the
1860 returned text should be checked for hash integrity.
1864 returned text should be checked for hash integrity.
1861
1865
1862 Note: If the ``raw`` argument is set, it has precedence over the
1866 Note: If the ``raw`` argument is set, it has precedence over the
1863 operation and will only update the value of ``validatehash``.
1867 operation and will only update the value of ``validatehash``.
1864 """
1868 """
1865 # fast path: no flag processors will run
1869 # fast path: no flag processors will run
1866 if flags == 0:
1870 if flags == 0:
1867 return text, True
1871 return text, True
1868 if not operation in ('read', 'write'):
1872 if not operation in ('read', 'write'):
1869 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1873 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1870 # Check all flags are known.
1874 # Check all flags are known.
1871 if flags & ~REVIDX_KNOWN_FLAGS:
1875 if flags & ~REVIDX_KNOWN_FLAGS:
1872 raise RevlogError(_("incompatible revision flag '%#x'") %
1876 raise RevlogError(_("incompatible revision flag '%#x'") %
1873 (flags & ~REVIDX_KNOWN_FLAGS))
1877 (flags & ~REVIDX_KNOWN_FLAGS))
1874 validatehash = True
1878 validatehash = True
1875 # Depending on the operation (read or write), the order might be
1879 # Depending on the operation (read or write), the order might be
1876 # reversed due to non-commutative transforms.
1880 # reversed due to non-commutative transforms.
1877 orderedflags = REVIDX_FLAGS_ORDER
1881 orderedflags = REVIDX_FLAGS_ORDER
1878 if operation == 'write':
1882 if operation == 'write':
1879 orderedflags = reversed(orderedflags)
1883 orderedflags = reversed(orderedflags)
1880
1884
1881 for flag in orderedflags:
1885 for flag in orderedflags:
1882 # If a flagprocessor has been registered for a known flag, apply the
1886 # If a flagprocessor has been registered for a known flag, apply the
1883 # related operation transform and update result tuple.
1887 # related operation transform and update result tuple.
1884 if flag & flags:
1888 if flag & flags:
1885 vhash = True
1889 vhash = True
1886
1890
1887 if flag not in _flagprocessors:
1891 if flag not in _flagprocessors:
1888 message = _("missing processor for flag '%#x'") % (flag)
1892 message = _("missing processor for flag '%#x'") % (flag)
1889 raise RevlogError(message)
1893 raise RevlogError(message)
1890
1894
1891 processor = _flagprocessors[flag]
1895 processor = _flagprocessors[flag]
1892 if processor is not None:
1896 if processor is not None:
1893 readtransform, writetransform, rawtransform = processor
1897 readtransform, writetransform, rawtransform = processor
1894
1898
1895 if raw:
1899 if raw:
1896 vhash = rawtransform(self, text)
1900 vhash = rawtransform(self, text)
1897 elif operation == 'read':
1901 elif operation == 'read':
1898 text, vhash = readtransform(self, text)
1902 text, vhash = readtransform(self, text)
1899 else: # write operation
1903 else: # write operation
1900 text, vhash = writetransform(self, text)
1904 text, vhash = writetransform(self, text)
1901 validatehash = validatehash and vhash
1905 validatehash = validatehash and vhash
1902
1906
1903 return text, validatehash
1907 return text, validatehash
1904
1908
1905 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1909 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1906 """Check node hash integrity.
1910 """Check node hash integrity.
1907
1911
1908 Available as a function so that subclasses can extend hash mismatch
1912 Available as a function so that subclasses can extend hash mismatch
1909 behaviors as needed.
1913 behaviors as needed.
1910 """
1914 """
1911 try:
1915 try:
1912 if p1 is None and p2 is None:
1916 if p1 is None and p2 is None:
1913 p1, p2 = self.parents(node)
1917 p1, p2 = self.parents(node)
1914 if node != self.hash(text, p1, p2):
1918 if node != self.hash(text, p1, p2):
1915 revornode = rev
1919 revornode = rev
1916 if revornode is None:
1920 if revornode is None:
1917 revornode = templatefilters.short(hex(node))
1921 revornode = templatefilters.short(hex(node))
1918 raise RevlogError(_("integrity check failed on %s:%s")
1922 raise RevlogError(_("integrity check failed on %s:%s")
1919 % (self.indexfile, pycompat.bytestr(revornode)))
1923 % (self.indexfile, pycompat.bytestr(revornode)))
1920 except RevlogError:
1924 except RevlogError:
1921 if self._censorable and _censoredtext(text):
1925 if self._censorable and _censoredtext(text):
1922 raise error.CensoredNodeError(self.indexfile, node, text)
1926 raise error.CensoredNodeError(self.indexfile, node, text)
1923 raise
1927 raise
1924
1928
1925 def _enforceinlinesize(self, tr, fp=None):
1929 def _enforceinlinesize(self, tr, fp=None):
1926 """Check if the revlog is too big for inline and convert if so.
1930 """Check if the revlog is too big for inline and convert if so.
1927
1931
1928 This should be called after revisions are added to the revlog. If the
1932 This should be called after revisions are added to the revlog. If the
1929 revlog has grown too large to be an inline revlog, it will convert it
1933 revlog has grown too large to be an inline revlog, it will convert it
1930 to use multiple index and data files.
1934 to use multiple index and data files.
1931 """
1935 """
1932 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1936 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1933 return
1937 return
1934
1938
1935 trinfo = tr.find(self.indexfile)
1939 trinfo = tr.find(self.indexfile)
1936 if trinfo is None:
1940 if trinfo is None:
1937 raise RevlogError(_("%s not found in the transaction")
1941 raise RevlogError(_("%s not found in the transaction")
1938 % self.indexfile)
1942 % self.indexfile)
1939
1943
1940 trindex = trinfo[2]
1944 trindex = trinfo[2]
1941 if trindex is not None:
1945 if trindex is not None:
1942 dataoff = self.start(trindex)
1946 dataoff = self.start(trindex)
1943 else:
1947 else:
1944 # revlog was stripped at start of transaction, use all leftover data
1948 # revlog was stripped at start of transaction, use all leftover data
1945 trindex = len(self) - 1
1949 trindex = len(self) - 1
1946 dataoff = self.end(-2)
1950 dataoff = self.end(-2)
1947
1951
1948 tr.add(self.datafile, dataoff)
1952 tr.add(self.datafile, dataoff)
1949
1953
1950 if fp:
1954 if fp:
1951 fp.flush()
1955 fp.flush()
1952 fp.close()
1956 fp.close()
1953
1957
1954 with self._datafp('w') as df:
1958 with self._datafp('w') as df:
1955 for r in self:
1959 for r in self:
1956 df.write(self._getsegmentforrevs(r, r)[1])
1960 df.write(self._getsegmentforrevs(r, r)[1])
1957
1961
1958 with self._indexfp('w') as fp:
1962 with self._indexfp('w') as fp:
1959 self.version &= ~FLAG_INLINE_DATA
1963 self.version &= ~FLAG_INLINE_DATA
1960 self._inline = False
1964 self._inline = False
1961 io = self._io
1965 io = self._io
1962 for i in self:
1966 for i in self:
1963 e = io.packentry(self.index[i], self.node, self.version, i)
1967 e = io.packentry(self.index[i], self.node, self.version, i)
1964 fp.write(e)
1968 fp.write(e)
1965
1969
1966 # the temp file replace the real index when we exit the context
1970 # the temp file replace the real index when we exit the context
1967 # manager
1971 # manager
1968
1972
1969 tr.replace(self.indexfile, trindex * self._io.size)
1973 tr.replace(self.indexfile, trindex * self._io.size)
1970 self._chunkclear()
1974 self._chunkclear()
1971
1975
1972 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1976 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1973 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1977 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1974 """add a revision to the log
1978 """add a revision to the log
1975
1979
1976 text - the revision data to add
1980 text - the revision data to add
1977 transaction - the transaction object used for rollback
1981 transaction - the transaction object used for rollback
1978 link - the linkrev data to add
1982 link - the linkrev data to add
1979 p1, p2 - the parent nodeids of the revision
1983 p1, p2 - the parent nodeids of the revision
1980 cachedelta - an optional precomputed delta
1984 cachedelta - an optional precomputed delta
1981 node - nodeid of revision; typically node is not specified, and it is
1985 node - nodeid of revision; typically node is not specified, and it is
1982 computed by default as hash(text, p1, p2), however subclasses might
1986 computed by default as hash(text, p1, p2), however subclasses might
1983 use different hashing method (and override checkhash() in such case)
1987 use different hashing method (and override checkhash() in such case)
1984 flags - the known flags to set on the revision
1988 flags - the known flags to set on the revision
1985 deltacomputer - an optional _deltacomputer instance shared between
1989 deltacomputer - an optional _deltacomputer instance shared between
1986 multiple calls
1990 multiple calls
1987 """
1991 """
1988 if link == nullrev:
1992 if link == nullrev:
1989 raise RevlogError(_("attempted to add linkrev -1 to %s")
1993 raise RevlogError(_("attempted to add linkrev -1 to %s")
1990 % self.indexfile)
1994 % self.indexfile)
1991
1995
1992 if flags:
1996 if flags:
1993 node = node or self.hash(text, p1, p2)
1997 node = node or self.hash(text, p1, p2)
1994
1998
1995 rawtext, validatehash = self._processflags(text, flags, 'write')
1999 rawtext, validatehash = self._processflags(text, flags, 'write')
1996
2000
1997 # If the flag processor modifies the revision data, ignore any provided
2001 # If the flag processor modifies the revision data, ignore any provided
1998 # cachedelta.
2002 # cachedelta.
1999 if rawtext != text:
2003 if rawtext != text:
2000 cachedelta = None
2004 cachedelta = None
2001
2005
2002 if len(rawtext) > _maxentrysize:
2006 if len(rawtext) > _maxentrysize:
2003 raise RevlogError(
2007 raise RevlogError(
2004 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2008 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2005 % (self.indexfile, len(rawtext)))
2009 % (self.indexfile, len(rawtext)))
2006
2010
2007 node = node or self.hash(rawtext, p1, p2)
2011 node = node or self.hash(rawtext, p1, p2)
2008 if node in self.nodemap:
2012 if node in self.nodemap:
2009 return node
2013 return node
2010
2014
2011 if validatehash:
2015 if validatehash:
2012 self.checkhash(rawtext, node, p1=p1, p2=p2)
2016 self.checkhash(rawtext, node, p1=p1, p2=p2)
2013
2017
2014 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2018 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2015 flags, cachedelta=cachedelta,
2019 flags, cachedelta=cachedelta,
2016 deltacomputer=deltacomputer)
2020 deltacomputer=deltacomputer)
2017
2021
2018 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2022 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2019 cachedelta=None, deltacomputer=None):
2023 cachedelta=None, deltacomputer=None):
2020 """add a raw revision with known flags, node and parents
2024 """add a raw revision with known flags, node and parents
2021 useful when reusing a revision not stored in this revlog (ex: received
2025 useful when reusing a revision not stored in this revlog (ex: received
2022 over wire, or read from an external bundle).
2026 over wire, or read from an external bundle).
2023 """
2027 """
2024 dfh = None
2028 dfh = None
2025 if not self._inline:
2029 if not self._inline:
2026 dfh = self._datafp("a+")
2030 dfh = self._datafp("a+")
2027 ifh = self._indexfp("a+")
2031 ifh = self._indexfp("a+")
2028 try:
2032 try:
2029 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2033 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2030 flags, cachedelta, ifh, dfh,
2034 flags, cachedelta, ifh, dfh,
2031 deltacomputer=deltacomputer)
2035 deltacomputer=deltacomputer)
2032 finally:
2036 finally:
2033 if dfh:
2037 if dfh:
2034 dfh.close()
2038 dfh.close()
2035 ifh.close()
2039 ifh.close()
2036
2040
2037 def compress(self, data):
2041 def compress(self, data):
2038 """Generate a possibly-compressed representation of data."""
2042 """Generate a possibly-compressed representation of data."""
2039 if not data:
2043 if not data:
2040 return '', data
2044 return '', data
2041
2045
2042 compressed = self._compressor.compress(data)
2046 compressed = self._compressor.compress(data)
2043
2047
2044 if compressed:
2048 if compressed:
2045 # The revlog compressor added the header in the returned data.
2049 # The revlog compressor added the header in the returned data.
2046 return '', compressed
2050 return '', compressed
2047
2051
2048 if data[0:1] == '\0':
2052 if data[0:1] == '\0':
2049 return '', data
2053 return '', data
2050 return 'u', data
2054 return 'u', data
2051
2055
2052 def decompress(self, data):
2056 def decompress(self, data):
2053 """Decompress a revlog chunk.
2057 """Decompress a revlog chunk.
2054
2058
2055 The chunk is expected to begin with a header identifying the
2059 The chunk is expected to begin with a header identifying the
2056 format type so it can be routed to an appropriate decompressor.
2060 format type so it can be routed to an appropriate decompressor.
2057 """
2061 """
2058 if not data:
2062 if not data:
2059 return data
2063 return data
2060
2064
2061 # Revlogs are read much more frequently than they are written and many
2065 # Revlogs are read much more frequently than they are written and many
2062 # chunks only take microseconds to decompress, so performance is
2066 # chunks only take microseconds to decompress, so performance is
2063 # important here.
2067 # important here.
2064 #
2068 #
2065 # We can make a few assumptions about revlogs:
2069 # We can make a few assumptions about revlogs:
2066 #
2070 #
2067 # 1) the majority of chunks will be compressed (as opposed to inline
2071 # 1) the majority of chunks will be compressed (as opposed to inline
2068 # raw data).
2072 # raw data).
2069 # 2) decompressing *any* data will likely by at least 10x slower than
2073 # 2) decompressing *any* data will likely by at least 10x slower than
2070 # returning raw inline data.
2074 # returning raw inline data.
2071 # 3) we want to prioritize common and officially supported compression
2075 # 3) we want to prioritize common and officially supported compression
2072 # engines
2076 # engines
2073 #
2077 #
2074 # It follows that we want to optimize for "decompress compressed data
2078 # It follows that we want to optimize for "decompress compressed data
2075 # when encoded with common and officially supported compression engines"
2079 # when encoded with common and officially supported compression engines"
2076 # case over "raw data" and "data encoded by less common or non-official
2080 # case over "raw data" and "data encoded by less common or non-official
2077 # compression engines." That is why we have the inline lookup first
2081 # compression engines." That is why we have the inline lookup first
2078 # followed by the compengines lookup.
2082 # followed by the compengines lookup.
2079 #
2083 #
2080 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2084 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2081 # compressed chunks. And this matters for changelog and manifest reads.
2085 # compressed chunks. And this matters for changelog and manifest reads.
2082 t = data[0:1]
2086 t = data[0:1]
2083
2087
2084 if t == 'x':
2088 if t == 'x':
2085 try:
2089 try:
2086 return _zlibdecompress(data)
2090 return _zlibdecompress(data)
2087 except zlib.error as e:
2091 except zlib.error as e:
2088 raise RevlogError(_('revlog decompress error: %s') %
2092 raise RevlogError(_('revlog decompress error: %s') %
2089 stringutil.forcebytestr(e))
2093 stringutil.forcebytestr(e))
2090 # '\0' is more common than 'u' so it goes first.
2094 # '\0' is more common than 'u' so it goes first.
2091 elif t == '\0':
2095 elif t == '\0':
2092 return data
2096 return data
2093 elif t == 'u':
2097 elif t == 'u':
2094 return util.buffer(data, 1)
2098 return util.buffer(data, 1)
2095
2099
2096 try:
2100 try:
2097 compressor = self._decompressors[t]
2101 compressor = self._decompressors[t]
2098 except KeyError:
2102 except KeyError:
2099 try:
2103 try:
2100 engine = util.compengines.forrevlogheader(t)
2104 engine = util.compengines.forrevlogheader(t)
2101 compressor = engine.revlogcompressor()
2105 compressor = engine.revlogcompressor()
2102 self._decompressors[t] = compressor
2106 self._decompressors[t] = compressor
2103 except KeyError:
2107 except KeyError:
2104 raise RevlogError(_('unknown compression type %r') % t)
2108 raise RevlogError(_('unknown compression type %r') % t)
2105
2109
2106 return compressor.decompress(data)
2110 return compressor.decompress(data)
2107
2111
2108 def _isgooddeltainfo(self, deltainfo, revinfo):
2112 def _isgooddeltainfo(self, deltainfo, revinfo):
2109 """Returns True if the given delta is good. Good means that it is within
2113 """Returns True if the given delta is good. Good means that it is within
2110 the disk span, disk size, and chain length bounds that we know to be
2114 the disk span, disk size, and chain length bounds that we know to be
2111 performant."""
2115 performant."""
2112 if deltainfo is None:
2116 if deltainfo is None:
2113 return False
2117 return False
2114
2118
2115 # - 'deltainfo.distance' is the distance from the base revision --
2119 # - 'deltainfo.distance' is the distance from the base revision --
2116 # bounding it limits the amount of I/O we need to do.
2120 # bounding it limits the amount of I/O we need to do.
2117 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2121 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2118 # deltas we need to apply -- bounding it limits the amount of CPU
2122 # deltas we need to apply -- bounding it limits the amount of CPU
2119 # we consume.
2123 # we consume.
2120
2124
2121 textlen = revinfo.textlen
2125 textlen = revinfo.textlen
2122 defaultmax = textlen * 4
2126 defaultmax = textlen * 4
2123 maxdist = self._maxdeltachainspan
2127 maxdist = self._maxdeltachainspan
2124 if not maxdist:
2128 if not maxdist:
2125 maxdist = deltainfo.distance # ensure the conditional pass
2129 maxdist = deltainfo.distance # ensure the conditional pass
2126 maxdist = max(maxdist, defaultmax)
2130 maxdist = max(maxdist, defaultmax)
2127 if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or
2131 if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or
2128 deltainfo.compresseddeltalen > textlen * 2 or
2132 deltainfo.compresseddeltalen > textlen * 2 or
2129 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2133 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2130 return False
2134 return False
2131
2135
2132 return True
2136 return True
2133
2137
2134 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2138 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2135 cachedelta, ifh, dfh, alwayscache=False,
2139 cachedelta, ifh, dfh, alwayscache=False,
2136 deltacomputer=None):
2140 deltacomputer=None):
2137 """internal function to add revisions to the log
2141 """internal function to add revisions to the log
2138
2142
2139 see addrevision for argument descriptions.
2143 see addrevision for argument descriptions.
2140
2144
2141 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2145 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2142
2146
2143 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2147 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2144 be used.
2148 be used.
2145
2149
2146 invariants:
2150 invariants:
2147 - rawtext is optional (can be None); if not set, cachedelta must be set.
2151 - rawtext is optional (can be None); if not set, cachedelta must be set.
2148 if both are set, they must correspond to each other.
2152 if both are set, they must correspond to each other.
2149 """
2153 """
2150 if node == nullid:
2154 if node == nullid:
2151 raise RevlogError(_("%s: attempt to add null revision") %
2155 raise RevlogError(_("%s: attempt to add null revision") %
2152 (self.indexfile))
2156 (self.indexfile))
2153 if node == wdirid or node in wdirfilenodeids:
2157 if node == wdirid or node in wdirfilenodeids:
2154 raise RevlogError(_("%s: attempt to add wdir revision") %
2158 raise RevlogError(_("%s: attempt to add wdir revision") %
2155 (self.indexfile))
2159 (self.indexfile))
2156
2160
2157 if self._inline:
2161 if self._inline:
2158 fh = ifh
2162 fh = ifh
2159 else:
2163 else:
2160 fh = dfh
2164 fh = dfh
2161
2165
2162 btext = [rawtext]
2166 btext = [rawtext]
2163
2167
2164 curr = len(self)
2168 curr = len(self)
2165 prev = curr - 1
2169 prev = curr - 1
2166 offset = self.end(prev)
2170 offset = self.end(prev)
2167 p1r, p2r = self.rev(p1), self.rev(p2)
2171 p1r, p2r = self.rev(p1), self.rev(p2)
2168
2172
2169 # full versions are inserted when the needed deltas
2173 # full versions are inserted when the needed deltas
2170 # become comparable to the uncompressed text
2174 # become comparable to the uncompressed text
2171 if rawtext is None:
2175 if rawtext is None:
2172 # need rawtext size, before changed by flag processors, which is
2176 # need rawtext size, before changed by flag processors, which is
2173 # the non-raw size. use revlog explicitly to avoid filelog's extra
2177 # the non-raw size. use revlog explicitly to avoid filelog's extra
2174 # logic that might remove metadata size.
2178 # logic that might remove metadata size.
2175 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2179 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2176 cachedelta[1])
2180 cachedelta[1])
2177 else:
2181 else:
2178 textlen = len(rawtext)
2182 textlen = len(rawtext)
2179
2183
2180 if deltacomputer is None:
2184 if deltacomputer is None:
2181 deltacomputer = _deltacomputer(self)
2185 deltacomputer = _deltacomputer(self)
2182
2186
2183 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2187 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2184
2188
2185 # no delta for flag processor revision (see "candelta" for why)
2189 # no delta for flag processor revision (see "candelta" for why)
2186 # not calling candelta since only one revision needs test, also to
2190 # not calling candelta since only one revision needs test, also to
2187 # avoid overhead fetching flags again.
2191 # avoid overhead fetching flags again.
2188 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2192 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2189 deltainfo = None
2193 deltainfo = None
2190 else:
2194 else:
2191 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2195 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2192
2196
2193 if deltainfo is not None:
2197 if deltainfo is not None:
2194 base = deltainfo.base
2198 base = deltainfo.base
2195 chainbase = deltainfo.chainbase
2199 chainbase = deltainfo.chainbase
2196 data = deltainfo.data
2200 data = deltainfo.data
2197 l = deltainfo.deltalen
2201 l = deltainfo.deltalen
2198 else:
2202 else:
2199 rawtext = deltacomputer.buildtext(revinfo, fh)
2203 rawtext = deltacomputer.buildtext(revinfo, fh)
2200 data = self.compress(rawtext)
2204 data = self.compress(rawtext)
2201 l = len(data[1]) + len(data[0])
2205 l = len(data[1]) + len(data[0])
2202 base = chainbase = curr
2206 base = chainbase = curr
2203
2207
2204 e = (offset_type(offset, flags), l, textlen,
2208 e = (offset_type(offset, flags), l, textlen,
2205 base, link, p1r, p2r, node)
2209 base, link, p1r, p2r, node)
2206 self.index.insert(-1, e)
2210 self.index.insert(-1, e)
2207 self.nodemap[node] = curr
2211 self.nodemap[node] = curr
2208
2212
2209 entry = self._io.packentry(e, self.node, self.version, curr)
2213 entry = self._io.packentry(e, self.node, self.version, curr)
2210 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2214 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2211
2215
2212 if alwayscache and rawtext is None:
2216 if alwayscache and rawtext is None:
2213 rawtext = deltacomputer._buildtext(revinfo, fh)
2217 rawtext = deltacomputer._buildtext(revinfo, fh)
2214
2218
2215 if type(rawtext) == bytes: # only accept immutable objects
2219 if type(rawtext) == bytes: # only accept immutable objects
2216 self._cache = (node, curr, rawtext)
2220 self._cache = (node, curr, rawtext)
2217 self._chainbasecache[curr] = chainbase
2221 self._chainbasecache[curr] = chainbase
2218 return node
2222 return node
2219
2223
2220 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2224 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2221 # Files opened in a+ mode have inconsistent behavior on various
2225 # Files opened in a+ mode have inconsistent behavior on various
2222 # platforms. Windows requires that a file positioning call be made
2226 # platforms. Windows requires that a file positioning call be made
2223 # when the file handle transitions between reads and writes. See
2227 # when the file handle transitions between reads and writes. See
2224 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2228 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2225 # platforms, Python or the platform itself can be buggy. Some versions
2229 # platforms, Python or the platform itself can be buggy. Some versions
2226 # of Solaris have been observed to not append at the end of the file
2230 # of Solaris have been observed to not append at the end of the file
2227 # if the file was seeked to before the end. See issue4943 for more.
2231 # if the file was seeked to before the end. See issue4943 for more.
2228 #
2232 #
2229 # We work around this issue by inserting a seek() before writing.
2233 # We work around this issue by inserting a seek() before writing.
2230 # Note: This is likely not necessary on Python 3.
2234 # Note: This is likely not necessary on Python 3.
2231 ifh.seek(0, os.SEEK_END)
2235 ifh.seek(0, os.SEEK_END)
2232 if dfh:
2236 if dfh:
2233 dfh.seek(0, os.SEEK_END)
2237 dfh.seek(0, os.SEEK_END)
2234
2238
2235 curr = len(self) - 1
2239 curr = len(self) - 1
2236 if not self._inline:
2240 if not self._inline:
2237 transaction.add(self.datafile, offset)
2241 transaction.add(self.datafile, offset)
2238 transaction.add(self.indexfile, curr * len(entry))
2242 transaction.add(self.indexfile, curr * len(entry))
2239 if data[0]:
2243 if data[0]:
2240 dfh.write(data[0])
2244 dfh.write(data[0])
2241 dfh.write(data[1])
2245 dfh.write(data[1])
2242 ifh.write(entry)
2246 ifh.write(entry)
2243 else:
2247 else:
2244 offset += curr * self._io.size
2248 offset += curr * self._io.size
2245 transaction.add(self.indexfile, offset, curr)
2249 transaction.add(self.indexfile, offset, curr)
2246 ifh.write(entry)
2250 ifh.write(entry)
2247 ifh.write(data[0])
2251 ifh.write(data[0])
2248 ifh.write(data[1])
2252 ifh.write(data[1])
2249 self._enforceinlinesize(transaction, ifh)
2253 self._enforceinlinesize(transaction, ifh)
2250
2254
2251 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2255 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2252 """
2256 """
2253 add a delta group
2257 add a delta group
2254
2258
2255 given a set of deltas, add them to the revision log. the
2259 given a set of deltas, add them to the revision log. the
2256 first delta is against its parent, which should be in our
2260 first delta is against its parent, which should be in our
2257 log, the rest are against the previous delta.
2261 log, the rest are against the previous delta.
2258
2262
2259 If ``addrevisioncb`` is defined, it will be called with arguments of
2263 If ``addrevisioncb`` is defined, it will be called with arguments of
2260 this revlog and the node that was added.
2264 this revlog and the node that was added.
2261 """
2265 """
2262
2266
2263 nodes = []
2267 nodes = []
2264
2268
2265 r = len(self)
2269 r = len(self)
2266 end = 0
2270 end = 0
2267 if r:
2271 if r:
2268 end = self.end(r - 1)
2272 end = self.end(r - 1)
2269 ifh = self._indexfp("a+")
2273 ifh = self._indexfp("a+")
2270 isize = r * self._io.size
2274 isize = r * self._io.size
2271 if self._inline:
2275 if self._inline:
2272 transaction.add(self.indexfile, end + isize, r)
2276 transaction.add(self.indexfile, end + isize, r)
2273 dfh = None
2277 dfh = None
2274 else:
2278 else:
2275 transaction.add(self.indexfile, isize, r)
2279 transaction.add(self.indexfile, isize, r)
2276 transaction.add(self.datafile, end)
2280 transaction.add(self.datafile, end)
2277 dfh = self._datafp("a+")
2281 dfh = self._datafp("a+")
2278 def flush():
2282 def flush():
2279 if dfh:
2283 if dfh:
2280 dfh.flush()
2284 dfh.flush()
2281 ifh.flush()
2285 ifh.flush()
2282 try:
2286 try:
2283 deltacomputer = _deltacomputer(self)
2287 deltacomputer = _deltacomputer(self)
2284 # loop through our set of deltas
2288 # loop through our set of deltas
2285 for data in deltas:
2289 for data in deltas:
2286 node, p1, p2, linknode, deltabase, delta, flags = data
2290 node, p1, p2, linknode, deltabase, delta, flags = data
2287 link = linkmapper(linknode)
2291 link = linkmapper(linknode)
2288 flags = flags or REVIDX_DEFAULT_FLAGS
2292 flags = flags or REVIDX_DEFAULT_FLAGS
2289
2293
2290 nodes.append(node)
2294 nodes.append(node)
2291
2295
2292 if node in self.nodemap:
2296 if node in self.nodemap:
2293 # this can happen if two branches make the same change
2297 # this can happen if two branches make the same change
2294 continue
2298 continue
2295
2299
2296 for p in (p1, p2):
2300 for p in (p1, p2):
2297 if p not in self.nodemap:
2301 if p not in self.nodemap:
2298 raise LookupError(p, self.indexfile,
2302 raise LookupError(p, self.indexfile,
2299 _('unknown parent'))
2303 _('unknown parent'))
2300
2304
2301 if deltabase not in self.nodemap:
2305 if deltabase not in self.nodemap:
2302 raise LookupError(deltabase, self.indexfile,
2306 raise LookupError(deltabase, self.indexfile,
2303 _('unknown delta base'))
2307 _('unknown delta base'))
2304
2308
2305 baserev = self.rev(deltabase)
2309 baserev = self.rev(deltabase)
2306
2310
2307 if baserev != nullrev and self.iscensored(baserev):
2311 if baserev != nullrev and self.iscensored(baserev):
2308 # if base is censored, delta must be full replacement in a
2312 # if base is censored, delta must be full replacement in a
2309 # single patch operation
2313 # single patch operation
2310 hlen = struct.calcsize(">lll")
2314 hlen = struct.calcsize(">lll")
2311 oldlen = self.rawsize(baserev)
2315 oldlen = self.rawsize(baserev)
2312 newlen = len(delta) - hlen
2316 newlen = len(delta) - hlen
2313 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2317 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2314 raise error.CensoredBaseError(self.indexfile,
2318 raise error.CensoredBaseError(self.indexfile,
2315 self.node(baserev))
2319 self.node(baserev))
2316
2320
2317 if not flags and self._peek_iscensored(baserev, delta, flush):
2321 if not flags and self._peek_iscensored(baserev, delta, flush):
2318 flags |= REVIDX_ISCENSORED
2322 flags |= REVIDX_ISCENSORED
2319
2323
2320 # We assume consumers of addrevisioncb will want to retrieve
2324 # We assume consumers of addrevisioncb will want to retrieve
2321 # the added revision, which will require a call to
2325 # the added revision, which will require a call to
2322 # revision(). revision() will fast path if there is a cache
2326 # revision(). revision() will fast path if there is a cache
2323 # hit. So, we tell _addrevision() to always cache in this case.
2327 # hit. So, we tell _addrevision() to always cache in this case.
2324 # We're only using addgroup() in the context of changegroup
2328 # We're only using addgroup() in the context of changegroup
2325 # generation so the revision data can always be handled as raw
2329 # generation so the revision data can always be handled as raw
2326 # by the flagprocessor.
2330 # by the flagprocessor.
2327 self._addrevision(node, None, transaction, link,
2331 self._addrevision(node, None, transaction, link,
2328 p1, p2, flags, (baserev, delta),
2332 p1, p2, flags, (baserev, delta),
2329 ifh, dfh,
2333 ifh, dfh,
2330 alwayscache=bool(addrevisioncb),
2334 alwayscache=bool(addrevisioncb),
2331 deltacomputer=deltacomputer)
2335 deltacomputer=deltacomputer)
2332
2336
2333 if addrevisioncb:
2337 if addrevisioncb:
2334 addrevisioncb(self, node)
2338 addrevisioncb(self, node)
2335
2339
2336 if not dfh and not self._inline:
2340 if not dfh and not self._inline:
2337 # addrevision switched from inline to conventional
2341 # addrevision switched from inline to conventional
2338 # reopen the index
2342 # reopen the index
2339 ifh.close()
2343 ifh.close()
2340 dfh = self._datafp("a+")
2344 dfh = self._datafp("a+")
2341 ifh = self._indexfp("a+")
2345 ifh = self._indexfp("a+")
2342 finally:
2346 finally:
2343 if dfh:
2347 if dfh:
2344 dfh.close()
2348 dfh.close()
2345 ifh.close()
2349 ifh.close()
2346
2350
2347 return nodes
2351 return nodes
2348
2352
2349 def iscensored(self, rev):
2353 def iscensored(self, rev):
2350 """Check if a file revision is censored."""
2354 """Check if a file revision is censored."""
2351 if not self._censorable:
2355 if not self._censorable:
2352 return False
2356 return False
2353
2357
2354 return self.flags(rev) & REVIDX_ISCENSORED
2358 return self.flags(rev) & REVIDX_ISCENSORED
2355
2359
2356 def _peek_iscensored(self, baserev, delta, flush):
2360 def _peek_iscensored(self, baserev, delta, flush):
2357 """Quickly check if a delta produces a censored revision."""
2361 """Quickly check if a delta produces a censored revision."""
2358 if not self._censorable:
2362 if not self._censorable:
2359 return False
2363 return False
2360
2364
2361 # Fragile heuristic: unless new file meta keys are added alphabetically
2365 # Fragile heuristic: unless new file meta keys are added alphabetically
2362 # preceding "censored", all censored revisions are prefixed by
2366 # preceding "censored", all censored revisions are prefixed by
2363 # "\1\ncensored:". A delta producing such a censored revision must be a
2367 # "\1\ncensored:". A delta producing such a censored revision must be a
2364 # full-replacement delta, so we inspect the first and only patch in the
2368 # full-replacement delta, so we inspect the first and only patch in the
2365 # delta for this prefix.
2369 # delta for this prefix.
2366 hlen = struct.calcsize(">lll")
2370 hlen = struct.calcsize(">lll")
2367 if len(delta) <= hlen:
2371 if len(delta) <= hlen:
2368 return False
2372 return False
2369
2373
2370 oldlen = self.rawsize(baserev)
2374 oldlen = self.rawsize(baserev)
2371 newlen = len(delta) - hlen
2375 newlen = len(delta) - hlen
2372 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2376 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2373 return False
2377 return False
2374
2378
2375 add = "\1\ncensored:"
2379 add = "\1\ncensored:"
2376 addlen = len(add)
2380 addlen = len(add)
2377 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2381 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2378
2382
2379 def getstrippoint(self, minlink):
2383 def getstrippoint(self, minlink):
2380 """find the minimum rev that must be stripped to strip the linkrev
2384 """find the minimum rev that must be stripped to strip the linkrev
2381
2385
2382 Returns a tuple containing the minimum rev and a set of all revs that
2386 Returns a tuple containing the minimum rev and a set of all revs that
2383 have linkrevs that will be broken by this strip.
2387 have linkrevs that will be broken by this strip.
2384 """
2388 """
2385 brokenrevs = set()
2389 brokenrevs = set()
2386 strippoint = len(self)
2390 strippoint = len(self)
2387
2391
2388 heads = {}
2392 heads = {}
2389 futurelargelinkrevs = set()
2393 futurelargelinkrevs = set()
2390 for head in self.headrevs():
2394 for head in self.headrevs():
2391 headlinkrev = self.linkrev(head)
2395 headlinkrev = self.linkrev(head)
2392 heads[head] = headlinkrev
2396 heads[head] = headlinkrev
2393 if headlinkrev >= minlink:
2397 if headlinkrev >= minlink:
2394 futurelargelinkrevs.add(headlinkrev)
2398 futurelargelinkrevs.add(headlinkrev)
2395
2399
2396 # This algorithm involves walking down the rev graph, starting at the
2400 # This algorithm involves walking down the rev graph, starting at the
2397 # heads. Since the revs are topologically sorted according to linkrev,
2401 # heads. Since the revs are topologically sorted according to linkrev,
2398 # once all head linkrevs are below the minlink, we know there are
2402 # once all head linkrevs are below the minlink, we know there are
2399 # no more revs that could have a linkrev greater than minlink.
2403 # no more revs that could have a linkrev greater than minlink.
2400 # So we can stop walking.
2404 # So we can stop walking.
2401 while futurelargelinkrevs:
2405 while futurelargelinkrevs:
2402 strippoint -= 1
2406 strippoint -= 1
2403 linkrev = heads.pop(strippoint)
2407 linkrev = heads.pop(strippoint)
2404
2408
2405 if linkrev < minlink:
2409 if linkrev < minlink:
2406 brokenrevs.add(strippoint)
2410 brokenrevs.add(strippoint)
2407 else:
2411 else:
2408 futurelargelinkrevs.remove(linkrev)
2412 futurelargelinkrevs.remove(linkrev)
2409
2413
2410 for p in self.parentrevs(strippoint):
2414 for p in self.parentrevs(strippoint):
2411 if p != nullrev:
2415 if p != nullrev:
2412 plinkrev = self.linkrev(p)
2416 plinkrev = self.linkrev(p)
2413 heads[p] = plinkrev
2417 heads[p] = plinkrev
2414 if plinkrev >= minlink:
2418 if plinkrev >= minlink:
2415 futurelargelinkrevs.add(plinkrev)
2419 futurelargelinkrevs.add(plinkrev)
2416
2420
2417 return strippoint, brokenrevs
2421 return strippoint, brokenrevs
2418
2422
2419 def strip(self, minlink, transaction):
2423 def strip(self, minlink, transaction):
2420 """truncate the revlog on the first revision with a linkrev >= minlink
2424 """truncate the revlog on the first revision with a linkrev >= minlink
2421
2425
2422 This function is called when we're stripping revision minlink and
2426 This function is called when we're stripping revision minlink and
2423 its descendants from the repository.
2427 its descendants from the repository.
2424
2428
2425 We have to remove all revisions with linkrev >= minlink, because
2429 We have to remove all revisions with linkrev >= minlink, because
2426 the equivalent changelog revisions will be renumbered after the
2430 the equivalent changelog revisions will be renumbered after the
2427 strip.
2431 strip.
2428
2432
2429 So we truncate the revlog on the first of these revisions, and
2433 So we truncate the revlog on the first of these revisions, and
2430 trust that the caller has saved the revisions that shouldn't be
2434 trust that the caller has saved the revisions that shouldn't be
2431 removed and that it'll re-add them after this truncation.
2435 removed and that it'll re-add them after this truncation.
2432 """
2436 """
2433 if len(self) == 0:
2437 if len(self) == 0:
2434 return
2438 return
2435
2439
2436 rev, _ = self.getstrippoint(minlink)
2440 rev, _ = self.getstrippoint(minlink)
2437 if rev == len(self):
2441 if rev == len(self):
2438 return
2442 return
2439
2443
2440 # first truncate the files on disk
2444 # first truncate the files on disk
2441 end = self.start(rev)
2445 end = self.start(rev)
2442 if not self._inline:
2446 if not self._inline:
2443 transaction.add(self.datafile, end)
2447 transaction.add(self.datafile, end)
2444 end = rev * self._io.size
2448 end = rev * self._io.size
2445 else:
2449 else:
2446 end += rev * self._io.size
2450 end += rev * self._io.size
2447
2451
2448 transaction.add(self.indexfile, end)
2452 transaction.add(self.indexfile, end)
2449
2453
2450 # then reset internal state in memory to forget those revisions
2454 # then reset internal state in memory to forget those revisions
2451 self._cache = None
2455 self._cache = None
2452 self._chaininfocache = {}
2456 self._chaininfocache = {}
2453 self._chunkclear()
2457 self._chunkclear()
2454 for x in xrange(rev, len(self)):
2458 for x in xrange(rev, len(self)):
2455 del self.nodemap[self.node(x)]
2459 del self.nodemap[self.node(x)]
2456
2460
2457 del self.index[rev:-1]
2461 del self.index[rev:-1]
2458 self._nodepos = None
2462 self._nodepos = None
2459
2463
2460 def checksize(self):
2464 def checksize(self):
2461 expected = 0
2465 expected = 0
2462 if len(self):
2466 if len(self):
2463 expected = max(0, self.end(len(self) - 1))
2467 expected = max(0, self.end(len(self) - 1))
2464
2468
2465 try:
2469 try:
2466 with self._datafp() as f:
2470 with self._datafp() as f:
2467 f.seek(0, 2)
2471 f.seek(0, 2)
2468 actual = f.tell()
2472 actual = f.tell()
2469 dd = actual - expected
2473 dd = actual - expected
2470 except IOError as inst:
2474 except IOError as inst:
2471 if inst.errno != errno.ENOENT:
2475 if inst.errno != errno.ENOENT:
2472 raise
2476 raise
2473 dd = 0
2477 dd = 0
2474
2478
2475 try:
2479 try:
2476 f = self.opener(self.indexfile)
2480 f = self.opener(self.indexfile)
2477 f.seek(0, 2)
2481 f.seek(0, 2)
2478 actual = f.tell()
2482 actual = f.tell()
2479 f.close()
2483 f.close()
2480 s = self._io.size
2484 s = self._io.size
2481 i = max(0, actual // s)
2485 i = max(0, actual // s)
2482 di = actual - (i * s)
2486 di = actual - (i * s)
2483 if self._inline:
2487 if self._inline:
2484 databytes = 0
2488 databytes = 0
2485 for r in self:
2489 for r in self:
2486 databytes += max(0, self.length(r))
2490 databytes += max(0, self.length(r))
2487 dd = 0
2491 dd = 0
2488 di = actual - len(self) * s - databytes
2492 di = actual - len(self) * s - databytes
2489 except IOError as inst:
2493 except IOError as inst:
2490 if inst.errno != errno.ENOENT:
2494 if inst.errno != errno.ENOENT:
2491 raise
2495 raise
2492 di = 0
2496 di = 0
2493
2497
2494 return (dd, di)
2498 return (dd, di)
2495
2499
2496 def files(self):
2500 def files(self):
2497 res = [self.indexfile]
2501 res = [self.indexfile]
2498 if not self._inline:
2502 if not self._inline:
2499 res.append(self.datafile)
2503 res.append(self.datafile)
2500 return res
2504 return res
2501
2505
2502 DELTAREUSEALWAYS = 'always'
2506 DELTAREUSEALWAYS = 'always'
2503 DELTAREUSESAMEREVS = 'samerevs'
2507 DELTAREUSESAMEREVS = 'samerevs'
2504 DELTAREUSENEVER = 'never'
2508 DELTAREUSENEVER = 'never'
2505
2509
2506 DELTAREUSEFULLADD = 'fulladd'
2510 DELTAREUSEFULLADD = 'fulladd'
2507
2511
2508 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2512 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2509
2513
2510 def clone(self, tr, destrevlog, addrevisioncb=None,
2514 def clone(self, tr, destrevlog, addrevisioncb=None,
2511 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2515 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2512 """Copy this revlog to another, possibly with format changes.
2516 """Copy this revlog to another, possibly with format changes.
2513
2517
2514 The destination revlog will contain the same revisions and nodes.
2518 The destination revlog will contain the same revisions and nodes.
2515 However, it may not be bit-for-bit identical due to e.g. delta encoding
2519 However, it may not be bit-for-bit identical due to e.g. delta encoding
2516 differences.
2520 differences.
2517
2521
2518 The ``deltareuse`` argument control how deltas from the existing revlog
2522 The ``deltareuse`` argument control how deltas from the existing revlog
2519 are preserved in the destination revlog. The argument can have the
2523 are preserved in the destination revlog. The argument can have the
2520 following values:
2524 following values:
2521
2525
2522 DELTAREUSEALWAYS
2526 DELTAREUSEALWAYS
2523 Deltas will always be reused (if possible), even if the destination
2527 Deltas will always be reused (if possible), even if the destination
2524 revlog would not select the same revisions for the delta. This is the
2528 revlog would not select the same revisions for the delta. This is the
2525 fastest mode of operation.
2529 fastest mode of operation.
2526 DELTAREUSESAMEREVS
2530 DELTAREUSESAMEREVS
2527 Deltas will be reused if the destination revlog would pick the same
2531 Deltas will be reused if the destination revlog would pick the same
2528 revisions for the delta. This mode strikes a balance between speed
2532 revisions for the delta. This mode strikes a balance between speed
2529 and optimization.
2533 and optimization.
2530 DELTAREUSENEVER
2534 DELTAREUSENEVER
2531 Deltas will never be reused. This is the slowest mode of execution.
2535 Deltas will never be reused. This is the slowest mode of execution.
2532 This mode can be used to recompute deltas (e.g. if the diff/delta
2536 This mode can be used to recompute deltas (e.g. if the diff/delta
2533 algorithm changes).
2537 algorithm changes).
2534
2538
2535 Delta computation can be slow, so the choice of delta reuse policy can
2539 Delta computation can be slow, so the choice of delta reuse policy can
2536 significantly affect run time.
2540 significantly affect run time.
2537
2541
2538 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2542 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2539 two extremes. Deltas will be reused if they are appropriate. But if the
2543 two extremes. Deltas will be reused if they are appropriate. But if the
2540 delta could choose a better revision, it will do so. This means if you
2544 delta could choose a better revision, it will do so. This means if you
2541 are converting a non-generaldelta revlog to a generaldelta revlog,
2545 are converting a non-generaldelta revlog to a generaldelta revlog,
2542 deltas will be recomputed if the delta's parent isn't a parent of the
2546 deltas will be recomputed if the delta's parent isn't a parent of the
2543 revision.
2547 revision.
2544
2548
2545 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2549 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2546 controls whether to compute deltas against both parents for merges.
2550 controls whether to compute deltas against both parents for merges.
2547 By default, the current default is used.
2551 By default, the current default is used.
2548 """
2552 """
2549 if deltareuse not in self.DELTAREUSEALL:
2553 if deltareuse not in self.DELTAREUSEALL:
2550 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2554 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2551
2555
2552 if len(destrevlog):
2556 if len(destrevlog):
2553 raise ValueError(_('destination revlog is not empty'))
2557 raise ValueError(_('destination revlog is not empty'))
2554
2558
2555 if getattr(self, 'filteredrevs', None):
2559 if getattr(self, 'filteredrevs', None):
2556 raise ValueError(_('source revlog has filtered revisions'))
2560 raise ValueError(_('source revlog has filtered revisions'))
2557 if getattr(destrevlog, 'filteredrevs', None):
2561 if getattr(destrevlog, 'filteredrevs', None):
2558 raise ValueError(_('destination revlog has filtered revisions'))
2562 raise ValueError(_('destination revlog has filtered revisions'))
2559
2563
2560 # lazydeltabase controls whether to reuse a cached delta, if possible.
2564 # lazydeltabase controls whether to reuse a cached delta, if possible.
2561 oldlazydeltabase = destrevlog._lazydeltabase
2565 oldlazydeltabase = destrevlog._lazydeltabase
2562 oldamd = destrevlog._aggressivemergedeltas
2566 oldamd = destrevlog._aggressivemergedeltas
2563
2567
2564 try:
2568 try:
2565 if deltareuse == self.DELTAREUSEALWAYS:
2569 if deltareuse == self.DELTAREUSEALWAYS:
2566 destrevlog._lazydeltabase = True
2570 destrevlog._lazydeltabase = True
2567 elif deltareuse == self.DELTAREUSESAMEREVS:
2571 elif deltareuse == self.DELTAREUSESAMEREVS:
2568 destrevlog._lazydeltabase = False
2572 destrevlog._lazydeltabase = False
2569
2573
2570 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2574 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2571
2575
2572 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2576 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2573 self.DELTAREUSESAMEREVS)
2577 self.DELTAREUSESAMEREVS)
2574
2578
2575 deltacomputer = _deltacomputer(destrevlog)
2579 deltacomputer = _deltacomputer(destrevlog)
2576 index = self.index
2580 index = self.index
2577 for rev in self:
2581 for rev in self:
2578 entry = index[rev]
2582 entry = index[rev]
2579
2583
2580 # Some classes override linkrev to take filtered revs into
2584 # Some classes override linkrev to take filtered revs into
2581 # account. Use raw entry from index.
2585 # account. Use raw entry from index.
2582 flags = entry[0] & 0xffff
2586 flags = entry[0] & 0xffff
2583 linkrev = entry[4]
2587 linkrev = entry[4]
2584 p1 = index[entry[5]][7]
2588 p1 = index[entry[5]][7]
2585 p2 = index[entry[6]][7]
2589 p2 = index[entry[6]][7]
2586 node = entry[7]
2590 node = entry[7]
2587
2591
2588 # (Possibly) reuse the delta from the revlog if allowed and
2592 # (Possibly) reuse the delta from the revlog if allowed and
2589 # the revlog chunk is a delta.
2593 # the revlog chunk is a delta.
2590 cachedelta = None
2594 cachedelta = None
2591 rawtext = None
2595 rawtext = None
2592 if populatecachedelta:
2596 if populatecachedelta:
2593 dp = self.deltaparent(rev)
2597 dp = self.deltaparent(rev)
2594 if dp != nullrev:
2598 if dp != nullrev:
2595 cachedelta = (dp, bytes(self._chunk(rev)))
2599 cachedelta = (dp, bytes(self._chunk(rev)))
2596
2600
2597 if not cachedelta:
2601 if not cachedelta:
2598 rawtext = self.revision(rev, raw=True)
2602 rawtext = self.revision(rev, raw=True)
2599
2603
2600
2604
2601 if deltareuse == self.DELTAREUSEFULLADD:
2605 if deltareuse == self.DELTAREUSEFULLADD:
2602 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2606 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2603 cachedelta=cachedelta,
2607 cachedelta=cachedelta,
2604 node=node, flags=flags,
2608 node=node, flags=flags,
2605 deltacomputer=deltacomputer)
2609 deltacomputer=deltacomputer)
2606 else:
2610 else:
2607 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2611 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2608 checkambig=False)
2612 checkambig=False)
2609 dfh = None
2613 dfh = None
2610 if not destrevlog._inline:
2614 if not destrevlog._inline:
2611 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2615 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2612 try:
2616 try:
2613 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2617 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2614 p2, flags, cachedelta, ifh, dfh,
2618 p2, flags, cachedelta, ifh, dfh,
2615 deltacomputer=deltacomputer)
2619 deltacomputer=deltacomputer)
2616 finally:
2620 finally:
2617 if dfh:
2621 if dfh:
2618 dfh.close()
2622 dfh.close()
2619 ifh.close()
2623 ifh.close()
2620
2624
2621 if addrevisioncb:
2625 if addrevisioncb:
2622 addrevisioncb(self, rev, node)
2626 addrevisioncb(self, rev, node)
2623 finally:
2627 finally:
2624 destrevlog._lazydeltabase = oldlazydeltabase
2628 destrevlog._lazydeltabase = oldlazydeltabase
2625 destrevlog._aggressivemergedeltas = oldamd
2629 destrevlog._aggressivemergedeltas = oldamd
General Comments 0
You need to be logged in to leave comments. Login now