##// END OF EJS Templates
revlog: make getcandidaterevs more consistent about updating tested revs set...
Paul Morelle -
r38186:faa01541 default
parent child Browse files
Show More
@@ -1,2613 +1,2614
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import hashlib
19 import hashlib
20 import heapq
20 import heapq
21 import os
21 import os
22 import re
22 import re
23 import struct
23 import struct
24 import zlib
24 import zlib
25
25
26 # import stuff from node for others to import from revlog
26 # import stuff from node for others to import from revlog
27 from .node import (
27 from .node import (
28 bin,
28 bin,
29 hex,
29 hex,
30 nullid,
30 nullid,
31 nullrev,
31 nullrev,
32 wdirfilenodeids,
32 wdirfilenodeids,
33 wdirhex,
33 wdirhex,
34 wdirid,
34 wdirid,
35 wdirrev,
35 wdirrev,
36 )
36 )
37 from .i18n import _
37 from .i18n import _
38 from .thirdparty import (
38 from .thirdparty import (
39 attr,
39 attr,
40 )
40 )
41 from . import (
41 from . import (
42 ancestor,
42 ancestor,
43 error,
43 error,
44 mdiff,
44 mdiff,
45 policy,
45 policy,
46 pycompat,
46 pycompat,
47 templatefilters,
47 templatefilters,
48 util,
48 util,
49 )
49 )
50 from .utils import (
50 from .utils import (
51 stringutil,
51 stringutil,
52 )
52 )
53
53
parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
# bitmark for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Re-exported error types so callers can reference them via this module.
RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}
101
101
_mdre = re.compile('\1\n')
def parsemeta(text):
    """Parse a leading "\\1\\n"-delimited metadata block out of *text*.

    Returns ``(metadatadict, metadatasize)`` where ``metadatasize`` is the
    offset at which the real payload starts, or ``(None, None)`` when *text*
    carries no metadata header.
    """
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    # locate the closing "\1\n" delimiter; the header is everything between
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)
115
115
def packmeta(meta, text):
    """Serialize dict *meta* as a "\\1\\n"-delimited header prepended to *text*.

    Keys are emitted in sorted order so the output is deterministic; the
    result round-trips through parsemeta().
    """
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)
120
120
def _censoredtext(text):
    """Return a truthy value when *text* has a "censored" metadata key."""
    m, offs = parsemeta(text)
    # m is None when there is no metadata header at all
    return m and "censored" in m
124
124
def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns a same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.

    Raises ProgrammingError for unknown/unordered flags and error.Abort when
    a processor is already registered on *flag*.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor
160
160
def getoffset(q):
    """Extract the data-file offset (high bits) from a packed offset/flags
    value produced by offset_type()."""
    return int(q >> 16)
163
163
def gettype(q):
    """Extract the index flags (low 16 bits) from a packed offset/flags
    value produced by offset_type()."""
    return int(q & 0xFFFF)
166
166
def offset_type(offset, type):
    """Pack a data-file *offset* and index flags *type* into a single int
    (offset in the high bits, flags in the low 16 bits).

    Raises ValueError if *type* contains bits outside REVIDX_KNOWN_FLAGS.
    """
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
171
171
# SHA-1 state pre-seeded with the null node; copied (cheaper than a fresh
# hashlib.sha1) whenever one parent is null.
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.

    Parents are hashed in sorted order so the result is independent of
    parent ordering.
    """
    # As of now, if one of the parent node is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()
198
198
199 def _trimchunk(revlog, revs, startidx, endidx=None):
199 def _trimchunk(revlog, revs, startidx, endidx=None):
200 """returns revs[startidx:endidx] without empty trailing revs
200 """returns revs[startidx:endidx] without empty trailing revs
201 """
201 """
202 length = revlog.length
202 length = revlog.length
203
203
204 if endidx is None:
204 if endidx is None:
205 endidx = len(revs)
205 endidx = len(revs)
206
206
207 # Trim empty revs at the end, but never the very first revision of a chain
207 # Trim empty revs at the end, but never the very first revision of a chain
208 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
208 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
209 endidx -= 1
209 endidx -= 1
210
210
211 return revs[startidx:endidx]
211 return revs[startidx:endidx]
212
212
213 def _slicechunk(revlog, revs):
213 def _slicechunk(revlog, revs):
214 """slice revs to reduce the amount of unrelated data to be read from disk.
214 """slice revs to reduce the amount of unrelated data to be read from disk.
215
215
216 ``revs`` is sliced into groups that should be read in one time.
216 ``revs`` is sliced into groups that should be read in one time.
217 Assume that revs are sorted.
217 Assume that revs are sorted.
218 """
218 """
219 start = revlog.start
219 start = revlog.start
220 length = revlog.length
220 length = revlog.length
221
221
222 if len(revs) <= 1:
222 if len(revs) <= 1:
223 yield revs
223 yield revs
224 return
224 return
225
225
226 startbyte = start(revs[0])
226 startbyte = start(revs[0])
227 endbyte = start(revs[-1]) + length(revs[-1])
227 endbyte = start(revs[-1]) + length(revs[-1])
228 readdata = deltachainspan = endbyte - startbyte
228 readdata = deltachainspan = endbyte - startbyte
229
229
230 chainpayload = sum(length(r) for r in revs)
230 chainpayload = sum(length(r) for r in revs)
231
231
232 if deltachainspan:
232 if deltachainspan:
233 density = chainpayload / float(deltachainspan)
233 density = chainpayload / float(deltachainspan)
234 else:
234 else:
235 density = 1.0
235 density = 1.0
236
236
237 # Store the gaps in a heap to have them sorted by decreasing size
237 # Store the gaps in a heap to have them sorted by decreasing size
238 gapsheap = []
238 gapsheap = []
239 heapq.heapify(gapsheap)
239 heapq.heapify(gapsheap)
240 prevend = None
240 prevend = None
241 for i, rev in enumerate(revs):
241 for i, rev in enumerate(revs):
242 revstart = start(rev)
242 revstart = start(rev)
243 revlen = length(rev)
243 revlen = length(rev)
244
244
245 # Skip empty revisions to form larger holes
245 # Skip empty revisions to form larger holes
246 if revlen == 0:
246 if revlen == 0:
247 continue
247 continue
248
248
249 if prevend is not None:
249 if prevend is not None:
250 gapsize = revstart - prevend
250 gapsize = revstart - prevend
251 # only consider holes that are large enough
251 # only consider holes that are large enough
252 if gapsize > revlog._srmingapsize:
252 if gapsize > revlog._srmingapsize:
253 heapq.heappush(gapsheap, (-gapsize, i))
253 heapq.heappush(gapsheap, (-gapsize, i))
254
254
255 prevend = revstart + revlen
255 prevend = revstart + revlen
256
256
257 # Collect the indices of the largest holes until the density is acceptable
257 # Collect the indices of the largest holes until the density is acceptable
258 indicesheap = []
258 indicesheap = []
259 heapq.heapify(indicesheap)
259 heapq.heapify(indicesheap)
260 while gapsheap and density < revlog._srdensitythreshold:
260 while gapsheap and density < revlog._srdensitythreshold:
261 oppgapsize, gapidx = heapq.heappop(gapsheap)
261 oppgapsize, gapidx = heapq.heappop(gapsheap)
262
262
263 heapq.heappush(indicesheap, gapidx)
263 heapq.heappush(indicesheap, gapidx)
264
264
265 # the gap sizes are stored as negatives to be sorted decreasingly
265 # the gap sizes are stored as negatives to be sorted decreasingly
266 # by the heap
266 # by the heap
267 readdata -= (-oppgapsize)
267 readdata -= (-oppgapsize)
268 if readdata > 0:
268 if readdata > 0:
269 density = chainpayload / float(readdata)
269 density = chainpayload / float(readdata)
270 else:
270 else:
271 density = 1.0
271 density = 1.0
272
272
273 # Cut the revs at collected indices
273 # Cut the revs at collected indices
274 previdx = 0
274 previdx = 0
275 while indicesheap:
275 while indicesheap:
276 idx = heapq.heappop(indicesheap)
276 idx = heapq.heappop(indicesheap)
277
277
278 chunk = _trimchunk(revlog, revs, previdx, idx)
278 chunk = _trimchunk(revlog, revs, previdx, idx)
279 if chunk:
279 if chunk:
280 yield chunk
280 yield chunk
281
281
282 previdx = idx
282 previdx = idx
283
283
284 chunk = _trimchunk(revlog, revs, previdx)
284 chunk = _trimchunk(revlog, revs, previdx)
285 if chunk:
285 if chunk:
286 yield chunk
286 yield chunk
287
287
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    """Immutable record describing a candidate delta for a revision."""
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()
297
297
class _deltacomputer(object):
    """Computes and selects delta bases for revisions added to a revlog."""

    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides revisions that present an interest to be diffed against,
        grouped by level of easiness.
        """
        revlog = self.revlog
        gdelta = revlog._generaldelta
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            # every rev we yield is also recorded in ``tested`` so later
            # groups never re-propose it
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and gdelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if gdelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]

                if not revlog._aggressivemergedeltas and len(parents) == 2:
                    parents.sort()
                    # To minimize the chance of having to build a fulltext,
                    # pick first whichever parent is closest to us (max rev)
                    yield (parents[1],)
                    # then the other one (min rev) if the first did not fit
                    yield (parents[0],)
                    tested.update(parents)
                elif len(parents) > 0:
                    # Test all parents (1 or 2), and keep the best candidate
                    yield parents
                    tested.update(parents)

            if prev not in tested:
                # other approach failed try against prev to hopefully save us a
                # fulltext.
                yield (prev,)
                tested.add(prev)

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        """Compute the raw delta between revision ``base`` and *revinfo*."""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        """Build a _deltainfo describing *revinfo* delta'd against ``base``."""
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo
459
460
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()
478
479
# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
490
491
class revlogoldio(object):
    """Reader/writer for the version-0 ("old") revlog index format."""

    def __init__(self):
        # size in bytes of one v0 index entry
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        """Parse raw v0 index *data* into (index, nodemap, None).

        Entries are converted to the revlogv1 in-memory tuple layout;
        ``inline`` is accepted for interface parity but v0 has no inline
        data, so the chunk cache slot is always None.
        """
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        """Serialize an in-memory (v1-layout) index *entry* to v0 bytes.

        ``node`` maps a rev to its nodeid (v0 stores parent nodeids, not
        parent revs).  v0 cannot represent index flags, hence the error.
        """
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)
523
524
# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
# offset and flags share the leading 8-byte field (">Q", offset in the
# high 48 bits); the 20-byte nodeid is padded with 12 null bytes ("12x")
# up to 32 bytes.
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
# the leading 4 bytes of the first index entry double as the revlog
# version header
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff
543
544
class revlogio(object):
    """i/o helper for the current (revlogng) index format."""

    def __init__(self):
        # fixed byte width of one index record
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        """parse raw index ``data`` with the C extension and return a
        (index, nodemap-or-None, chunk cache) triple"""
        index, cache = parsers.parse_index2(data, inline)
        nodemap = getattr(index, 'nodemap', None)
        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        """encode ``entry`` for revision ``rev``

        The first record doubles as the revlog header, so its leading 4
        bytes carry the version number instead of the offset's high bytes.
        """
        packed = indexformatng_pack(*entry)
        if rev != 0:
            return packed
        return versionformat_pack(version) + packed[4:]
558
559
559 class revlog(object):
560 class revlog(object):
560 """
561 """
561 the underlying revision storage object
562 the underlying revision storage object
562
563
563 A revlog consists of two parts, an index and the revision data.
564 A revlog consists of two parts, an index and the revision data.
564
565
565 The index is a file with a fixed record size containing
566 The index is a file with a fixed record size containing
566 information on each revision, including its nodeid (hash), the
567 information on each revision, including its nodeid (hash), the
567 nodeids of its parents, the position and offset of its data within
568 nodeids of its parents, the position and offset of its data within
568 the data file, and the revision it's based on. Finally, each entry
569 the data file, and the revision it's based on. Finally, each entry
569 contains a linkrev entry that can serve as a pointer to external
570 contains a linkrev entry that can serve as a pointer to external
570 data.
571 data.
571
572
572 The revision data itself is a linear collection of data chunks.
573 The revision data itself is a linear collection of data chunks.
573 Each chunk represents a revision and is usually represented as a
574 Each chunk represents a revision and is usually represented as a
574 delta against the previous chunk. To bound lookup time, runs of
575 delta against the previous chunk. To bound lookup time, runs of
575 deltas are limited to about 2 times the length of the original
576 deltas are limited to about 2 times the length of the original
576 version data. This makes retrieval of a version proportional to
577 version data. This makes retrieval of a version proportional to
577 its size, or O(1) relative to the number of revisions.
578 its size, or O(1) relative to the number of revisions.
578
579
579 Both pieces of the revlog are written to in an append-only
580 Both pieces of the revlog are written to in an append-only
580 fashion, which means we never need to rewrite a file to insert or
581 fashion, which means we never need to rewrite a file to insert or
581 remove data, and can use some simple techniques to avoid the need
582 remove data, and can use some simple techniques to avoid the need
582 for locking while reading.
583 for locking while reading.
583
584
584 If checkambig, indexfile is opened with checkambig=True at
585 If checkambig, indexfile is opened with checkambig=True at
585 writing, to avoid file stat ambiguity.
586 writing, to avoid file stat ambiguity.
586
587
587 If mmaplargeindex is True, and an mmapindexthreshold is set, the
588 If mmaplargeindex is True, and an mmapindexthreshold is set, the
588 index will be mmapped rather than read if it is larger than the
589 index will be mmapped rather than read if it is larger than the
589 configured threshold.
590 configured threshold.
590
591
591 If censorable is True, the revlog can have censored revisions.
592 If censorable is True, the revlog can have censored revisions.
592 """
593 """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        indexfile is the path of the index file; datafile defaults to the
        same path with a ".d" suffix. If checkambig is True, the index is
        written with checkambig=True to avoid file stat ambiguity. If
        mmaplargeindex is True and the opener provides an
        'mmapindexthreshold' option, indexes at least that large are
        mmapped rather than read. censorable marks revlogs that may
        contain censored revisions.

        Raises RevlogError for invalid configuration or a corrupt or
        unrecognized on-disk format.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._srdensitythreshold = 0.25
        self._srmingapsize = 262144

        # override the defaults above from the opener's option dictionary,
        # when one is provided
        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._withsparseread = bool(opts.get('with-sparse-read', False))
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        # the chunk cache size must be a power of two for the masking
        # arithmetic used when slicing cached data
        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        # read the on-disk index; a missing file simply means an empty,
        # to-be-created revlog
        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                        self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                # the stored header overrides any configured default version
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        # validate that the flag bits are legal for the detected format
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}
726
727
727 @util.propertycache
728 @util.propertycache
728 def _compressor(self):
729 def _compressor(self):
729 return util.compengines[self._compengine].revlogcompressor()
730 return util.compengines[self._compengine].revlogcompressor()
730
731
731 def _indexfp(self, mode='r'):
732 def _indexfp(self, mode='r'):
732 """file object for the revlog's index file"""
733 """file object for the revlog's index file"""
733 args = {r'mode': mode}
734 args = {r'mode': mode}
734 if mode != 'r':
735 if mode != 'r':
735 args[r'checkambig'] = self._checkambig
736 args[r'checkambig'] = self._checkambig
736 if mode == 'w':
737 if mode == 'w':
737 args[r'atomictemp'] = True
738 args[r'atomictemp'] = True
738 return self.opener(self.indexfile, **args)
739 return self.opener(self.indexfile, **args)
739
740
    def _datafp(self, mode='r'):
        """file object for the revlog's data file

        Unlike _indexfp, no checkambig/atomictemp handling is applied for
        write modes here.
        """
        return self.opener(self.datafile, mode=mode)
743
744
744 @contextlib.contextmanager
745 @contextlib.contextmanager
745 def _datareadfp(self, existingfp=None):
746 def _datareadfp(self, existingfp=None):
746 """file object suitable to read data"""
747 """file object suitable to read data"""
747 if existingfp is not None:
748 if existingfp is not None:
748 yield existingfp
749 yield existingfp
749 else:
750 else:
750 if self._inline:
751 if self._inline:
751 func = self._indexfp
752 func = self._indexfp
752 else:
753 else:
753 func = self._datafp
754 func = self._datafp
754 with func() as fp:
755 with func() as fp:
755 yield fp
756 yield fp
756
757
757 def tip(self):
758 def tip(self):
758 return self.node(len(self.index) - 2)
759 return self.node(len(self.index) - 2)
759 def __contains__(self, rev):
760 def __contains__(self, rev):
760 return 0 <= rev < len(self)
761 return 0 <= rev < len(self)
761 def __len__(self):
762 def __len__(self):
762 return len(self.index) - 1
763 return len(self.index) - 1
763 def __iter__(self):
764 def __iter__(self):
764 return iter(xrange(len(self)))
765 return iter(xrange(len(self)))
765 def revs(self, start=0, stop=None):
766 def revs(self, start=0, stop=None):
766 """iterate over all rev in this revlog (from start to stop)"""
767 """iterate over all rev in this revlog (from start to stop)"""
767 step = 1
768 step = 1
768 if stop is not None:
769 if stop is not None:
769 if start > stop:
770 if start > stop:
770 step = -1
771 step = -1
771 stop += step
772 stop += step
772 else:
773 else:
773 stop = len(self)
774 stop = len(self)
774 return xrange(start, stop, step)
775 return xrange(start, stop, step)
775
776
776 @util.propertycache
777 @util.propertycache
777 def nodemap(self):
778 def nodemap(self):
778 self.rev(self.node(0))
779 self.rev(self.node(0))
779 return self._nodecache
780 return self._nodecache
780
781
781 def hasnode(self, node):
782 def hasnode(self, node):
782 try:
783 try:
783 self.rev(node)
784 self.rev(node)
784 return True
785 return True
785 except KeyError:
786 except KeyError:
786 return False
787 return False
787
788
788 def candelta(self, baserev, rev):
789 def candelta(self, baserev, rev):
789 """whether two revisions (baserev, rev) can be delta-ed or not"""
790 """whether two revisions (baserev, rev) can be delta-ed or not"""
790 # Disable delta if either rev requires a content-changing flag
791 # Disable delta if either rev requires a content-changing flag
791 # processor (ex. LFS). This is because such flag processor can alter
792 # processor (ex. LFS). This is because such flag processor can alter
792 # the rawtext content that the delta will be based on, and two clients
793 # the rawtext content that the delta will be based on, and two clients
793 # could have a same revlog node with different flags (i.e. different
794 # could have a same revlog node with different flags (i.e. different
794 # rawtext contents) and the delta could be incompatible.
795 # rawtext contents) and the delta could be incompatible.
795 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
796 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
796 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
797 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
797 return False
798 return False
798 return True
799 return True
799
800
800 def clearcaches(self):
801 def clearcaches(self):
801 self._cache = None
802 self._cache = None
802 self._chainbasecache.clear()
803 self._chainbasecache.clear()
803 self._chunkcache = (0, '')
804 self._chunkcache = (0, '')
804 self._pcache = {}
805 self._pcache = {}
805
806
806 try:
807 try:
807 self._nodecache.clearcaches()
808 self._nodecache.clearcaches()
808 except AttributeError:
809 except AttributeError:
809 self._nodecache = {nullid: nullrev}
810 self._nodecache = {nullid: nullrev}
810 self._nodepos = None
811 self._nodepos = None
811
812
    def rev(self, node):
        """return the revision number holding ``node``

        Raises WdirUnsupported for the magic working-directory nodes and
        LookupError (a KeyError subclass) when the node is unknown.
        """
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                # start from the last real revision (-2 skips the null
                # sentinel entry)
                p = len(i) - 2
            else:
                assert p < len(i)
            # scan backwards from the last unscanned position, memoizing
            # every node encountered along the way
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    # remember where to resume the next cache-miss scan
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
840
841
841 # Accessors for index entries.
842 # Accessors for index entries.
842
843
843 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
844 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
844 # are flags.
845 # are flags.
845 def start(self, rev):
846 def start(self, rev):
846 return int(self.index[rev][0] >> 16)
847 return int(self.index[rev][0] >> 16)
847
848
848 def flags(self, rev):
849 def flags(self, rev):
849 return self.index[rev][0] & 0xFFFF
850 return self.index[rev][0] & 0xFFFF
850
851
851 def length(self, rev):
852 def length(self, rev):
852 return self.index[rev][1]
853 return self.index[rev][1]
853
854
854 def rawsize(self, rev):
855 def rawsize(self, rev):
855 """return the length of the uncompressed text for a given revision"""
856 """return the length of the uncompressed text for a given revision"""
856 l = self.index[rev][2]
857 l = self.index[rev][2]
857 if l is not None and l >= 0:
858 if l is not None and l >= 0:
858 return l
859 return l
859
860
860 t = self.revision(rev, raw=True)
861 t = self.revision(rev, raw=True)
861 return len(t)
862 return len(t)
862
863
863 def size(self, rev):
864 def size(self, rev):
864 """length of non-raw text (processed by a "read" flag processor)"""
865 """length of non-raw text (processed by a "read" flag processor)"""
865 # fast path: if no "read" flag processor could change the content,
866 # fast path: if no "read" flag processor could change the content,
866 # size is rawsize. note: ELLIPSIS is known to not change the content.
867 # size is rawsize. note: ELLIPSIS is known to not change the content.
867 flags = self.flags(rev)
868 flags = self.flags(rev)
868 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
869 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
869 return self.rawsize(rev)
870 return self.rawsize(rev)
870
871
871 return len(self.revision(rev, raw=False))
872 return len(self.revision(rev, raw=False))
872
873
873 def chainbase(self, rev):
874 def chainbase(self, rev):
874 base = self._chainbasecache.get(rev)
875 base = self._chainbasecache.get(rev)
875 if base is not None:
876 if base is not None:
876 return base
877 return base
877
878
878 index = self.index
879 index = self.index
879 base = index[rev][3]
880 base = index[rev][3]
880 while base != rev:
881 while base != rev:
881 rev = base
882 rev = base
882 base = index[rev][3]
883 base = index[rev][3]
883
884
884 self._chainbasecache[rev] = base
885 self._chainbasecache[rev] = base
885 return base
886 return base
886
887
887 def linkrev(self, rev):
888 def linkrev(self, rev):
888 return self.index[rev][4]
889 return self.index[rev][4]
889
890
890 def parentrevs(self, rev):
891 def parentrevs(self, rev):
891 try:
892 try:
892 entry = self.index[rev]
893 entry = self.index[rev]
893 except IndexError:
894 except IndexError:
894 if rev == wdirrev:
895 if rev == wdirrev:
895 raise error.WdirUnsupported
896 raise error.WdirUnsupported
896 raise
897 raise
897
898
898 return entry[5], entry[6]
899 return entry[5], entry[6]
899
900
900 def node(self, rev):
901 def node(self, rev):
901 try:
902 try:
902 return self.index[rev][7]
903 return self.index[rev][7]
903 except IndexError:
904 except IndexError:
904 if rev == wdirrev:
905 if rev == wdirrev:
905 raise error.WdirUnsupported
906 raise error.WdirUnsupported
906 raise
907 raise
907
908
908 # Derived from index values.
909 # Derived from index values.
909
910
910 def end(self, rev):
911 def end(self, rev):
911 return self.start(rev) + self.length(rev)
912 return self.start(rev) + self.length(rev)
912
913
913 def parents(self, node):
914 def parents(self, node):
914 i = self.index
915 i = self.index
915 d = i[self.rev(node)]
916 d = i[self.rev(node)]
916 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
917 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
917
918
918 def chainlen(self, rev):
919 def chainlen(self, rev):
919 return self._chaininfo(rev)[0]
920 return self._chaininfo(rev)[0]
920
921
    def _chaininfo(self, rev):
        """return (chain length, total compressed delta size) for ``rev``

        Results are memoized in ``self._chaininfocache``; the walk also
        short-circuits as soon as it reaches an ancestor whose info is
        already cached.
        """
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        # walk down the delta chain until we reach a full snapshot (an
        # entry whose base is itself); without generaldelta the base is
        # implicitly the previous revision
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
951
952
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        # walk until the entry is its own base (a full snapshot) or we hit
        # ``stoprev``; without generaldelta the base is implicitly the
        # previous revision
        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            # the chain base itself is part of the chain
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
992
993
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        If 'inclusive' is True, the members of 'revs' themselves are
        yielded as well.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)
1001
1002
1002 def descendants(self, revs):
1003 def descendants(self, revs):
1003 """Generate the descendants of 'revs' in revision order.
1004 """Generate the descendants of 'revs' in revision order.
1004
1005
1005 Yield a sequence of revision numbers starting with a child of
1006 Yield a sequence of revision numbers starting with a child of
1006 some rev in revs, i.e., each revision is *not* considered a
1007 some rev in revs, i.e., each revision is *not* considered a
1007 descendant of itself. Results are ordered by revision number (a
1008 descendant of itself. Results are ordered by revision number (a
1008 topological sort)."""
1009 topological sort)."""
1009 first = min(revs)
1010 first = min(revs)
1010 if first == nullrev:
1011 if first == nullrev:
1011 for i in self:
1012 for i in self:
1012 yield i
1013 yield i
1013 return
1014 return
1014
1015
1015 seen = set(revs)
1016 seen = set(revs)
1016 for i in self.revs(start=first + 1):
1017 for i in self.revs(start=first + 1):
1017 for x in self.parentrevs(i):
1018 for x in self.parentrevs(i):
1018 if x != nullrev and x in seen:
1019 if x != nullrev and x in seen:
1019 seen.add(i)
1020 seen.add(i)
1020 yield i
1021 yield i
1021 break
1022 break
1022
1023
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        # Work with revision numbers internally; convert back to nodes
        # only for the returned 'missing' list.
        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            # Set-like wrapper combining an eagerly-populated set with a
            # lazily-consumed iterable (the ancestor generator), so we do
            # not have to materialize all ancestors of 'common' up front.
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                # NOTE: membership tests may consume self.lazyvalues.
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        # (breadth-first walk over parents, stopping at anything in 'has')
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        # sort to restore topological (revision-number) order
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
1086
1087
1087 def incrementalmissingrevs(self, common=None):
1088 def incrementalmissingrevs(self, common=None):
1088 """Return an object that can be used to incrementally compute the
1089 """Return an object that can be used to incrementally compute the
1089 revision numbers of the ancestors of arbitrary sets that are not
1090 revision numbers of the ancestors of arbitrary sets that are not
1090 ancestors of common. This is an ancestor.incrementalmissingancestors
1091 ancestors of common. This is an ancestor.incrementalmissingancestors
1091 object.
1092 object.
1092
1093
1093 'common' is a list of revision numbers. If common is not supplied, uses
1094 'common' is a list of revision numbers. If common is not supplied, uses
1094 nullrev.
1095 nullrev.
1095 """
1096 """
1096 if common is None:
1097 if common is None:
1097 common = [nullrev]
1098 common = [nullrev]
1098
1099
1099 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1100 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1100
1101
1101 def findmissingrevs(self, common=None, heads=None):
1102 def findmissingrevs(self, common=None, heads=None):
1102 """Return the revision numbers of the ancestors of heads that
1103 """Return the revision numbers of the ancestors of heads that
1103 are not ancestors of common.
1104 are not ancestors of common.
1104
1105
1105 More specifically, return a list of revision numbers corresponding to
1106 More specifically, return a list of revision numbers corresponding to
1106 nodes N such that every N satisfies the following constraints:
1107 nodes N such that every N satisfies the following constraints:
1107
1108
1108 1. N is an ancestor of some node in 'heads'
1109 1. N is an ancestor of some node in 'heads'
1109 2. N is not an ancestor of any node in 'common'
1110 2. N is not an ancestor of any node in 'common'
1110
1111
1111 The list is sorted by revision number, meaning it is
1112 The list is sorted by revision number, meaning it is
1112 topologically sorted.
1113 topologically sorted.
1113
1114
1114 'heads' and 'common' are both lists of revision numbers. If heads is
1115 'heads' and 'common' are both lists of revision numbers. If heads is
1115 not supplied, uses all of the revlog's heads. If common is not
1116 not supplied, uses all of the revlog's heads. If common is not
1116 supplied, uses nullid."""
1117 supplied, uses nullid."""
1117 if common is None:
1118 if common is None:
1118 common = [nullrev]
1119 common = [nullrev]
1119 if heads is None:
1120 if heads is None:
1120 heads = self.headrevs()
1121 heads = self.headrevs()
1121
1122
1122 inc = self.incrementalmissingrevs(common=common)
1123 inc = self.incrementalmissingrevs(common=common)
1123 return inc.missingancestors(heads)
1124 return inc.missingancestors(heads)
1124
1125
1125 def findmissing(self, common=None, heads=None):
1126 def findmissing(self, common=None, heads=None):
1126 """Return the ancestors of heads that are not ancestors of common.
1127 """Return the ancestors of heads that are not ancestors of common.
1127
1128
1128 More specifically, return a list of nodes N such that every N
1129 More specifically, return a list of nodes N such that every N
1129 satisfies the following constraints:
1130 satisfies the following constraints:
1130
1131
1131 1. N is an ancestor of some node in 'heads'
1132 1. N is an ancestor of some node in 'heads'
1132 2. N is not an ancestor of any node in 'common'
1133 2. N is not an ancestor of any node in 'common'
1133
1134
1134 The list is sorted by revision number, meaning it is
1135 The list is sorted by revision number, meaning it is
1135 topologically sorted.
1136 topologically sorted.
1136
1137
1137 'heads' and 'common' are both lists of node IDs. If heads is
1138 'heads' and 'common' are both lists of node IDs. If heads is
1138 not supplied, uses all of the revlog's heads. If common is not
1139 not supplied, uses all of the revlog's heads. If common is not
1139 supplied, uses nullid."""
1140 supplied, uses nullid."""
1140 if common is None:
1141 if common is None:
1141 common = [nullid]
1142 common = [nullid]
1142 if heads is None:
1143 if heads is None:
1143 heads = self.heads()
1144 heads = self.heads()
1144
1145
1145 common = [self.rev(n) for n in common]
1146 common = [self.rev(n) for n in common]
1146 heads = [self.rev(n) for n in heads]
1147 heads = [self.rev(n) for n in heads]
1147
1148
1148 inc = self.incrementalmissingrevs(common=common)
1149 inc = self.incrementalmissingrevs(common=common)
1149 return [self.node(r) for r in inc.missingancestors(heads)]
1150 return [self.node(r) for r in inc.missingancestors(heads)]
1150
1151
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked is descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants.  (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        # NOTE(review): dict.iteritems is Python 2 only; this file predates
        # the Python 3 port.
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
1305
1306
    def headrevs(self):
        """Return the revision numbers of all heads.

        Prefers the C-accelerated index implementation; falls back to
        the pure-Python _headrevs when the index has no headrevs().
        """
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()
1311
1312
    def computephases(self, roots):
        """Delegate phase-root computation to the index.

        Thin wrapper around index.computephasesmapsets; unlike headrevs
        there is no pure-Python fallback visible here, so this
        presumably requires the C index -- verify against callers.
        """
        return self.index.computephasesmapsets(roots)
1314
1315
1315 def _headrevs(self):
1316 def _headrevs(self):
1316 count = len(self)
1317 count = len(self)
1317 if not count:
1318 if not count:
1318 return [nullrev]
1319 return [nullrev]
1319 # we won't iter over filtered rev so nobody is a head at start
1320 # we won't iter over filtered rev so nobody is a head at start
1320 ishead = [0] * (count + 1)
1321 ishead = [0] * (count + 1)
1321 index = self.index
1322 index = self.index
1322 for r in self:
1323 for r in self:
1323 ishead[r] = 1 # I may be an head
1324 ishead[r] = 1 # I may be an head
1324 e = index[r]
1325 e = index[r]
1325 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1326 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1326 return [r for r, val in enumerate(ishead) if val]
1327 return [r for r, val in enumerate(ishead) if val]
1327
1328
1328 def heads(self, start=None, stop=None):
1329 def heads(self, start=None, stop=None):
1329 """return the list of all nodes that have no children
1330 """return the list of all nodes that have no children
1330
1331
1331 if start is specified, only heads that are descendants of
1332 if start is specified, only heads that are descendants of
1332 start will be returned
1333 start will be returned
1333 if stop is specified, it will consider all the revs from stop
1334 if stop is specified, it will consider all the revs from stop
1334 as if they had no children
1335 as if they had no children
1335 """
1336 """
1336 if start is None and stop is None:
1337 if start is None and stop is None:
1337 if not len(self):
1338 if not len(self):
1338 return [nullid]
1339 return [nullid]
1339 return [self.node(r) for r in self.headrevs()]
1340 return [self.node(r) for r in self.headrevs()]
1340
1341
1341 if start is None:
1342 if start is None:
1342 start = nullid
1343 start = nullid
1343 if stop is None:
1344 if stop is None:
1344 stop = []
1345 stop = []
1345 stoprevs = set([self.rev(n) for n in stop])
1346 stoprevs = set([self.rev(n) for n in stop])
1346 startrev = self.rev(start)
1347 startrev = self.rev(start)
1347 reachable = {startrev}
1348 reachable = {startrev}
1348 heads = {startrev}
1349 heads = {startrev}
1349
1350
1350 parentrevs = self.parentrevs
1351 parentrevs = self.parentrevs
1351 for r in self.revs(start=startrev + 1):
1352 for r in self.revs(start=startrev + 1):
1352 for p in parentrevs(r):
1353 for p in parentrevs(r):
1353 if p in reachable:
1354 if p in reachable:
1354 if r not in stoprevs:
1355 if r not in stoprevs:
1355 reachable.add(r)
1356 reachable.add(r)
1356 heads.add(r)
1357 heads.add(r)
1357 if p in heads and p not in stoprevs:
1358 if p in heads and p not in stoprevs:
1358 heads.remove(p)
1359 heads.remove(p)
1359
1360
1360 return [self.node(r) for r in heads]
1361 return [self.node(r) for r in heads]
1361
1362
1362 def children(self, node):
1363 def children(self, node):
1363 """find the children of a given node"""
1364 """find the children of a given node"""
1364 c = []
1365 c = []
1365 p = self.rev(node)
1366 p = self.rev(node)
1366 for r in self.revs(start=p + 1):
1367 for r in self.revs(start=p + 1):
1367 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1368 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1368 if prevs:
1369 if prevs:
1369 for pr in prevs:
1370 for pr in prevs:
1370 if pr == p:
1371 if pr == p:
1371 c.append(self.node(r))
1372 c.append(self.node(r))
1372 elif p == nullrev:
1373 elif p == nullrev:
1373 c.append(self.node(r))
1374 c.append(self.node(r))
1374 return c
1375 return c
1375
1376
1376 def descendant(self, start, end):
1377 def descendant(self, start, end):
1377 if start == nullrev:
1378 if start == nullrev:
1378 return True
1379 return True
1379 for i in self.descendants([start]):
1380 for i in self.descendants([start]):
1380 if i == end:
1381 if i == end:
1381 return True
1382 return True
1382 elif i > end:
1383 elif i > end:
1383 break
1384 break
1384 return False
1385 return False
1385
1386
1386 def commonancestorsheads(self, a, b):
1387 def commonancestorsheads(self, a, b):
1387 """calculate all the heads of the common ancestors of nodes a and b"""
1388 """calculate all the heads of the common ancestors of nodes a and b"""
1388 a, b = self.rev(a), self.rev(b)
1389 a, b = self.rev(a), self.rev(b)
1389 try:
1390 try:
1390 ancs = self.index.commonancestorsheads(a, b)
1391 ancs = self.index.commonancestorsheads(a, b)
1391 except (AttributeError, OverflowError): # C implementation failed
1392 except (AttributeError, OverflowError): # C implementation failed
1392 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1393 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1393 return pycompat.maplist(self.node, ancs)
1394 return pycompat.maplist(self.node, ancs)
1394
1395
1395 def isancestor(self, a, b):
1396 def isancestor(self, a, b):
1396 """return True if node a is an ancestor of node b
1397 """return True if node a is an ancestor of node b
1397
1398
1398 The implementation of this is trivial but the use of
1399 The implementation of this is trivial but the use of
1399 commonancestorsheads is not."""
1400 commonancestorsheads is not."""
1400 return a in self.commonancestorsheads(a, b)
1401 return a in self.commonancestorsheads(a, b)
1401
1402
1402 def ancestor(self, a, b):
1403 def ancestor(self, a, b):
1403 """calculate the "best" common ancestor of nodes a and b"""
1404 """calculate the "best" common ancestor of nodes a and b"""
1404
1405
1405 a, b = self.rev(a), self.rev(b)
1406 a, b = self.rev(a), self.rev(b)
1406 try:
1407 try:
1407 ancs = self.index.ancestors(a, b)
1408 ancs = self.index.ancestors(a, b)
1408 except (AttributeError, OverflowError):
1409 except (AttributeError, OverflowError):
1409 ancs = ancestor.ancestors(self.parentrevs, a, b)
1410 ancs = ancestor.ancestors(self.parentrevs, a, b)
1410 if ancs:
1411 if ancs:
1411 # choose a consistent winner when there's a tie
1412 # choose a consistent winner when there's a tie
1412 return min(map(self.node, ancs))
1413 return min(map(self.node, ancs))
1413 return nullid
1414 return nullid
1414
1415
    def _match(self, id):
        """Resolve an *exact* identifier to a node, or return None.

        Accepted forms: an int revision number, a 20-byte binary node,
        a decimal string revision (possibly negative), or a 40-char hex
        node.  Partial hex prefixes are handled by _partialmatch, not
        here.
        """
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            # reject non-canonical spellings such as "+1" or "01"
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                # negative revs count from the end
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass
1448
1449
    def _partialmatch(self, id):
        """Resolve a (possibly partial) hex node id prefix to a node.

        Returns the matching node, or None if nothing matches.  Raises
        LookupError on an ambiguous prefix and error.WdirUnsupported
        when the prefix can only denote the working-directory fake node.
        """
        # we don't care wdirfilenodeids as they should be always full hash
        maybewdir = wdirhex.startswith(id)
        try:
            # fast path: C radix-tree lookup in the index
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        # slow-path results are memoized per prefix
        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                # linear scan of the index for nodes with this binary prefix
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                # refine with the full (odd-length-aware) hex prefix and
                # drop nodes hidden by filtering
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass
1496
1497
def lookup(self, id):
    """Locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid

    Tries an exact match first (``_match``), then an unambiguous
    prefix match (``_partialmatch``).  Raises LookupError if neither
    finds a node.
    """
    n = self._match(id)
    if n is not None:
        return n
    n = self._partialmatch(id)
    if n:
        return n

    raise LookupError(id, self.indexfile, _('no match found'))
1510
1511
def shortest(self, node, minlength=1):
    """Find the shortest unambiguous prefix that matches node.

    Prefers the C index's ``shortest`` implementation when available
    and the repo has no filtered revisions; otherwise falls back to a
    pure-Python scan that also disambiguates against the working
    directory pseudo-node (all 'f's).
    """
    def isvalid(prefix):
        # A prefix is valid if it resolves to exactly one node.
        try:
            node = self._partialmatch(prefix)
        except error.RevlogError:
            # ambiguous identifier
            return False
        except error.WdirUnsupported:
            # single 'ff...' match
            return True
        if node is None:
            raise LookupError(node, self.indexfile, _('no node'))
        return True

    def maybewdir(prefix):
        # could this prefix also match the wdir pseudo-node?
        return all(c == 'f' for c in prefix)

    hexnode = hex(node)

    def disambiguate(hexnode, minlength):
        """Disambiguate against wdirid by extending all-'f' prefixes."""
        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if not maybewdir(prefix):
                return prefix

    if not getattr(self, 'filteredrevs', None):
        try:
            # fast path: C index computes the shortest unique prefix
            length = max(self.index.shortest(node), minlength)
            return disambiguate(hexnode, length)
        except RevlogError:
            if node != wdirid:
                raise LookupError(node, self.indexfile, _('no node'))
        except AttributeError:
            # Fall through to pure code
            pass

    if node == wdirid:
        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return prefix

    for length in range(minlength, 41):
        prefix = hexnode[:length]
        if isvalid(prefix):
            return disambiguate(hexnode, length)
1558
1559
def cmp(self, node, text):
    """compare text with a given file revision

    returns True if text is different than what is stored.

    The comparison is done by hashing ``text`` with the node's parents
    and comparing against the stored node id, avoiding a full
    revision reconstruction.
    """
    p1, p2 = self.parents(node)
    return hash(text, p1, p2) != node
1566
1567
1567 def _cachesegment(self, offset, data):
1568 def _cachesegment(self, offset, data):
1568 """Add a segment to the revlog cache.
1569 """Add a segment to the revlog cache.
1569
1570
1570 Accepts an absolute offset and the data that is at that location.
1571 Accepts an absolute offset and the data that is at that location.
1571 """
1572 """
1572 o, d = self._chunkcache
1573 o, d = self._chunkcache
1573 # try to add to existing cache
1574 # try to add to existing cache
1574 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1575 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1575 self._chunkcache = o, d + data
1576 self._chunkcache = o, d + data
1576 else:
1577 else:
1577 self._chunkcache = offset, data
1578 self._chunkcache = offset, data
1578
1579
def _readsegment(self, offset, length, df=None):
    """Load a segment of raw data from the revlog.

    Accepts an absolute offset, length to read, and an optional existing
    file handle to read from.

    If an existing file handle is passed, it will be seeked and the
    original seek position will NOT be restored.

    Returns a str or buffer of raw byte data.
    """
    # Cache data both forward and backward around the requested
    # data, in a fixed size window. This helps speed up operations
    # involving reading the revlog backwards.
    cachesize = self._chunkcachesize
    # round the window down/up to cachesize alignment (cachesize is a
    # power of two, so the mask arithmetic is exact)
    realoffset = offset & ~(cachesize - 1)
    reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                  - realoffset)
    with self._datareadfp(df) as df:
        df.seek(realoffset)
        d = df.read(reallength)
    self._cachesegment(realoffset, d)
    if offset != realoffset or reallength != length:
        # hand back only the requested slice, without copying
        return util.buffer(d, offset - realoffset, length)
    return d
1604
1605
1605 def _getsegment(self, offset, length, df=None):
1606 def _getsegment(self, offset, length, df=None):
1606 """Obtain a segment of raw data from the revlog.
1607 """Obtain a segment of raw data from the revlog.
1607
1608
1608 Accepts an absolute offset, length of bytes to obtain, and an
1609 Accepts an absolute offset, length of bytes to obtain, and an
1609 optional file handle to the already-opened revlog. If the file
1610 optional file handle to the already-opened revlog. If the file
1610 handle is used, it's original seek position will not be preserved.
1611 handle is used, it's original seek position will not be preserved.
1611
1612
1612 Requests for data may be returned from a cache.
1613 Requests for data may be returned from a cache.
1613
1614
1614 Returns a str or a buffer instance of raw byte data.
1615 Returns a str or a buffer instance of raw byte data.
1615 """
1616 """
1616 o, d = self._chunkcache
1617 o, d = self._chunkcache
1617 l = len(d)
1618 l = len(d)
1618
1619
1619 # is it in the cache?
1620 # is it in the cache?
1620 cachestart = offset - o
1621 cachestart = offset - o
1621 cacheend = cachestart + length
1622 cacheend = cachestart + length
1622 if cachestart >= 0 and cacheend <= l:
1623 if cachestart >= 0 and cacheend <= l:
1623 if cachestart == 0 and cacheend == l:
1624 if cachestart == 0 and cacheend == l:
1624 return d # avoid a copy
1625 return d # avoid a copy
1625 return util.buffer(d, cachestart, cacheend - cachestart)
1626 return util.buffer(d, cachestart, cacheend - cachestart)
1626
1627
1627 return self._readsegment(offset, length, df=df)
1628 return self._readsegment(offset, length, df=df)
1628
1629
1629 def _getsegmentforrevs(self, startrev, endrev, df=None):
1630 def _getsegmentforrevs(self, startrev, endrev, df=None):
1630 """Obtain a segment of raw data corresponding to a range of revisions.
1631 """Obtain a segment of raw data corresponding to a range of revisions.
1631
1632
1632 Accepts the start and end revisions and an optional already-open
1633 Accepts the start and end revisions and an optional already-open
1633 file handle to be used for reading. If the file handle is read, its
1634 file handle to be used for reading. If the file handle is read, its
1634 seek position will not be preserved.
1635 seek position will not be preserved.
1635
1636
1636 Requests for data may be satisfied by a cache.
1637 Requests for data may be satisfied by a cache.
1637
1638
1638 Returns a 2-tuple of (offset, data) for the requested range of
1639 Returns a 2-tuple of (offset, data) for the requested range of
1639 revisions. Offset is the integer offset from the beginning of the
1640 revisions. Offset is the integer offset from the beginning of the
1640 revlog and data is a str or buffer of the raw byte data.
1641 revlog and data is a str or buffer of the raw byte data.
1641
1642
1642 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1643 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1643 to determine where each revision's data begins and ends.
1644 to determine where each revision's data begins and ends.
1644 """
1645 """
1645 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1646 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1646 # (functions are expensive).
1647 # (functions are expensive).
1647 index = self.index
1648 index = self.index
1648 istart = index[startrev]
1649 istart = index[startrev]
1649 start = int(istart[0] >> 16)
1650 start = int(istart[0] >> 16)
1650 if startrev == endrev:
1651 if startrev == endrev:
1651 end = start + istart[1]
1652 end = start + istart[1]
1652 else:
1653 else:
1653 iend = index[endrev]
1654 iend = index[endrev]
1654 end = int(iend[0] >> 16) + iend[1]
1655 end = int(iend[0] >> 16) + iend[1]
1655
1656
1656 if self._inline:
1657 if self._inline:
1657 start += (startrev + 1) * self._io.size
1658 start += (startrev + 1) * self._io.size
1658 end += (endrev + 1) * self._io.size
1659 end += (endrev + 1) * self._io.size
1659 length = end - start
1660 length = end - start
1660
1661
1661 return start, self._getsegment(start, length, df=df)
1662 return start, self._getsegment(start, length, df=df)
1662
1663
1663 def _chunk(self, rev, df=None):
1664 def _chunk(self, rev, df=None):
1664 """Obtain a single decompressed chunk for a revision.
1665 """Obtain a single decompressed chunk for a revision.
1665
1666
1666 Accepts an integer revision and an optional already-open file handle
1667 Accepts an integer revision and an optional already-open file handle
1667 to be used for reading. If used, the seek position of the file will not
1668 to be used for reading. If used, the seek position of the file will not
1668 be preserved.
1669 be preserved.
1669
1670
1670 Returns a str holding uncompressed data for the requested revision.
1671 Returns a str holding uncompressed data for the requested revision.
1671 """
1672 """
1672 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1673 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1673
1674
1674 def _chunks(self, revs, df=None):
1675 def _chunks(self, revs, df=None):
1675 """Obtain decompressed chunks for the specified revisions.
1676 """Obtain decompressed chunks for the specified revisions.
1676
1677
1677 Accepts an iterable of numeric revisions that are assumed to be in
1678 Accepts an iterable of numeric revisions that are assumed to be in
1678 ascending order. Also accepts an optional already-open file handle
1679 ascending order. Also accepts an optional already-open file handle
1679 to be used for reading. If used, the seek position of the file will
1680 to be used for reading. If used, the seek position of the file will
1680 not be preserved.
1681 not be preserved.
1681
1682
1682 This function is similar to calling ``self._chunk()`` multiple times,
1683 This function is similar to calling ``self._chunk()`` multiple times,
1683 but is faster.
1684 but is faster.
1684
1685
1685 Returns a list with decompressed data for each requested revision.
1686 Returns a list with decompressed data for each requested revision.
1686 """
1687 """
1687 if not revs:
1688 if not revs:
1688 return []
1689 return []
1689 start = self.start
1690 start = self.start
1690 length = self.length
1691 length = self.length
1691 inline = self._inline
1692 inline = self._inline
1692 iosize = self._io.size
1693 iosize = self._io.size
1693 buffer = util.buffer
1694 buffer = util.buffer
1694
1695
1695 l = []
1696 l = []
1696 ladd = l.append
1697 ladd = l.append
1697
1698
1698 if not self._withsparseread:
1699 if not self._withsparseread:
1699 slicedchunks = (revs,)
1700 slicedchunks = (revs,)
1700 else:
1701 else:
1701 slicedchunks = _slicechunk(self, revs)
1702 slicedchunks = _slicechunk(self, revs)
1702
1703
1703 for revschunk in slicedchunks:
1704 for revschunk in slicedchunks:
1704 firstrev = revschunk[0]
1705 firstrev = revschunk[0]
1705 # Skip trailing revisions with empty diff
1706 # Skip trailing revisions with empty diff
1706 for lastrev in revschunk[::-1]:
1707 for lastrev in revschunk[::-1]:
1707 if length(lastrev) != 0:
1708 if length(lastrev) != 0:
1708 break
1709 break
1709
1710
1710 try:
1711 try:
1711 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1712 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1712 except OverflowError:
1713 except OverflowError:
1713 # issue4215 - we can't cache a run of chunks greater than
1714 # issue4215 - we can't cache a run of chunks greater than
1714 # 2G on Windows
1715 # 2G on Windows
1715 return [self._chunk(rev, df=df) for rev in revschunk]
1716 return [self._chunk(rev, df=df) for rev in revschunk]
1716
1717
1717 decomp = self.decompress
1718 decomp = self.decompress
1718 for rev in revschunk:
1719 for rev in revschunk:
1719 chunkstart = start(rev)
1720 chunkstart = start(rev)
1720 if inline:
1721 if inline:
1721 chunkstart += (rev + 1) * iosize
1722 chunkstart += (rev + 1) * iosize
1722 chunklength = length(rev)
1723 chunklength = length(rev)
1723 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1724 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1724
1725
1725 return l
1726 return l
1726
1727
1727 def _chunkclear(self):
1728 def _chunkclear(self):
1728 """Clear the raw chunk cache."""
1729 """Clear the raw chunk cache."""
1729 self._chunkcache = (0, '')
1730 self._chunkcache = (0, '')
1730
1731
def deltaparent(self, rev):
    """return deltaparent of the given revision

    Returns nullrev for full-snapshot revisions (base == rev).  With
    generaldelta the stored base IS the delta parent; otherwise deltas
    are always against the previous revision.
    """
    base = self.index[rev][3]
    if base == rev:
        return nullrev
    elif self._generaldelta:
        return base
    else:
        return rev - 1
1740
1741
def revdiff(self, rev1, rev2):
    """return or calculate a delta between two revisions

    The delta calculated is in binary form and is intended to be written to
    revlog data directly. So this function needs raw revision data.

    When rev1 happens to be rev2's stored delta parent the on-disk
    delta chunk is returned directly instead of being recomputed.
    """
    if rev1 != nullrev and self.deltaparent(rev2) == rev1:
        return bytes(self._chunk(rev2))

    return mdiff.textdiff(self.revision(rev1, raw=True),
                          self.revision(rev2, raw=True))
1752
1753
def revision(self, nodeorrev, _df=None, raw=False):
    """return an uncompressed revision of a given node or revision
    number.

    _df - an existing file handle to read from. (internal-only)
    raw - an optional argument specifying if the revision data is to be
    treated as raw data when applying flag transforms. 'raw' should be set
    to True when generating changegroups or in debug commands.
    """
    if isinstance(nodeorrev, int):
        rev = nodeorrev
        node = self.node(rev)
    else:
        node = nodeorrev
        rev = None

    cachedrev = None
    flags = None
    rawtext = None
    if node == nullid:
        return ""
    if self._cache:
        if self._cache[0] == node:
            # _cache only stores rawtext
            if raw:
                return self._cache[2]
            # duplicated, but good for perf
            if rev is None:
                rev = self.rev(node)
            if flags is None:
                flags = self.flags(rev)
            # no extra flags set, no flag processor runs, text = rawtext
            if flags == REVIDX_DEFAULT_FLAGS:
                return self._cache[2]
            # rawtext is reusable. need to run flag processor
            rawtext = self._cache[2]

        # a cached revision (even a different one) can shorten the
        # delta chain we must walk below
        cachedrev = self._cache[1]

    # look up what we need to read
    if rawtext is None:
        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            # chain stopped at the cached revision: start from its text
            rawtext = self._cache[2]

        # drop cache to save memory
        self._cache = None

        bins = self._chunks(chain, df=_df)
        if rawtext is None:
            rawtext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(rawtext, bins)
        self._cache = (node, rev, rawtext)

    if flags is None:
        if rev is None:
            rev = self.rev(node)
        flags = self.flags(rev)

    text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
    if validatehash:
        self.checkhash(text, node, rev=rev)

    return text
1822
1823
def hash(self, text, p1, p2):
    """Compute a node hash.

    Available as a function so that subclasses can replace the hash
    as needed.

    Delegates to the module-level ``hash`` helper, which combines the
    sorted parent ids with the text.
    """
    return hash(text, p1, p2)
1830
1831
1831 def _processflags(self, text, flags, operation, raw=False):
1832 def _processflags(self, text, flags, operation, raw=False):
1832 """Inspect revision data flags and applies transforms defined by
1833 """Inspect revision data flags and applies transforms defined by
1833 registered flag processors.
1834 registered flag processors.
1834
1835
1835 ``text`` - the revision data to process
1836 ``text`` - the revision data to process
1836 ``flags`` - the revision flags
1837 ``flags`` - the revision flags
1837 ``operation`` - the operation being performed (read or write)
1838 ``operation`` - the operation being performed (read or write)
1838 ``raw`` - an optional argument describing if the raw transform should be
1839 ``raw`` - an optional argument describing if the raw transform should be
1839 applied.
1840 applied.
1840
1841
1841 This method processes the flags in the order (or reverse order if
1842 This method processes the flags in the order (or reverse order if
1842 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1843 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1843 flag processors registered for present flags. The order of flags defined
1844 flag processors registered for present flags. The order of flags defined
1844 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1845 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1845
1846
1846 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1847 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1847 processed text and ``validatehash`` is a bool indicating whether the
1848 processed text and ``validatehash`` is a bool indicating whether the
1848 returned text should be checked for hash integrity.
1849 returned text should be checked for hash integrity.
1849
1850
1850 Note: If the ``raw`` argument is set, it has precedence over the
1851 Note: If the ``raw`` argument is set, it has precedence over the
1851 operation and will only update the value of ``validatehash``.
1852 operation and will only update the value of ``validatehash``.
1852 """
1853 """
1853 # fast path: no flag processors will run
1854 # fast path: no flag processors will run
1854 if flags == 0:
1855 if flags == 0:
1855 return text, True
1856 return text, True
1856 if not operation in ('read', 'write'):
1857 if not operation in ('read', 'write'):
1857 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1858 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1858 # Check all flags are known.
1859 # Check all flags are known.
1859 if flags & ~REVIDX_KNOWN_FLAGS:
1860 if flags & ~REVIDX_KNOWN_FLAGS:
1860 raise RevlogError(_("incompatible revision flag '%#x'") %
1861 raise RevlogError(_("incompatible revision flag '%#x'") %
1861 (flags & ~REVIDX_KNOWN_FLAGS))
1862 (flags & ~REVIDX_KNOWN_FLAGS))
1862 validatehash = True
1863 validatehash = True
1863 # Depending on the operation (read or write), the order might be
1864 # Depending on the operation (read or write), the order might be
1864 # reversed due to non-commutative transforms.
1865 # reversed due to non-commutative transforms.
1865 orderedflags = REVIDX_FLAGS_ORDER
1866 orderedflags = REVIDX_FLAGS_ORDER
1866 if operation == 'write':
1867 if operation == 'write':
1867 orderedflags = reversed(orderedflags)
1868 orderedflags = reversed(orderedflags)
1868
1869
1869 for flag in orderedflags:
1870 for flag in orderedflags:
1870 # If a flagprocessor has been registered for a known flag, apply the
1871 # If a flagprocessor has been registered for a known flag, apply the
1871 # related operation transform and update result tuple.
1872 # related operation transform and update result tuple.
1872 if flag & flags:
1873 if flag & flags:
1873 vhash = True
1874 vhash = True
1874
1875
1875 if flag not in _flagprocessors:
1876 if flag not in _flagprocessors:
1876 message = _("missing processor for flag '%#x'") % (flag)
1877 message = _("missing processor for flag '%#x'") % (flag)
1877 raise RevlogError(message)
1878 raise RevlogError(message)
1878
1879
1879 processor = _flagprocessors[flag]
1880 processor = _flagprocessors[flag]
1880 if processor is not None:
1881 if processor is not None:
1881 readtransform, writetransform, rawtransform = processor
1882 readtransform, writetransform, rawtransform = processor
1882
1883
1883 if raw:
1884 if raw:
1884 vhash = rawtransform(self, text)
1885 vhash = rawtransform(self, text)
1885 elif operation == 'read':
1886 elif operation == 'read':
1886 text, vhash = readtransform(self, text)
1887 text, vhash = readtransform(self, text)
1887 else: # write operation
1888 else: # write operation
1888 text, vhash = writetransform(self, text)
1889 text, vhash = writetransform(self, text)
1889 validatehash = validatehash and vhash
1890 validatehash = validatehash and vhash
1890
1891
1891 return text, validatehash
1892 return text, validatehash
1892
1893
def checkhash(self, text, node, p1=None, p2=None, rev=None):
    """Check node hash integrity.

    Available as a function so that subclasses can extend hash mismatch
    behaviors as needed.

    Raises RevlogError on mismatch; for censorable revlogs a censored
    text is reported as CensoredNodeError instead.
    """
    try:
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                % (self.indexfile, pycompat.bytestr(revornode)))
    except RevlogError:
        if self._censorable and _censoredtext(text):
            raise error.CensoredNodeError(self.indexfile, node, text)
        raise
1912
1913
1913 def _enforceinlinesize(self, tr, fp=None):
1914 def _enforceinlinesize(self, tr, fp=None):
1914 """Check if the revlog is too big for inline and convert if so.
1915 """Check if the revlog is too big for inline and convert if so.
1915
1916
1916 This should be called after revisions are added to the revlog. If the
1917 This should be called after revisions are added to the revlog. If the
1917 revlog has grown too large to be an inline revlog, it will convert it
1918 revlog has grown too large to be an inline revlog, it will convert it
1918 to use multiple index and data files.
1919 to use multiple index and data files.
1919 """
1920 """
1920 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1921 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1921 return
1922 return
1922
1923
1923 trinfo = tr.find(self.indexfile)
1924 trinfo = tr.find(self.indexfile)
1924 if trinfo is None:
1925 if trinfo is None:
1925 raise RevlogError(_("%s not found in the transaction")
1926 raise RevlogError(_("%s not found in the transaction")
1926 % self.indexfile)
1927 % self.indexfile)
1927
1928
1928 trindex = trinfo[2]
1929 trindex = trinfo[2]
1929 if trindex is not None:
1930 if trindex is not None:
1930 dataoff = self.start(trindex)
1931 dataoff = self.start(trindex)
1931 else:
1932 else:
1932 # revlog was stripped at start of transaction, use all leftover data
1933 # revlog was stripped at start of transaction, use all leftover data
1933 trindex = len(self) - 1
1934 trindex = len(self) - 1
1934 dataoff = self.end(-2)
1935 dataoff = self.end(-2)
1935
1936
1936 tr.add(self.datafile, dataoff)
1937 tr.add(self.datafile, dataoff)
1937
1938
1938 if fp:
1939 if fp:
1939 fp.flush()
1940 fp.flush()
1940 fp.close()
1941 fp.close()
1941
1942
1942 with self._datafp('w') as df:
1943 with self._datafp('w') as df:
1943 for r in self:
1944 for r in self:
1944 df.write(self._getsegmentforrevs(r, r)[1])
1945 df.write(self._getsegmentforrevs(r, r)[1])
1945
1946
1946 with self._indexfp('w') as fp:
1947 with self._indexfp('w') as fp:
1947 self.version &= ~FLAG_INLINE_DATA
1948 self.version &= ~FLAG_INLINE_DATA
1948 self._inline = False
1949 self._inline = False
1949 io = self._io
1950 io = self._io
1950 for i in self:
1951 for i in self:
1951 e = io.packentry(self.index[i], self.node, self.version, i)
1952 e = io.packentry(self.index[i], self.node, self.version, i)
1952 fp.write(e)
1953 fp.write(e)
1953
1954
1954 # the temp file replace the real index when we exit the context
1955 # the temp file replace the real index when we exit the context
1955 # manager
1956 # manager
1956
1957
1957 tr.replace(self.indexfile, trindex * self._io.size)
1958 tr.replace(self.indexfile, trindex * self._io.size)
1958 self._chunkclear()
1959 self._chunkclear()
1959
1960
def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
    """add a revision to the log

    text - the revision data to add
    transaction - the transaction object used for rollback
    link - the linkrev data to add
    p1, p2 - the parent nodeids of the revision
    cachedelta - an optional precomputed delta
    node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use different hashing method (and override checkhash() in such case)
    flags - the known flags to set on the revision
    deltacomputer - an optional _deltacomputer instance shared between
        multiple calls
    """
    if link == nullrev:
        raise RevlogError(_("attempted to add linkrev -1 to %s")
                          % self.indexfile)

    if flags:
        # flag transforms may need the node before rawtext exists
        node = node or self.hash(text, p1, p2)

    rawtext, validatehash = self._processflags(text, flags, 'write')

    # If the flag processor modifies the revision data, ignore any provided
    # cachedelta.
    if rawtext != text:
        cachedelta = None

    if len(rawtext) > _maxentrysize:
        raise RevlogError(
            _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
            % (self.indexfile, len(rawtext)))

    node = node or self.hash(rawtext, p1, p2)
    if node in self.nodemap:
        # revision already stored: adding it again would be a no-op
        return node

    if validatehash:
        self.checkhash(rawtext, node, p1=p1, p2=p2)

    return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                               flags, cachedelta=cachedelta,
                               deltacomputer=deltacomputer)
2005
2006
def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                   cachedelta=None, deltacomputer=None):
    """add a raw revision with known flags, node and parents

    useful when reusing a revision not stored in this revlog (ex: received
    over wire, or read from an external bundle).
    """
    # Inline revlogs store revision data inside the index file, so no
    # separate data handle is needed in that case.
    if self._inline:
        dfh = None
    else:
        dfh = self._datafp("a+")
    ifh = self._indexfp("a+")
    try:
        return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                 flags, cachedelta, ifh, dfh,
                                 deltacomputer=deltacomputer)
    finally:
        # Close the data handle (when open) before the index handle,
        # mirroring the order in which they were opened.
        if dfh:
            dfh.close()
        ifh.close()
2024
2025
def compress(self, data):
    """Generate a possibly-compressed representation of data.

    Returns a ``(header, payload)`` pair. An empty header means the
    payload is self-describing (empty, compressed with an embedded
    header, or NUL-prefixed raw data); a ``'u'`` header marks data
    stored uncompressed.
    """
    if not data:
        # Nothing to store; empty data needs no marker at all.
        return '', data

    packed = self._compressor.compress(data)
    if packed:
        # The revlog compressor added the header in the returned data.
        return '', packed

    # Compression did not pay off, so store the raw bytes. Data starting
    # with a NUL byte is unambiguous as-is; anything else is prefixed
    # with 'u' to flag it as uncompressed.
    marker = '' if data[0:1] == '\0' else 'u'
    return marker, data
2039
2040
def decompress(self, data):
    """Decompress a revlog chunk.

    The chunk is expected to begin with a header identifying the
    format type so it can be routed to an appropriate decompressor.
    """
    if not data:
        return data

    # Revlogs are read far more often than they are written, and many
    # chunks only take microseconds to decompress, so dispatch speed
    # matters here. Chunks are overwhelmingly compressed with a common,
    # officially supported engine, so those one-byte headers are tested
    # inline before falling back to the generic engine lookup.
    # According to `hg perfrevlogchunks` the inline dispatch is ~0.5%
    # faster for zlib chunks, which matters for changelog and manifest
    # reads.
    header = data[0:1]

    if header == 'x':
        # zlib-compressed chunk
        try:
            return _zlibdecompress(data)
        except zlib.error as e:
            raise RevlogError(_('revlog decompress error: %s') %
                              stringutil.forcebytestr(e))
    # '\0' is more common than 'u' so it goes first.
    if header == '\0':
        # raw chunk, stored verbatim
        return data
    if header == 'u':
        # explicitly uncompressed chunk: drop the marker byte
        return util.buffer(data, 1)

    # Any other header byte is handled by a pluggable compression
    # engine, resolved once and cached for subsequent chunks.
    try:
        enginedecompressor = self._decompressors[header]
    except KeyError:
        try:
            engine = util.compengines.forrevlogheader(header)
            enginedecompressor = engine.revlogcompressor()
            self._decompressors[header] = enginedecompressor
        except KeyError:
            raise RevlogError(_('unknown compression type %r') % header)

    return enginedecompressor.decompress(data)
2095
2096
2096 def _isgooddeltainfo(self, deltainfo, revinfo):
2097 def _isgooddeltainfo(self, deltainfo, revinfo):
2097 """Returns True if the given delta is good. Good means that it is within
2098 """Returns True if the given delta is good. Good means that it is within
2098 the disk span, disk size, and chain length bounds that we know to be
2099 the disk span, disk size, and chain length bounds that we know to be
2099 performant."""
2100 performant."""
2100 if deltainfo is None:
2101 if deltainfo is None:
2101 return False
2102 return False
2102
2103
2103 # - 'deltainfo.distance' is the distance from the base revision --
2104 # - 'deltainfo.distance' is the distance from the base revision --
2104 # bounding it limits the amount of I/O we need to do.
2105 # bounding it limits the amount of I/O we need to do.
2105 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2106 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2106 # deltas we need to apply -- bounding it limits the amount of CPU
2107 # deltas we need to apply -- bounding it limits the amount of CPU
2107 # we consume.
2108 # we consume.
2108
2109
2109 textlen = revinfo.textlen
2110 textlen = revinfo.textlen
2110 defaultmax = textlen * 4
2111 defaultmax = textlen * 4
2111 maxdist = self._maxdeltachainspan
2112 maxdist = self._maxdeltachainspan
2112 if not maxdist:
2113 if not maxdist:
2113 maxdist = deltainfo.distance # ensure the conditional pass
2114 maxdist = deltainfo.distance # ensure the conditional pass
2114 maxdist = max(maxdist, defaultmax)
2115 maxdist = max(maxdist, defaultmax)
2115 if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or
2116 if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or
2116 deltainfo.compresseddeltalen > textlen * 2 or
2117 deltainfo.compresseddeltalen > textlen * 2 or
2117 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2118 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2118 return False
2119 return False
2119
2120
2120 return True
2121 return True
2121
2122
def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                 cachedelta, ifh, dfh, alwayscache=False,
                 deltacomputer=None):
    """internal function to add revisions to the log

    see addrevision for argument descriptions.

    note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

    if "deltacomputer" is not provided or None, a defaultdeltacomputer will
    be used.

    invariants:
    - rawtext is optional (can be None); if not set, cachedelta must be set.
      if both are set, they must correspond to each other.
    """
    if node == nullid:
        raise RevlogError(_("%s: attempt to add null revision") %
                          (self.indexfile))
    if node == wdirid or node in wdirfilenodeids:
        raise RevlogError(_("%s: attempt to add wdir revision") %
                          (self.indexfile))

    # file handle that existing chunks will be read from while computing
    # the delta: the index file for inline revlogs, the data file otherwise
    if self._inline:
        fh = ifh
    else:
        fh = dfh

    # one-element mutable container so helpers can fill in the resolved
    # fulltext lazily (carried inside the _revisioninfo below)
    btext = [rawtext]

    curr = len(self)
    prev = curr - 1
    offset = self.end(prev)
    p1r, p2r = self.rev(p1), self.rev(p2)

    # full versions are inserted when the needed deltas
    # become comparable to the uncompressed text
    if rawtext is None:
        # need rawtext size, before changed by flag processors, which is
        # the non-raw size. use revlog explicitly to avoid filelog's extra
        # logic that might remove metadata size.
        textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                    cachedelta[1])
    else:
        textlen = len(rawtext)

    if deltacomputer is None:
        deltacomputer = _deltacomputer(self)

    revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

    # no delta for flag processor revision (see "candelta" for why)
    # not calling candelta since only one revision needs test, also to
    # avoid overhead fetching flags again.
    if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
        deltainfo = None
    else:
        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

    if deltainfo is not None:
        # store the chosen delta
        base = deltainfo.base
        chainbase = deltainfo.chainbase
        data = deltainfo.data
        l = deltainfo.deltalen
    else:
        # no usable delta: store the full (compressed) text, which is its
        # own base
        rawtext = deltacomputer.buildtext(revinfo, fh)
        data = self.compress(rawtext)
        # l is the number of bytes the revision occupies on disk
        # (compression header + payload)
        l = len(data[1]) + len(data[0])
        base = chainbase = curr

    e = (offset_type(offset, flags), l, textlen,
         base, link, p1r, p2r, node)
    # insert just before the trailing entry of the index (position -1)
    self.index.insert(-1, e)
    self.nodemap[node] = curr

    entry = self._io.packentry(e, self.node, self.version, curr)
    self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

    if alwayscache and rawtext is None:
        rawtext = deltacomputer._buildtext(revinfo, fh)

    if type(rawtext) == bytes: # only accept immutable objects
        self._cache = (node, curr, rawtext)
    self._chainbasecache[curr] = chainbase
    return node
2207
2208
def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
    """Write a packed index entry and its revision data to disk.

    ``entry`` is the serialized index record and ``data`` the
    (header, payload) pair produced by compress(). ``ifh`` and ``dfh``
    are the open index and data file handles; ``dfh`` is None for
    inline revlogs, where everything is written into the index file.
    """
    # Files opened in a+ mode have inconsistent behavior on various
    # platforms. Windows requires that a file positioning call be made
    # when the file handle transitions between reads and writes. See
    # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
    # platforms, Python or the platform itself can be buggy. Some versions
    # of Solaris have been observed to not append at the end of the file
    # if the file was seeked to before the end. See issue4943 for more.
    #
    # We work around this issue by inserting a seek() before writing.
    # Note: This is likely not necessary on Python 3.
    ifh.seek(0, os.SEEK_END)
    if dfh:
        dfh.seek(0, os.SEEK_END)

    curr = len(self) - 1
    if not self._inline:
        # register the pre-write sizes with the transaction so a rollback
        # truncates back to them, then append data and index entry
        transaction.add(self.datafile, offset)
        transaction.add(self.indexfile, curr * len(entry))
        if data[0]:
            dfh.write(data[0])
        dfh.write(data[1])
        ifh.write(entry)
    else:
        # inline: index records and revision data are interleaved in the
        # index file, so account for the preceding index records too
        offset += curr * self._io.size
        transaction.add(self.indexfile, offset, curr)
        ifh.write(entry)
        ifh.write(data[0])
        ifh.write(data[1])
        # an inline revlog that grew past the size threshold may be
        # migrated to separate index/data files here
        self._enforceinlinesize(transaction, ifh)
2238
2239
def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
    """
    add a delta group

    given a set of deltas, add them to the revision log. the
    first delta is against its parent, which should be in our
    log, the rest are against the previous delta.

    If ``addrevisioncb`` is defined, it will be called with arguments of
    this revlog and the node that was added.
    """

    nodes = []

    # register the current end-of-file positions with the transaction so
    # a rollback can truncate everything appended below
    r = len(self)
    end = 0
    if r:
        end = self.end(r - 1)
    ifh = self._indexfp("a+")
    isize = r * self._io.size
    if self._inline:
        transaction.add(self.indexfile, end + isize, r)
        dfh = None
    else:
        transaction.add(self.indexfile, isize, r)
        transaction.add(self.datafile, end)
        dfh = self._datafp("a+")
    def flush():
        # push pending writes to disk so they can be read back; handed to
        # _peek_iscensored as a callback
        if dfh:
            dfh.flush()
        ifh.flush()
    try:
        deltacomputer = _deltacomputer(self)
        # loop through our set of deltas
        for data in deltas:
            node, p1, p2, linknode, deltabase, delta, flags = data
            link = linkmapper(linknode)
            flags = flags or REVIDX_DEFAULT_FLAGS

            # recorded even when the node already exists, so the caller
            # sees the complete incoming set
            nodes.append(node)

            if node in self.nodemap:
                # this can happen if two branches make the same change
                continue

            for p in (p1, p2):
                if p not in self.nodemap:
                    raise LookupError(p, self.indexfile,
                                      _('unknown parent'))

            if deltabase not in self.nodemap:
                raise LookupError(deltabase, self.indexfile,
                                  _('unknown delta base'))

            baserev = self.rev(deltabase)

            if baserev != nullrev and self.iscensored(baserev):
                # if base is censored, delta must be full replacement in a
                # single patch operation
                hlen = struct.calcsize(">lll")
                oldlen = self.rawsize(baserev)
                newlen = len(delta) - hlen
                if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                    raise error.CensoredBaseError(self.indexfile,
                                                  self.node(baserev))

            if not flags and self._peek_iscensored(baserev, delta, flush):
                flags |= REVIDX_ISCENSORED

            # We assume consumers of addrevisioncb will want to retrieve
            # the added revision, which will require a call to
            # revision(). revision() will fast path if there is a cache
            # hit. So, we tell _addrevision() to always cache in this case.
            # We're only using addgroup() in the context of changegroup
            # generation so the revision data can always be handled as raw
            # by the flagprocessor.
            self._addrevision(node, None, transaction, link,
                              p1, p2, flags, (baserev, delta),
                              ifh, dfh,
                              alwayscache=bool(addrevisioncb),
                              deltacomputer=deltacomputer)

            if addrevisioncb:
                addrevisioncb(self, node)

            if not dfh and not self._inline:
                # addrevision switched from inline to conventional
                # reopen the index
                ifh.close()
                dfh = self._datafp("a+")
                ifh = self._indexfp("a+")
    finally:
        if dfh:
            dfh.close()
        ifh.close()

    return nodes
2336
2337
def iscensored(self, rev):
    """Check if a file revision is censored."""
    # Revlogs that can never hold censored data skip the flag lookup.
    if self._censorable:
        return self.flags(rev) & REVIDX_ISCENSORED
    return False
2343
2344
2344 def _peek_iscensored(self, baserev, delta, flush):
2345 def _peek_iscensored(self, baserev, delta, flush):
2345 """Quickly check if a delta produces a censored revision."""
2346 """Quickly check if a delta produces a censored revision."""
2346 if not self._censorable:
2347 if not self._censorable:
2347 return False
2348 return False
2348
2349
2349 # Fragile heuristic: unless new file meta keys are added alphabetically
2350 # Fragile heuristic: unless new file meta keys are added alphabetically
2350 # preceding "censored", all censored revisions are prefixed by
2351 # preceding "censored", all censored revisions are prefixed by
2351 # "\1\ncensored:". A delta producing such a censored revision must be a
2352 # "\1\ncensored:". A delta producing such a censored revision must be a
2352 # full-replacement delta, so we inspect the first and only patch in the
2353 # full-replacement delta, so we inspect the first and only patch in the
2353 # delta for this prefix.
2354 # delta for this prefix.
2354 hlen = struct.calcsize(">lll")
2355 hlen = struct.calcsize(">lll")
2355 if len(delta) <= hlen:
2356 if len(delta) <= hlen:
2356 return False
2357 return False
2357
2358
2358 oldlen = self.rawsize(baserev)
2359 oldlen = self.rawsize(baserev)
2359 newlen = len(delta) - hlen
2360 newlen = len(delta) - hlen
2360 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2361 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2361 return False
2362 return False
2362
2363
2363 add = "\1\ncensored:"
2364 add = "\1\ncensored:"
2364 addlen = len(add)
2365 addlen = len(add)
2365 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2366 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2366
2367
def getstrippoint(self, minlink):
    """find the minimum rev that must be stripped to strip the linkrev

    Returns a tuple containing the minimum rev and a set of all revs that
    have linkrevs that will be broken by this strip.
    """
    brokenrevs = set()
    strippoint = len(self)

    # maps not-yet-visited revs of the walked region to their linkrev
    heads = {}
    # linkrevs >= minlink known to exist at or below the current strippoint
    futurelargelinkrevs = set()
    for head in self.headrevs():
        headlinkrev = self.linkrev(head)
        heads[head] = headlinkrev
        if headlinkrev >= minlink:
            futurelargelinkrevs.add(headlinkrev)

    # This algorithm involves walking down the rev graph, starting at the
    # heads. Since the revs are topologically sorted according to linkrev,
    # once all head linkrevs are below the minlink, we know there are
    # no more revs that could have a linkrev greater than minlink.
    # So we can stop walking.
    while futurelargelinkrevs:
        strippoint -= 1
        linkrev = heads.pop(strippoint)

        if linkrev < minlink:
            # this rev sits above the strip point, so its (valid) linkrev
            # will be broken by the strip -- see the docstring
            brokenrevs.add(strippoint)
        else:
            futurelargelinkrevs.remove(linkrev)

        # continue the walk through the parents
        for p in self.parentrevs(strippoint):
            if p != nullrev:
                plinkrev = self.linkrev(p)
                heads[p] = plinkrev
                if plinkrev >= minlink:
                    futurelargelinkrevs.add(plinkrev)

    return strippoint, brokenrevs
2406
2407
def strip(self, minlink, transaction):
    """truncate the revlog on the first revision with a linkrev >= minlink

    This function is called when we're stripping revision minlink and
    its descendants from the repository.

    We have to remove all revisions with linkrev >= minlink, because
    the equivalent changelog revisions will be renumbered after the
    strip.

    So we truncate the revlog on the first of these revisions, and
    trust that the caller has saved the revisions that shouldn't be
    removed and that it'll re-add them after this truncation.
    """
    if len(self) == 0:
        # empty revlog: nothing to strip
        return

    rev, _ = self.getstrippoint(minlink)
    if rev == len(self):
        # strip point is past the end: nothing to remove
        return

    # first truncate the files on disk
    end = self.start(rev)
    if not self._inline:
        transaction.add(self.datafile, end)
        end = rev * self._io.size
    else:
        # inline: index records and data are interleaved, so the index
        # truncation point includes both the data offset and the records
        end += rev * self._io.size

    transaction.add(self.indexfile, end)

    # then reset internal state in memory to forget those revisions
    self._cache = None
    self._chaininfocache = {}
    self._chunkclear()
    for x in xrange(rev, len(self)):
        del self.nodemap[self.node(x)]

    # keep the trailing entry of the index (slice stops at -1)
    del self.index[rev:-1]
    self._nodepos = None
2447
2448
def checksize(self):
    """Check on-disk file sizes against what the index describes.

    Returns a ``(dd, di)`` pair: ``dd`` is the difference in bytes
    between the data file's actual and expected size, ``di`` the same
    for the index file. Both are 0 when everything is consistent; a
    missing file counts as a difference of 0.
    """
    expected = 0
    if len(self):
        # expected data size: end offset of the last revision
        expected = max(0, self.end(len(self) - 1))

    try:
        with self._datafp() as f:
            f.seek(0, 2)
            actual = f.tell()
        dd = actual - expected
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        # no data file (e.g. inline revlog): nothing to compare
        dd = 0

    try:
        f = self.opener(self.indexfile)
        f.seek(0, 2)
        actual = f.tell()
        f.close()
        s = self._io.size
        i = max(0, actual // s)
        # di: bytes beyond a whole number of index records
        di = actual - (i * s)
        if self._inline:
            # inline: the index holds both records and revision data, so
            # recompute the expected size from per-revision lengths
            databytes = 0
            for r in self:
                databytes += max(0, self.length(r))
            dd = 0
            di = actual - len(self) * s - databytes
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        di = 0

    return (dd, di)
2483
2484
2484 def files(self):
2485 def files(self):
2485 res = [self.indexfile]
2486 res = [self.indexfile]
2486 if not self._inline:
2487 if not self._inline:
2487 res.append(self.datafile)
2488 res.append(self.datafile)
2488 return res
2489 return res
2489
2490
    # Delta-reuse policies accepted by clone() (see its docstring for the
    # full semantics of each mode).
    DELTAREUSEALWAYS = 'always'        # reuse existing deltas whenever possible
    DELTAREUSESAMEREVS = 'samerevs'    # reuse only if the destination would
                                       # pick the same delta base
    DELTAREUSENEVER = 'never'          # always recompute deltas

    DELTAREUSEFULLADD = 'fulladd'      # re-add revisions through the full
                                       # addrevision() code path

    # Set of all valid values for clone()'s ``deltareuse`` argument.
    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2497
2498
    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument control how deltas from the existing revlog
        are preserved in the destination revlog. The argument can have the
        following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions are re-added through the full ``addrevision()`` code path
          instead of the lower-level ``_addrevision()``, letting the
          destination recompute deltas and flag processing from scratch.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        By default, the current default is used.

        :param tr: transaction used for all writes to ``destrevlog``
        :param destrevlog: target revlog; must be empty and unfiltered
        :param addrevisioncb: optional callable invoked as
            ``addrevisioncb(self, rev, node)`` after each copied revision
        :raises ValueError: for an invalid ``deltareuse`` value, a non-empty
            destination, or filtered source/destination revlogs
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        # Filtered revlogs would silently skip revisions; refuse both sides.
        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        # Save destination settings so they can be restored in the finally
        # block below.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            # Whether to hand the source delta to the destination as a hint.
            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                # Without a reusable delta we must materialize the full raw
                # text so the destination can compute its own delta.
                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)


                if deltareuse == self.DELTAREUSEFULLADD:
                    # Full add path: destination recomputes everything.
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    # Low-level path: write directly via _addrevision with
                    # explicitly managed index/data file handles.
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            # Restore the destination's original delta settings even on error.
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd
General Comments 0
You need to be logged in to leave comments. Login now