##// END OF EJS Templates
revlog: suggest other parent when a parent was refused for a delta (issue5481)...
Paul Morelle -
r38114:9bf0bd4d default
parent child Browse files
Show More
@@ -1,2603 +1,2610
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import hashlib
19 import hashlib
20 import heapq
20 import heapq
21 import os
21 import os
22 import re
22 import re
23 import struct
23 import struct
24 import zlib
24 import zlib
25
25
26 # import stuff from node for others to import from revlog
26 # import stuff from node for others to import from revlog
27 from .node import (
27 from .node import (
28 bin,
28 bin,
29 hex,
29 hex,
30 nullid,
30 nullid,
31 nullrev,
31 nullrev,
32 wdirfilenodeids,
32 wdirfilenodeids,
33 wdirhex,
33 wdirhex,
34 wdirid,
34 wdirid,
35 wdirrev,
35 wdirrev,
36 )
36 )
37 from .i18n import _
37 from .i18n import _
38 from .thirdparty import (
38 from .thirdparty import (
39 attr,
39 attr,
40 )
40 )
41 from . import (
41 from . import (
42 ancestor,
42 ancestor,
43 error,
43 error,
44 mdiff,
44 mdiff,
45 policy,
45 policy,
46 pycompat,
46 pycompat,
47 templatefilters,
47 templatefilters,
48 util,
48 util,
49 )
49 )
50 from .utils import (
50 from .utils import (
51 stringutil,
51 stringutil,
52 )
52 )
53
53
54 parsers = policy.importmod(r'parsers')
54 parsers = policy.importmod(r'parsers')
55
55
56 # Aliased for performance.
56 # Aliased for performance.
57 _zlibdecompress = zlib.decompress
57 _zlibdecompress = zlib.decompress
58
58
59 # revlog header flags
59 # revlog header flags
60 REVLOGV0 = 0
60 REVLOGV0 = 0
61 REVLOGV1 = 1
61 REVLOGV1 = 1
62 # Dummy value until file format is finalized.
62 # Dummy value until file format is finalized.
63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 REVLOGV2 = 0xDEAD
64 REVLOGV2 = 0xDEAD
65 FLAG_INLINE_DATA = (1 << 16)
65 FLAG_INLINE_DATA = (1 << 16)
66 FLAG_GENERALDELTA = (1 << 17)
66 FLAG_GENERALDELTA = (1 << 17)
67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 REVLOG_DEFAULT_FORMAT = REVLOGV1
68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72
72
73 # revlog index flags
73 # revlog index flags
74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 REVIDX_DEFAULT_FLAGS = 0
77 REVIDX_DEFAULT_FLAGS = 0
78 # stable order in which flags need to be processed and their processors applied
78 # stable order in which flags need to be processed and their processors applied
79 REVIDX_FLAGS_ORDER = [
79 REVIDX_FLAGS_ORDER = [
80 REVIDX_ISCENSORED,
80 REVIDX_ISCENSORED,
81 REVIDX_ELLIPSIS,
81 REVIDX_ELLIPSIS,
82 REVIDX_EXTSTORED,
82 REVIDX_EXTSTORED,
83 ]
83 ]
84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 # bitmark for flags that could cause rawdata content change
85 # bitmark for flags that could cause rawdata content change
86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87
87
88 # max size of revlog with inline data
88 # max size of revlog with inline data
89 _maxinline = 131072
89 _maxinline = 131072
90 _chunksize = 1048576
90 _chunksize = 1048576
91
91
92 RevlogError = error.RevlogError
92 RevlogError = error.RevlogError
93 LookupError = error.LookupError
93 LookupError = error.LookupError
94 CensoredNodeError = error.CensoredNodeError
94 CensoredNodeError = error.CensoredNodeError
95 ProgrammingError = error.ProgrammingError
95 ProgrammingError = error.ProgrammingError
96
96
97 # Store flag processors (cf. 'addflagprocessor()' to register)
97 # Store flag processors (cf. 'addflagprocessor()' to register)
98 _flagprocessors = {
98 _flagprocessors = {
99 REVIDX_ISCENSORED: None,
99 REVIDX_ISCENSORED: None,
100 }
100 }
101
101
102 _mdre = re.compile('\1\n')
102 _mdre = re.compile('\1\n')
103 def parsemeta(text):
103 def parsemeta(text):
104 """return (metadatadict, metadatasize)"""
104 """return (metadatadict, metadatasize)"""
105 # text can be buffer, so we can't use .startswith or .index
105 # text can be buffer, so we can't use .startswith or .index
106 if text[:2] != '\1\n':
106 if text[:2] != '\1\n':
107 return None, None
107 return None, None
108 s = _mdre.search(text, 2).start()
108 s = _mdre.search(text, 2).start()
109 mtext = text[2:s]
109 mtext = text[2:s]
110 meta = {}
110 meta = {}
111 for l in mtext.splitlines():
111 for l in mtext.splitlines():
112 k, v = l.split(": ", 1)
112 k, v = l.split(": ", 1)
113 meta[k] = v
113 meta[k] = v
114 return meta, (s + 2)
114 return meta, (s + 2)
115
115
116 def packmeta(meta, text):
116 def packmeta(meta, text):
117 keys = sorted(meta)
117 keys = sorted(meta)
118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
119 return "\1\n%s\1\n%s" % (metatext, text)
119 return "\1\n%s\1\n%s" % (metatext, text)
120
120
121 def _censoredtext(text):
121 def _censoredtext(text):
122 m, offs = parsemeta(text)
122 m, offs = parsemeta(text)
123 return m and "censored" in m
123 return m and "censored" in m
124
124
125 def addflagprocessor(flag, processor):
125 def addflagprocessor(flag, processor):
126 """Register a flag processor on a revision data flag.
126 """Register a flag processor on a revision data flag.
127
127
128 Invariant:
128 Invariant:
129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
131 - Only one flag processor can be registered on a specific flag.
131 - Only one flag processor can be registered on a specific flag.
132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
133 following signatures:
133 following signatures:
134 - (read) f(self, rawtext) -> text, bool
134 - (read) f(self, rawtext) -> text, bool
135 - (write) f(self, text) -> rawtext, bool
135 - (write) f(self, text) -> rawtext, bool
136 - (raw) f(self, rawtext) -> bool
136 - (raw) f(self, rawtext) -> bool
137 "text" is presented to the user. "rawtext" is stored in revlog data, not
137 "text" is presented to the user. "rawtext" is stored in revlog data, not
138 directly visible to the user.
138 directly visible to the user.
139 The boolean returned by these transforms is used to determine whether
139 The boolean returned by these transforms is used to determine whether
140 the returned text can be used for hash integrity checking. For example,
140 the returned text can be used for hash integrity checking. For example,
141 if "write" returns False, then "text" is used to generate hash. If
141 if "write" returns False, then "text" is used to generate hash. If
142 "write" returns True, that basically means "rawtext" returned by "write"
142 "write" returns True, that basically means "rawtext" returned by "write"
143 should be used to generate hash. Usually, "write" and "read" return
143 should be used to generate hash. Usually, "write" and "read" return
144 different booleans. And "raw" returns a same boolean as "write".
144 different booleans. And "raw" returns a same boolean as "write".
145
145
146 Note: The 'raw' transform is used for changegroup generation and in some
146 Note: The 'raw' transform is used for changegroup generation and in some
147 debug commands. In this case the transform only indicates whether the
147 debug commands. In this case the transform only indicates whether the
148 contents can be used for hash integrity checks.
148 contents can be used for hash integrity checks.
149 """
149 """
150 if not flag & REVIDX_KNOWN_FLAGS:
150 if not flag & REVIDX_KNOWN_FLAGS:
151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
152 raise ProgrammingError(msg)
152 raise ProgrammingError(msg)
153 if flag not in REVIDX_FLAGS_ORDER:
153 if flag not in REVIDX_FLAGS_ORDER:
154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
155 raise ProgrammingError(msg)
155 raise ProgrammingError(msg)
156 if flag in _flagprocessors:
156 if flag in _flagprocessors:
157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
158 raise error.Abort(msg)
158 raise error.Abort(msg)
159 _flagprocessors[flag] = processor
159 _flagprocessors[flag] = processor
160
160
161 def getoffset(q):
161 def getoffset(q):
162 return int(q >> 16)
162 return int(q >> 16)
163
163
164 def gettype(q):
164 def gettype(q):
165 return int(q & 0xFFFF)
165 return int(q & 0xFFFF)
166
166
167 def offset_type(offset, type):
167 def offset_type(offset, type):
168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
169 raise ValueError('unknown revlog index flags')
169 raise ValueError('unknown revlog index flags')
170 return int(int(offset) << 16 | type)
170 return int(int(offset) << 16 | type)
171
171
172 _nullhash = hashlib.sha1(nullid)
172 _nullhash = hashlib.sha1(nullid)
173
173
174 def hash(text, p1, p2):
174 def hash(text, p1, p2):
175 """generate a hash from the given text and its parent hashes
175 """generate a hash from the given text and its parent hashes
176
176
177 This hash combines both the current file contents and its history
177 This hash combines both the current file contents and its history
178 in a manner that makes it easy to distinguish nodes with the same
178 in a manner that makes it easy to distinguish nodes with the same
179 content in the revision graph.
179 content in the revision graph.
180 """
180 """
181 # As of now, if one of the parent node is null, p2 is null
181 # As of now, if one of the parent node is null, p2 is null
182 if p2 == nullid:
182 if p2 == nullid:
183 # deep copy of a hash is faster than creating one
183 # deep copy of a hash is faster than creating one
184 s = _nullhash.copy()
184 s = _nullhash.copy()
185 s.update(p1)
185 s.update(p1)
186 else:
186 else:
187 # none of the parent nodes are nullid
187 # none of the parent nodes are nullid
188 if p1 < p2:
188 if p1 < p2:
189 a = p1
189 a = p1
190 b = p2
190 b = p2
191 else:
191 else:
192 a = p2
192 a = p2
193 b = p1
193 b = p1
194 s = hashlib.sha1(a)
194 s = hashlib.sha1(a)
195 s.update(b)
195 s.update(b)
196 s.update(text)
196 s.update(text)
197 return s.digest()
197 return s.digest()
198
198
199 def _trimchunk(revlog, revs, startidx, endidx=None):
199 def _trimchunk(revlog, revs, startidx, endidx=None):
200 """returns revs[startidx:endidx] without empty trailing revs
200 """returns revs[startidx:endidx] without empty trailing revs
201 """
201 """
202 length = revlog.length
202 length = revlog.length
203
203
204 if endidx is None:
204 if endidx is None:
205 endidx = len(revs)
205 endidx = len(revs)
206
206
207 # Trim empty revs at the end, but never the very first revision of a chain
207 # Trim empty revs at the end, but never the very first revision of a chain
208 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
208 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
209 endidx -= 1
209 endidx -= 1
210
210
211 return revs[startidx:endidx]
211 return revs[startidx:endidx]
212
212
213 def _slicechunk(revlog, revs):
213 def _slicechunk(revlog, revs):
214 """slice revs to reduce the amount of unrelated data to be read from disk.
214 """slice revs to reduce the amount of unrelated data to be read from disk.
215
215
216 ``revs`` is sliced into groups that should be read in one time.
216 ``revs`` is sliced into groups that should be read in one time.
217 Assume that revs are sorted.
217 Assume that revs are sorted.
218 """
218 """
219 start = revlog.start
219 start = revlog.start
220 length = revlog.length
220 length = revlog.length
221
221
222 if len(revs) <= 1:
222 if len(revs) <= 1:
223 yield revs
223 yield revs
224 return
224 return
225
225
226 startbyte = start(revs[0])
226 startbyte = start(revs[0])
227 endbyte = start(revs[-1]) + length(revs[-1])
227 endbyte = start(revs[-1]) + length(revs[-1])
228 readdata = deltachainspan = endbyte - startbyte
228 readdata = deltachainspan = endbyte - startbyte
229
229
230 chainpayload = sum(length(r) for r in revs)
230 chainpayload = sum(length(r) for r in revs)
231
231
232 if deltachainspan:
232 if deltachainspan:
233 density = chainpayload / float(deltachainspan)
233 density = chainpayload / float(deltachainspan)
234 else:
234 else:
235 density = 1.0
235 density = 1.0
236
236
237 # Store the gaps in a heap to have them sorted by decreasing size
237 # Store the gaps in a heap to have them sorted by decreasing size
238 gapsheap = []
238 gapsheap = []
239 heapq.heapify(gapsheap)
239 heapq.heapify(gapsheap)
240 prevend = None
240 prevend = None
241 for i, rev in enumerate(revs):
241 for i, rev in enumerate(revs):
242 revstart = start(rev)
242 revstart = start(rev)
243 revlen = length(rev)
243 revlen = length(rev)
244
244
245 # Skip empty revisions to form larger holes
245 # Skip empty revisions to form larger holes
246 if revlen == 0:
246 if revlen == 0:
247 continue
247 continue
248
248
249 if prevend is not None:
249 if prevend is not None:
250 gapsize = revstart - prevend
250 gapsize = revstart - prevend
251 # only consider holes that are large enough
251 # only consider holes that are large enough
252 if gapsize > revlog._srmingapsize:
252 if gapsize > revlog._srmingapsize:
253 heapq.heappush(gapsheap, (-gapsize, i))
253 heapq.heappush(gapsheap, (-gapsize, i))
254
254
255 prevend = revstart + revlen
255 prevend = revstart + revlen
256
256
257 # Collect the indices of the largest holes until the density is acceptable
257 # Collect the indices of the largest holes until the density is acceptable
258 indicesheap = []
258 indicesheap = []
259 heapq.heapify(indicesheap)
259 heapq.heapify(indicesheap)
260 while gapsheap and density < revlog._srdensitythreshold:
260 while gapsheap and density < revlog._srdensitythreshold:
261 oppgapsize, gapidx = heapq.heappop(gapsheap)
261 oppgapsize, gapidx = heapq.heappop(gapsheap)
262
262
263 heapq.heappush(indicesheap, gapidx)
263 heapq.heappush(indicesheap, gapidx)
264
264
265 # the gap sizes are stored as negatives to be sorted decreasingly
265 # the gap sizes are stored as negatives to be sorted decreasingly
266 # by the heap
266 # by the heap
267 readdata -= (-oppgapsize)
267 readdata -= (-oppgapsize)
268 if readdata > 0:
268 if readdata > 0:
269 density = chainpayload / float(readdata)
269 density = chainpayload / float(readdata)
270 else:
270 else:
271 density = 1.0
271 density = 1.0
272
272
273 # Cut the revs at collected indices
273 # Cut the revs at collected indices
274 previdx = 0
274 previdx = 0
275 while indicesheap:
275 while indicesheap:
276 idx = heapq.heappop(indicesheap)
276 idx = heapq.heappop(indicesheap)
277
277
278 chunk = _trimchunk(revlog, revs, previdx, idx)
278 chunk = _trimchunk(revlog, revs, previdx, idx)
279 if chunk:
279 if chunk:
280 yield chunk
280 yield chunk
281
281
282 previdx = idx
282 previdx = idx
283
283
284 chunk = _trimchunk(revlog, revs, previdx)
284 chunk = _trimchunk(revlog, revs, previdx)
285 if chunk:
285 if chunk:
286 yield chunk
286 yield chunk
287
287
288 @attr.s(slots=True, frozen=True)
288 @attr.s(slots=True, frozen=True)
289 class _deltainfo(object):
289 class _deltainfo(object):
290 distance = attr.ib()
290 distance = attr.ib()
291 deltalen = attr.ib()
291 deltalen = attr.ib()
292 data = attr.ib()
292 data = attr.ib()
293 base = attr.ib()
293 base = attr.ib()
294 chainbase = attr.ib()
294 chainbase = attr.ib()
295 chainlen = attr.ib()
295 chainlen = attr.ib()
296 compresseddeltalen = attr.ib()
296 compresseddeltalen = attr.ib()
297
297
298 class _deltacomputer(object):
298 class _deltacomputer(object):
299 def __init__(self, revlog):
299 def __init__(self, revlog):
300 self.revlog = revlog
300 self.revlog = revlog
301
301
302 def _getcandidaterevs(self, p1, p2, cachedelta):
302 def _getcandidaterevs(self, p1, p2, cachedelta):
303 """
303 """
304 Provides revisions that present an interest to be diffed against,
304 Provides revisions that present an interest to be diffed against,
305 grouped by level of easiness.
305 grouped by level of easiness.
306 """
306 """
307 revlog = self.revlog
307 revlog = self.revlog
308 curr = len(revlog)
308 curr = len(revlog)
309 prev = curr - 1
309 prev = curr - 1
310 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
310 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
311
311
312 # should we try to build a delta?
312 # should we try to build a delta?
313 if prev != nullrev and revlog.storedeltachains:
313 if prev != nullrev and revlog.storedeltachains:
314 tested = set()
314 tested = set()
315 # This condition is true most of the time when processing
315 # This condition is true most of the time when processing
316 # changegroup data into a generaldelta repo. The only time it
316 # changegroup data into a generaldelta repo. The only time it
317 # isn't true is if this is the first revision in a delta chain
317 # isn't true is if this is the first revision in a delta chain
318 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
318 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
319 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
319 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
320 # Assume what we received from the server is a good choice
320 # Assume what we received from the server is a good choice
321 # build delta will reuse the cache
321 # build delta will reuse the cache
322 yield (cachedelta[0],)
322 yield (cachedelta[0],)
323 tested.add(cachedelta[0])
323 tested.add(cachedelta[0])
324
324
325 if revlog._generaldelta:
325 if revlog._generaldelta:
326 # exclude already lazy tested base if any
326 # exclude already lazy tested base if any
327 parents = [p for p in (p1r, p2r)
327 parents = [p for p in (p1r, p2r)
328 if p != nullrev and p not in tested]
328 if p != nullrev and p not in tested]
329 if parents and not revlog._aggressivemergedeltas:
329
330 # Pick whichever parent is closer to us (to minimize the
330 if not revlog._aggressivemergedeltas and len(parents) == 2:
331 # chance of having to build a fulltext).
331 parents.sort()
332 parents = [max(parents)]
332 # To minimize the chance of having to build a fulltext,
333 tested.update(parents)
333 # pick first whichever parent is closest to us (max rev)
334 yield parents
334 yield (parents[1],)
335 # then the other one (min rev) if the first did not fit
336 yield (parents[0],)
337 tested.update(parents)
338 elif len(parents) > 0:
339 # Test all parents (1 or 2), and keep the best candidate
340 yield parents
341 tested.update(parents)
335
342
336 if prev not in tested:
343 if prev not in tested:
337 # other approach failed try against prev to hopefully save us a
344 # other approach failed try against prev to hopefully save us a
338 # fulltext.
345 # fulltext.
339 yield (prev,)
346 yield (prev,)
340
347
341 def buildtext(self, revinfo, fh):
348 def buildtext(self, revinfo, fh):
342 """Builds a fulltext version of a revision
349 """Builds a fulltext version of a revision
343
350
344 revinfo: _revisioninfo instance that contains all needed info
351 revinfo: _revisioninfo instance that contains all needed info
345 fh: file handle to either the .i or the .d revlog file,
352 fh: file handle to either the .i or the .d revlog file,
346 depending on whether it is inlined or not
353 depending on whether it is inlined or not
347 """
354 """
348 btext = revinfo.btext
355 btext = revinfo.btext
349 if btext[0] is not None:
356 if btext[0] is not None:
350 return btext[0]
357 return btext[0]
351
358
352 revlog = self.revlog
359 revlog = self.revlog
353 cachedelta = revinfo.cachedelta
360 cachedelta = revinfo.cachedelta
354 flags = revinfo.flags
361 flags = revinfo.flags
355 node = revinfo.node
362 node = revinfo.node
356
363
357 baserev = cachedelta[0]
364 baserev = cachedelta[0]
358 delta = cachedelta[1]
365 delta = cachedelta[1]
359 # special case deltas which replace entire base; no need to decode
366 # special case deltas which replace entire base; no need to decode
360 # base revision. this neatly avoids censored bases, which throw when
367 # base revision. this neatly avoids censored bases, which throw when
361 # they're decoded.
368 # they're decoded.
362 hlen = struct.calcsize(">lll")
369 hlen = struct.calcsize(">lll")
363 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
370 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
364 len(delta) - hlen):
371 len(delta) - hlen):
365 btext[0] = delta[hlen:]
372 btext[0] = delta[hlen:]
366 else:
373 else:
367 # deltabase is rawtext before changed by flag processors, which is
374 # deltabase is rawtext before changed by flag processors, which is
368 # equivalent to non-raw text
375 # equivalent to non-raw text
369 basetext = revlog.revision(baserev, _df=fh, raw=False)
376 basetext = revlog.revision(baserev, _df=fh, raw=False)
370 btext[0] = mdiff.patch(basetext, delta)
377 btext[0] = mdiff.patch(basetext, delta)
371
378
372 try:
379 try:
373 res = revlog._processflags(btext[0], flags, 'read', raw=True)
380 res = revlog._processflags(btext[0], flags, 'read', raw=True)
374 btext[0], validatehash = res
381 btext[0], validatehash = res
375 if validatehash:
382 if validatehash:
376 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
383 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
377 if flags & REVIDX_ISCENSORED:
384 if flags & REVIDX_ISCENSORED:
378 raise RevlogError(_('node %s is not censored') % node)
385 raise RevlogError(_('node %s is not censored') % node)
379 except CensoredNodeError:
386 except CensoredNodeError:
380 # must pass the censored index flag to add censored revisions
387 # must pass the censored index flag to add censored revisions
381 if not flags & REVIDX_ISCENSORED:
388 if not flags & REVIDX_ISCENSORED:
382 raise
389 raise
383 return btext[0]
390 return btext[0]
384
391
385 def _builddeltadiff(self, base, revinfo, fh):
392 def _builddeltadiff(self, base, revinfo, fh):
386 revlog = self.revlog
393 revlog = self.revlog
387 t = self.buildtext(revinfo, fh)
394 t = self.buildtext(revinfo, fh)
388 if revlog.iscensored(base):
395 if revlog.iscensored(base):
389 # deltas based on a censored revision must replace the
396 # deltas based on a censored revision must replace the
390 # full content in one patch, so delta works everywhere
397 # full content in one patch, so delta works everywhere
391 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
398 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
392 delta = header + t
399 delta = header + t
393 else:
400 else:
394 ptext = revlog.revision(base, _df=fh, raw=True)
401 ptext = revlog.revision(base, _df=fh, raw=True)
395 delta = mdiff.textdiff(ptext, t)
402 delta = mdiff.textdiff(ptext, t)
396
403
397 return delta
404 return delta
398
405
399 def _builddeltainfo(self, revinfo, base, fh):
406 def _builddeltainfo(self, revinfo, base, fh):
400 # can we use the cached delta?
407 # can we use the cached delta?
401 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
408 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
402 delta = revinfo.cachedelta[1]
409 delta = revinfo.cachedelta[1]
403 else:
410 else:
404 delta = self._builddeltadiff(base, revinfo, fh)
411 delta = self._builddeltadiff(base, revinfo, fh)
405 revlog = self.revlog
412 revlog = self.revlog
406 header, data = revlog.compress(delta)
413 header, data = revlog.compress(delta)
407 deltalen = len(header) + len(data)
414 deltalen = len(header) + len(data)
408 chainbase = revlog.chainbase(base)
415 chainbase = revlog.chainbase(base)
409 offset = revlog.end(len(revlog) - 1)
416 offset = revlog.end(len(revlog) - 1)
410 dist = deltalen + offset - revlog.start(chainbase)
417 dist = deltalen + offset - revlog.start(chainbase)
411 if revlog._generaldelta:
418 if revlog._generaldelta:
412 deltabase = base
419 deltabase = base
413 else:
420 else:
414 deltabase = chainbase
421 deltabase = chainbase
415 chainlen, compresseddeltalen = revlog._chaininfo(base)
422 chainlen, compresseddeltalen = revlog._chaininfo(base)
416 chainlen += 1
423 chainlen += 1
417 compresseddeltalen += deltalen
424 compresseddeltalen += deltalen
418 return _deltainfo(dist, deltalen, (header, data), deltabase,
425 return _deltainfo(dist, deltalen, (header, data), deltabase,
419 chainbase, chainlen, compresseddeltalen)
426 chainbase, chainlen, compresseddeltalen)
420
427
421 def finddeltainfo(self, revinfo, fh):
428 def finddeltainfo(self, revinfo, fh):
422 """Find an acceptable delta against a candidate revision
429 """Find an acceptable delta against a candidate revision
423
430
424 revinfo: information about the revision (instance of _revisioninfo)
431 revinfo: information about the revision (instance of _revisioninfo)
425 fh: file handle to either the .i or the .d revlog file,
432 fh: file handle to either the .i or the .d revlog file,
426 depending on whether it is inlined or not
433 depending on whether it is inlined or not
427
434
428 Returns the first acceptable candidate revision, as ordered by
435 Returns the first acceptable candidate revision, as ordered by
429 _getcandidaterevs
436 _getcandidaterevs
430 """
437 """
431 cachedelta = revinfo.cachedelta
438 cachedelta = revinfo.cachedelta
432 p1 = revinfo.p1
439 p1 = revinfo.p1
433 p2 = revinfo.p2
440 p2 = revinfo.p2
434 revlog = self.revlog
441 revlog = self.revlog
435
442
436 deltainfo = None
443 deltainfo = None
437 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
444 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
438 nominateddeltas = []
445 nominateddeltas = []
439 for candidaterev in candidaterevs:
446 for candidaterev in candidaterevs:
440 # no delta for rawtext-changing revs (see "candelta" for why)
447 # no delta for rawtext-changing revs (see "candelta" for why)
441 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
448 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
442 continue
449 continue
443 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
450 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
444 if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
451 if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
445 nominateddeltas.append(candidatedelta)
452 nominateddeltas.append(candidatedelta)
446 if nominateddeltas:
453 if nominateddeltas:
447 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
454 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
448 break
455 break
449
456
450 return deltainfo
457 return deltainfo
451
458
452 @attr.s(slots=True, frozen=True)
459 @attr.s(slots=True, frozen=True)
453 class _revisioninfo(object):
460 class _revisioninfo(object):
454 """Information about a revision that allows building its fulltext
461 """Information about a revision that allows building its fulltext
455 node: expected hash of the revision
462 node: expected hash of the revision
456 p1, p2: parent revs of the revision
463 p1, p2: parent revs of the revision
457 btext: built text cache consisting of a one-element list
464 btext: built text cache consisting of a one-element list
458 cachedelta: (baserev, uncompressed_delta) or None
465 cachedelta: (baserev, uncompressed_delta) or None
459 flags: flags associated to the revision storage
466 flags: flags associated to the revision storage
460
467
461 One of btext[0] or cachedelta must be set.
468 One of btext[0] or cachedelta must be set.
462 """
469 """
463 node = attr.ib()
470 node = attr.ib()
464 p1 = attr.ib()
471 p1 = attr.ib()
465 p2 = attr.ib()
472 p2 = attr.ib()
466 btext = attr.ib()
473 btext = attr.ib()
467 textlen = attr.ib()
474 textlen = attr.ib()
468 cachedelta = attr.ib()
475 cachedelta = attr.ib()
469 flags = attr.ib()
476 flags = attr.ib()
470
477
471 # index v0:
478 # index v0:
472 # 4 bytes: offset
479 # 4 bytes: offset
473 # 4 bytes: compressed length
480 # 4 bytes: compressed length
474 # 4 bytes: base rev
481 # 4 bytes: base rev
475 # 4 bytes: link rev
482 # 4 bytes: link rev
476 # 20 bytes: parent 1 nodeid
483 # 20 bytes: parent 1 nodeid
477 # 20 bytes: parent 2 nodeid
484 # 20 bytes: parent 2 nodeid
478 # 20 bytes: nodeid
485 # 20 bytes: nodeid
479 indexformatv0 = struct.Struct(">4l20s20s20s")
486 indexformatv0 = struct.Struct(">4l20s20s20s")
480 indexformatv0_pack = indexformatv0.pack
487 indexformatv0_pack = indexformatv0.pack
481 indexformatv0_unpack = indexformatv0.unpack
488 indexformatv0_unpack = indexformatv0.unpack
482
489
483 class revlogoldio(object):
490 class revlogoldio(object):
484 def __init__(self):
491 def __init__(self):
485 self.size = indexformatv0.size
492 self.size = indexformatv0.size
486
493
487 def parseindex(self, data, inline):
494 def parseindex(self, data, inline):
488 s = self.size
495 s = self.size
489 index = []
496 index = []
490 nodemap = {nullid: nullrev}
497 nodemap = {nullid: nullrev}
491 n = off = 0
498 n = off = 0
492 l = len(data)
499 l = len(data)
493 while off + s <= l:
500 while off + s <= l:
494 cur = data[off:off + s]
501 cur = data[off:off + s]
495 off += s
502 off += s
496 e = indexformatv0_unpack(cur)
503 e = indexformatv0_unpack(cur)
497 # transform to revlogv1 format
504 # transform to revlogv1 format
498 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
505 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
499 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
506 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
500 index.append(e2)
507 index.append(e2)
501 nodemap[e[6]] = n
508 nodemap[e[6]] = n
502 n += 1
509 n += 1
503
510
504 # add the magic null revision at -1
511 # add the magic null revision at -1
505 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
512 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
506
513
507 return index, nodemap, None
514 return index, nodemap, None
508
515
509 def packentry(self, entry, node, version, rev):
516 def packentry(self, entry, node, version, rev):
510 if gettype(entry[0]):
517 if gettype(entry[0]):
511 raise RevlogError(_('index entry flags need revlog version 1'))
518 raise RevlogError(_('index entry flags need revlog version 1'))
512 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
519 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
513 node(entry[5]), node(entry[6]), entry[7])
520 node(entry[5]), node(entry[6]), entry[7])
514 return indexformatv0_pack(*e2)
521 return indexformatv0_pack(*e2)
515
522
516 # index ng:
523 # index ng:
517 # 6 bytes: offset
524 # 6 bytes: offset
518 # 2 bytes: flags
525 # 2 bytes: flags
519 # 4 bytes: compressed length
526 # 4 bytes: compressed length
520 # 4 bytes: uncompressed length
527 # 4 bytes: uncompressed length
521 # 4 bytes: base rev
528 # 4 bytes: base rev
522 # 4 bytes: link rev
529 # 4 bytes: link rev
523 # 4 bytes: parent 1 rev
530 # 4 bytes: parent 1 rev
524 # 4 bytes: parent 2 rev
531 # 4 bytes: parent 2 rev
525 # 32 bytes: nodeid
532 # 32 bytes: nodeid
526 indexformatng = struct.Struct(">Qiiiiii20s12x")
533 indexformatng = struct.Struct(">Qiiiiii20s12x")
527 indexformatng_pack = indexformatng.pack
534 indexformatng_pack = indexformatng.pack
528 versionformat = struct.Struct(">I")
535 versionformat = struct.Struct(">I")
529 versionformat_pack = versionformat.pack
536 versionformat_pack = versionformat.pack
530 versionformat_unpack = versionformat.unpack
537 versionformat_unpack = versionformat.unpack
531
538
532 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
539 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
533 # signed integer)
540 # signed integer)
534 _maxentrysize = 0x7fffffff
541 _maxentrysize = 0x7fffffff
535
542
536 class revlogio(object):
543 class revlogio(object):
537 def __init__(self):
544 def __init__(self):
538 self.size = indexformatng.size
545 self.size = indexformatng.size
539
546
540 def parseindex(self, data, inline):
547 def parseindex(self, data, inline):
541 # call the C implementation to parse the index data
548 # call the C implementation to parse the index data
542 index, cache = parsers.parse_index2(data, inline)
549 index, cache = parsers.parse_index2(data, inline)
543 return index, getattr(index, 'nodemap', None), cache
550 return index, getattr(index, 'nodemap', None), cache
544
551
545 def packentry(self, entry, node, version, rev):
552 def packentry(self, entry, node, version, rev):
546 p = indexformatng_pack(*entry)
553 p = indexformatng_pack(*entry)
547 if rev == 0:
554 if rev == 0:
548 p = versionformat_pack(version) + p[4:]
555 p = versionformat_pack(version) + p[4:]
549 return p
556 return p
550
557
551 class revlog(object):
558 class revlog(object):
552 """
559 """
553 the underlying revision storage object
560 the underlying revision storage object
554
561
555 A revlog consists of two parts, an index and the revision data.
562 A revlog consists of two parts, an index and the revision data.
556
563
557 The index is a file with a fixed record size containing
564 The index is a file with a fixed record size containing
558 information on each revision, including its nodeid (hash), the
565 information on each revision, including its nodeid (hash), the
559 nodeids of its parents, the position and offset of its data within
566 nodeids of its parents, the position and offset of its data within
560 the data file, and the revision it's based on. Finally, each entry
567 the data file, and the revision it's based on. Finally, each entry
561 contains a linkrev entry that can serve as a pointer to external
568 contains a linkrev entry that can serve as a pointer to external
562 data.
569 data.
563
570
564 The revision data itself is a linear collection of data chunks.
571 The revision data itself is a linear collection of data chunks.
565 Each chunk represents a revision and is usually represented as a
572 Each chunk represents a revision and is usually represented as a
566 delta against the previous chunk. To bound lookup time, runs of
573 delta against the previous chunk. To bound lookup time, runs of
567 deltas are limited to about 2 times the length of the original
574 deltas are limited to about 2 times the length of the original
568 version data. This makes retrieval of a version proportional to
575 version data. This makes retrieval of a version proportional to
569 its size, or O(1) relative to the number of revisions.
576 its size, or O(1) relative to the number of revisions.
570
577
571 Both pieces of the revlog are written to in an append-only
578 Both pieces of the revlog are written to in an append-only
572 fashion, which means we never need to rewrite a file to insert or
579 fashion, which means we never need to rewrite a file to insert or
573 remove data, and can use some simple techniques to avoid the need
580 remove data, and can use some simple techniques to avoid the need
574 for locking while reading.
581 for locking while reading.
575
582
576 If checkambig, indexfile is opened with checkambig=True at
583 If checkambig, indexfile is opened with checkambig=True at
577 writing, to avoid file stat ambiguity.
584 writing, to avoid file stat ambiguity.
578
585
579 If mmaplargeindex is True, and an mmapindexthreshold is set, the
586 If mmaplargeindex is True, and an mmapindexthreshold is set, the
580 index will be mmapped rather than read if it is larger than the
587 index will be mmapped rather than read if it is larger than the
581 configured threshold.
588 configured threshold.
582
589
583 If censorable is True, the revlog can have censored revisions.
590 If censorable is True, the revlog can have censored revisions.
584 """
591 """
585 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
592 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
586 mmaplargeindex=False, censorable=False):
593 mmaplargeindex=False, censorable=False):
587 """
594 """
588 create a revlog object
595 create a revlog object
589
596
590 opener is a function that abstracts the file opening operation
597 opener is a function that abstracts the file opening operation
591 and can be used to implement COW semantics or the like.
598 and can be used to implement COW semantics or the like.
592 """
599 """
593 self.indexfile = indexfile
600 self.indexfile = indexfile
594 self.datafile = datafile or (indexfile[:-2] + ".d")
601 self.datafile = datafile or (indexfile[:-2] + ".d")
595 self.opener = opener
602 self.opener = opener
596 # When True, indexfile is opened with checkambig=True at writing, to
603 # When True, indexfile is opened with checkambig=True at writing, to
597 # avoid file stat ambiguity.
604 # avoid file stat ambiguity.
598 self._checkambig = checkambig
605 self._checkambig = checkambig
599 self._censorable = censorable
606 self._censorable = censorable
600 # 3-tuple of (node, rev, text) for a raw revision.
607 # 3-tuple of (node, rev, text) for a raw revision.
601 self._cache = None
608 self._cache = None
602 # Maps rev to chain base rev.
609 # Maps rev to chain base rev.
603 self._chainbasecache = util.lrucachedict(100)
610 self._chainbasecache = util.lrucachedict(100)
604 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
611 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
605 self._chunkcache = (0, '')
612 self._chunkcache = (0, '')
606 # How much data to read and cache into the raw revlog data cache.
613 # How much data to read and cache into the raw revlog data cache.
607 self._chunkcachesize = 65536
614 self._chunkcachesize = 65536
608 self._maxchainlen = None
615 self._maxchainlen = None
609 self._aggressivemergedeltas = False
616 self._aggressivemergedeltas = False
610 self.index = []
617 self.index = []
611 # Mapping of partial identifiers to full nodes.
618 # Mapping of partial identifiers to full nodes.
612 self._pcache = {}
619 self._pcache = {}
613 # Mapping of revision integer to full node.
620 # Mapping of revision integer to full node.
614 self._nodecache = {nullid: nullrev}
621 self._nodecache = {nullid: nullrev}
615 self._nodepos = None
622 self._nodepos = None
616 self._compengine = 'zlib'
623 self._compengine = 'zlib'
617 self._maxdeltachainspan = -1
624 self._maxdeltachainspan = -1
618 self._withsparseread = False
625 self._withsparseread = False
619 self._srdensitythreshold = 0.25
626 self._srdensitythreshold = 0.25
620 self._srmingapsize = 262144
627 self._srmingapsize = 262144
621
628
622 mmapindexthreshold = None
629 mmapindexthreshold = None
623 v = REVLOG_DEFAULT_VERSION
630 v = REVLOG_DEFAULT_VERSION
624 opts = getattr(opener, 'options', None)
631 opts = getattr(opener, 'options', None)
625 if opts is not None:
632 if opts is not None:
626 if 'revlogv2' in opts:
633 if 'revlogv2' in opts:
627 # version 2 revlogs always use generaldelta.
634 # version 2 revlogs always use generaldelta.
628 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
635 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
629 elif 'revlogv1' in opts:
636 elif 'revlogv1' in opts:
630 if 'generaldelta' in opts:
637 if 'generaldelta' in opts:
631 v |= FLAG_GENERALDELTA
638 v |= FLAG_GENERALDELTA
632 else:
639 else:
633 v = 0
640 v = 0
634 if 'chunkcachesize' in opts:
641 if 'chunkcachesize' in opts:
635 self._chunkcachesize = opts['chunkcachesize']
642 self._chunkcachesize = opts['chunkcachesize']
636 if 'maxchainlen' in opts:
643 if 'maxchainlen' in opts:
637 self._maxchainlen = opts['maxchainlen']
644 self._maxchainlen = opts['maxchainlen']
638 if 'aggressivemergedeltas' in opts:
645 if 'aggressivemergedeltas' in opts:
639 self._aggressivemergedeltas = opts['aggressivemergedeltas']
646 self._aggressivemergedeltas = opts['aggressivemergedeltas']
640 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
647 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
641 if 'compengine' in opts:
648 if 'compengine' in opts:
642 self._compengine = opts['compengine']
649 self._compengine = opts['compengine']
643 if 'maxdeltachainspan' in opts:
650 if 'maxdeltachainspan' in opts:
644 self._maxdeltachainspan = opts['maxdeltachainspan']
651 self._maxdeltachainspan = opts['maxdeltachainspan']
645 if mmaplargeindex and 'mmapindexthreshold' in opts:
652 if mmaplargeindex and 'mmapindexthreshold' in opts:
646 mmapindexthreshold = opts['mmapindexthreshold']
653 mmapindexthreshold = opts['mmapindexthreshold']
647 self._withsparseread = bool(opts.get('with-sparse-read', False))
654 self._withsparseread = bool(opts.get('with-sparse-read', False))
648 if 'sparse-read-density-threshold' in opts:
655 if 'sparse-read-density-threshold' in opts:
649 self._srdensitythreshold = opts['sparse-read-density-threshold']
656 self._srdensitythreshold = opts['sparse-read-density-threshold']
650 if 'sparse-read-min-gap-size' in opts:
657 if 'sparse-read-min-gap-size' in opts:
651 self._srmingapsize = opts['sparse-read-min-gap-size']
658 self._srmingapsize = opts['sparse-read-min-gap-size']
652
659
653 if self._chunkcachesize <= 0:
660 if self._chunkcachesize <= 0:
654 raise RevlogError(_('revlog chunk cache size %r is not greater '
661 raise RevlogError(_('revlog chunk cache size %r is not greater '
655 'than 0') % self._chunkcachesize)
662 'than 0') % self._chunkcachesize)
656 elif self._chunkcachesize & (self._chunkcachesize - 1):
663 elif self._chunkcachesize & (self._chunkcachesize - 1):
657 raise RevlogError(_('revlog chunk cache size %r is not a power '
664 raise RevlogError(_('revlog chunk cache size %r is not a power '
658 'of 2') % self._chunkcachesize)
665 'of 2') % self._chunkcachesize)
659
666
660 indexdata = ''
667 indexdata = ''
661 self._initempty = True
668 self._initempty = True
662 try:
669 try:
663 with self._indexfp() as f:
670 with self._indexfp() as f:
664 if (mmapindexthreshold is not None and
671 if (mmapindexthreshold is not None and
665 self.opener.fstat(f).st_size >= mmapindexthreshold):
672 self.opener.fstat(f).st_size >= mmapindexthreshold):
666 indexdata = util.buffer(util.mmapread(f))
673 indexdata = util.buffer(util.mmapread(f))
667 else:
674 else:
668 indexdata = f.read()
675 indexdata = f.read()
669 if len(indexdata) > 0:
676 if len(indexdata) > 0:
670 v = versionformat_unpack(indexdata[:4])[0]
677 v = versionformat_unpack(indexdata[:4])[0]
671 self._initempty = False
678 self._initempty = False
672 except IOError as inst:
679 except IOError as inst:
673 if inst.errno != errno.ENOENT:
680 if inst.errno != errno.ENOENT:
674 raise
681 raise
675
682
676 self.version = v
683 self.version = v
677 self._inline = v & FLAG_INLINE_DATA
684 self._inline = v & FLAG_INLINE_DATA
678 self._generaldelta = v & FLAG_GENERALDELTA
685 self._generaldelta = v & FLAG_GENERALDELTA
679 flags = v & ~0xFFFF
686 flags = v & ~0xFFFF
680 fmt = v & 0xFFFF
687 fmt = v & 0xFFFF
681 if fmt == REVLOGV0:
688 if fmt == REVLOGV0:
682 if flags:
689 if flags:
683 raise RevlogError(_('unknown flags (%#04x) in version %d '
690 raise RevlogError(_('unknown flags (%#04x) in version %d '
684 'revlog %s') %
691 'revlog %s') %
685 (flags >> 16, fmt, self.indexfile))
692 (flags >> 16, fmt, self.indexfile))
686 elif fmt == REVLOGV1:
693 elif fmt == REVLOGV1:
687 if flags & ~REVLOGV1_FLAGS:
694 if flags & ~REVLOGV1_FLAGS:
688 raise RevlogError(_('unknown flags (%#04x) in version %d '
695 raise RevlogError(_('unknown flags (%#04x) in version %d '
689 'revlog %s') %
696 'revlog %s') %
690 (flags >> 16, fmt, self.indexfile))
697 (flags >> 16, fmt, self.indexfile))
691 elif fmt == REVLOGV2:
698 elif fmt == REVLOGV2:
692 if flags & ~REVLOGV2_FLAGS:
699 if flags & ~REVLOGV2_FLAGS:
693 raise RevlogError(_('unknown flags (%#04x) in version %d '
700 raise RevlogError(_('unknown flags (%#04x) in version %d '
694 'revlog %s') %
701 'revlog %s') %
695 (flags >> 16, fmt, self.indexfile))
702 (flags >> 16, fmt, self.indexfile))
696 else:
703 else:
697 raise RevlogError(_('unknown version (%d) in revlog %s') %
704 raise RevlogError(_('unknown version (%d) in revlog %s') %
698 (fmt, self.indexfile))
705 (fmt, self.indexfile))
699
706
700 self.storedeltachains = True
707 self.storedeltachains = True
701
708
702 self._io = revlogio()
709 self._io = revlogio()
703 if self.version == REVLOGV0:
710 if self.version == REVLOGV0:
704 self._io = revlogoldio()
711 self._io = revlogoldio()
705 try:
712 try:
706 d = self._io.parseindex(indexdata, self._inline)
713 d = self._io.parseindex(indexdata, self._inline)
707 except (ValueError, IndexError):
714 except (ValueError, IndexError):
708 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
715 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
709 self.index, nodemap, self._chunkcache = d
716 self.index, nodemap, self._chunkcache = d
710 if nodemap is not None:
717 if nodemap is not None:
711 self.nodemap = self._nodecache = nodemap
718 self.nodemap = self._nodecache = nodemap
712 if not self._chunkcache:
719 if not self._chunkcache:
713 self._chunkclear()
720 self._chunkclear()
714 # revnum -> (chain-length, sum-delta-length)
721 # revnum -> (chain-length, sum-delta-length)
715 self._chaininfocache = {}
722 self._chaininfocache = {}
716 # revlog header -> revlog compressor
723 # revlog header -> revlog compressor
717 self._decompressors = {}
724 self._decompressors = {}
718
725
719 @util.propertycache
726 @util.propertycache
720 def _compressor(self):
727 def _compressor(self):
721 return util.compengines[self._compengine].revlogcompressor()
728 return util.compengines[self._compengine].revlogcompressor()
722
729
723 def _indexfp(self, mode='r'):
730 def _indexfp(self, mode='r'):
724 """file object for the revlog's index file"""
731 """file object for the revlog's index file"""
725 args = {r'mode': mode}
732 args = {r'mode': mode}
726 if mode != 'r':
733 if mode != 'r':
727 args[r'checkambig'] = self._checkambig
734 args[r'checkambig'] = self._checkambig
728 if mode == 'w':
735 if mode == 'w':
729 args[r'atomictemp'] = True
736 args[r'atomictemp'] = True
730 return self.opener(self.indexfile, **args)
737 return self.opener(self.indexfile, **args)
731
738
732 def _datafp(self, mode='r'):
739 def _datafp(self, mode='r'):
733 """file object for the revlog's data file"""
740 """file object for the revlog's data file"""
734 return self.opener(self.datafile, mode=mode)
741 return self.opener(self.datafile, mode=mode)
735
742
736 @contextlib.contextmanager
743 @contextlib.contextmanager
737 def _datareadfp(self, existingfp=None):
744 def _datareadfp(self, existingfp=None):
738 """file object suitable to read data"""
745 """file object suitable to read data"""
739 if existingfp is not None:
746 if existingfp is not None:
740 yield existingfp
747 yield existingfp
741 else:
748 else:
742 if self._inline:
749 if self._inline:
743 func = self._indexfp
750 func = self._indexfp
744 else:
751 else:
745 func = self._datafp
752 func = self._datafp
746 with func() as fp:
753 with func() as fp:
747 yield fp
754 yield fp
748
755
749 def tip(self):
756 def tip(self):
750 return self.node(len(self.index) - 2)
757 return self.node(len(self.index) - 2)
751 def __contains__(self, rev):
758 def __contains__(self, rev):
752 return 0 <= rev < len(self)
759 return 0 <= rev < len(self)
753 def __len__(self):
760 def __len__(self):
754 return len(self.index) - 1
761 return len(self.index) - 1
755 def __iter__(self):
762 def __iter__(self):
756 return iter(xrange(len(self)))
763 return iter(xrange(len(self)))
757 def revs(self, start=0, stop=None):
764 def revs(self, start=0, stop=None):
758 """iterate over all rev in this revlog (from start to stop)"""
765 """iterate over all rev in this revlog (from start to stop)"""
759 step = 1
766 step = 1
760 if stop is not None:
767 if stop is not None:
761 if start > stop:
768 if start > stop:
762 step = -1
769 step = -1
763 stop += step
770 stop += step
764 else:
771 else:
765 stop = len(self)
772 stop = len(self)
766 return xrange(start, stop, step)
773 return xrange(start, stop, step)
767
774
768 @util.propertycache
775 @util.propertycache
769 def nodemap(self):
776 def nodemap(self):
770 self.rev(self.node(0))
777 self.rev(self.node(0))
771 return self._nodecache
778 return self._nodecache
772
779
773 def hasnode(self, node):
780 def hasnode(self, node):
774 try:
781 try:
775 self.rev(node)
782 self.rev(node)
776 return True
783 return True
777 except KeyError:
784 except KeyError:
778 return False
785 return False
779
786
780 def candelta(self, baserev, rev):
787 def candelta(self, baserev, rev):
781 """whether two revisions (baserev, rev) can be delta-ed or not"""
788 """whether two revisions (baserev, rev) can be delta-ed or not"""
782 # Disable delta if either rev requires a content-changing flag
789 # Disable delta if either rev requires a content-changing flag
783 # processor (ex. LFS). This is because such flag processor can alter
790 # processor (ex. LFS). This is because such flag processor can alter
784 # the rawtext content that the delta will be based on, and two clients
791 # the rawtext content that the delta will be based on, and two clients
785 # could have a same revlog node with different flags (i.e. different
792 # could have a same revlog node with different flags (i.e. different
786 # rawtext contents) and the delta could be incompatible.
793 # rawtext contents) and the delta could be incompatible.
787 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
794 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
788 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
795 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
789 return False
796 return False
790 return True
797 return True
791
798
792 def clearcaches(self):
799 def clearcaches(self):
793 self._cache = None
800 self._cache = None
794 self._chainbasecache.clear()
801 self._chainbasecache.clear()
795 self._chunkcache = (0, '')
802 self._chunkcache = (0, '')
796 self._pcache = {}
803 self._pcache = {}
797
804
798 try:
805 try:
799 self._nodecache.clearcaches()
806 self._nodecache.clearcaches()
800 except AttributeError:
807 except AttributeError:
801 self._nodecache = {nullid: nullrev}
808 self._nodecache = {nullid: nullrev}
802 self._nodepos = None
809 self._nodepos = None
803
810
804 def rev(self, node):
811 def rev(self, node):
805 try:
812 try:
806 return self._nodecache[node]
813 return self._nodecache[node]
807 except TypeError:
814 except TypeError:
808 raise
815 raise
809 except RevlogError:
816 except RevlogError:
810 # parsers.c radix tree lookup failed
817 # parsers.c radix tree lookup failed
811 if node == wdirid or node in wdirfilenodeids:
818 if node == wdirid or node in wdirfilenodeids:
812 raise error.WdirUnsupported
819 raise error.WdirUnsupported
813 raise LookupError(node, self.indexfile, _('no node'))
820 raise LookupError(node, self.indexfile, _('no node'))
814 except KeyError:
821 except KeyError:
815 # pure python cache lookup failed
822 # pure python cache lookup failed
816 n = self._nodecache
823 n = self._nodecache
817 i = self.index
824 i = self.index
818 p = self._nodepos
825 p = self._nodepos
819 if p is None:
826 if p is None:
820 p = len(i) - 2
827 p = len(i) - 2
821 else:
828 else:
822 assert p < len(i)
829 assert p < len(i)
823 for r in xrange(p, -1, -1):
830 for r in xrange(p, -1, -1):
824 v = i[r][7]
831 v = i[r][7]
825 n[v] = r
832 n[v] = r
826 if v == node:
833 if v == node:
827 self._nodepos = r - 1
834 self._nodepos = r - 1
828 return r
835 return r
829 if node == wdirid or node in wdirfilenodeids:
836 if node == wdirid or node in wdirfilenodeids:
830 raise error.WdirUnsupported
837 raise error.WdirUnsupported
831 raise LookupError(node, self.indexfile, _('no node'))
838 raise LookupError(node, self.indexfile, _('no node'))
832
839
833 # Accessors for index entries.
840 # Accessors for index entries.
834
841
835 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
842 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
836 # are flags.
843 # are flags.
837 def start(self, rev):
844 def start(self, rev):
838 return int(self.index[rev][0] >> 16)
845 return int(self.index[rev][0] >> 16)
839
846
840 def flags(self, rev):
847 def flags(self, rev):
841 return self.index[rev][0] & 0xFFFF
848 return self.index[rev][0] & 0xFFFF
842
849
843 def length(self, rev):
850 def length(self, rev):
844 return self.index[rev][1]
851 return self.index[rev][1]
845
852
846 def rawsize(self, rev):
853 def rawsize(self, rev):
847 """return the length of the uncompressed text for a given revision"""
854 """return the length of the uncompressed text for a given revision"""
848 l = self.index[rev][2]
855 l = self.index[rev][2]
849 if l is not None and l >= 0:
856 if l is not None and l >= 0:
850 return l
857 return l
851
858
852 t = self.revision(rev, raw=True)
859 t = self.revision(rev, raw=True)
853 return len(t)
860 return len(t)
854
861
855 def size(self, rev):
862 def size(self, rev):
856 """length of non-raw text (processed by a "read" flag processor)"""
863 """length of non-raw text (processed by a "read" flag processor)"""
857 # fast path: if no "read" flag processor could change the content,
864 # fast path: if no "read" flag processor could change the content,
858 # size is rawsize. note: ELLIPSIS is known to not change the content.
865 # size is rawsize. note: ELLIPSIS is known to not change the content.
859 flags = self.flags(rev)
866 flags = self.flags(rev)
860 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
867 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
861 return self.rawsize(rev)
868 return self.rawsize(rev)
862
869
863 return len(self.revision(rev, raw=False))
870 return len(self.revision(rev, raw=False))
864
871
865 def chainbase(self, rev):
872 def chainbase(self, rev):
866 base = self._chainbasecache.get(rev)
873 base = self._chainbasecache.get(rev)
867 if base is not None:
874 if base is not None:
868 return base
875 return base
869
876
870 index = self.index
877 index = self.index
871 base = index[rev][3]
878 base = index[rev][3]
872 while base != rev:
879 while base != rev:
873 rev = base
880 rev = base
874 base = index[rev][3]
881 base = index[rev][3]
875
882
876 self._chainbasecache[rev] = base
883 self._chainbasecache[rev] = base
877 return base
884 return base
878
885
879 def linkrev(self, rev):
886 def linkrev(self, rev):
880 return self.index[rev][4]
887 return self.index[rev][4]
881
888
882 def parentrevs(self, rev):
889 def parentrevs(self, rev):
883 try:
890 try:
884 entry = self.index[rev]
891 entry = self.index[rev]
885 except IndexError:
892 except IndexError:
886 if rev == wdirrev:
893 if rev == wdirrev:
887 raise error.WdirUnsupported
894 raise error.WdirUnsupported
888 raise
895 raise
889
896
890 return entry[5], entry[6]
897 return entry[5], entry[6]
891
898
892 def node(self, rev):
899 def node(self, rev):
893 try:
900 try:
894 return self.index[rev][7]
901 return self.index[rev][7]
895 except IndexError:
902 except IndexError:
896 if rev == wdirrev:
903 if rev == wdirrev:
897 raise error.WdirUnsupported
904 raise error.WdirUnsupported
898 raise
905 raise
899
906
900 # Derived from index values.
907 # Derived from index values.
901
908
902 def end(self, rev):
909 def end(self, rev):
903 return self.start(rev) + self.length(rev)
910 return self.start(rev) + self.length(rev)
904
911
905 def parents(self, node):
912 def parents(self, node):
906 i = self.index
913 i = self.index
907 d = i[self.rev(node)]
914 d = i[self.rev(node)]
908 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
915 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
909
916
910 def chainlen(self, rev):
917 def chainlen(self, rev):
911 return self._chaininfo(rev)[0]
918 return self._chaininfo(rev)[0]
912
919
913 def _chaininfo(self, rev):
920 def _chaininfo(self, rev):
914 chaininfocache = self._chaininfocache
921 chaininfocache = self._chaininfocache
915 if rev in chaininfocache:
922 if rev in chaininfocache:
916 return chaininfocache[rev]
923 return chaininfocache[rev]
917 index = self.index
924 index = self.index
918 generaldelta = self._generaldelta
925 generaldelta = self._generaldelta
919 iterrev = rev
926 iterrev = rev
920 e = index[iterrev]
927 e = index[iterrev]
921 clen = 0
928 clen = 0
922 compresseddeltalen = 0
929 compresseddeltalen = 0
923 while iterrev != e[3]:
930 while iterrev != e[3]:
924 clen += 1
931 clen += 1
925 compresseddeltalen += e[1]
932 compresseddeltalen += e[1]
926 if generaldelta:
933 if generaldelta:
927 iterrev = e[3]
934 iterrev = e[3]
928 else:
935 else:
929 iterrev -= 1
936 iterrev -= 1
930 if iterrev in chaininfocache:
937 if iterrev in chaininfocache:
931 t = chaininfocache[iterrev]
938 t = chaininfocache[iterrev]
932 clen += t[0]
939 clen += t[0]
933 compresseddeltalen += t[1]
940 compresseddeltalen += t[1]
934 break
941 break
935 e = index[iterrev]
942 e = index[iterrev]
936 else:
943 else:
937 # Add text length of base since decompressing that also takes
944 # Add text length of base since decompressing that also takes
938 # work. For cache hits the length is already included.
945 # work. For cache hits the length is already included.
939 compresseddeltalen += e[1]
946 compresseddeltalen += e[1]
940 r = (clen, compresseddeltalen)
947 r = (clen, compresseddeltalen)
941 chaininfocache[rev] = r
948 chaininfocache[rev] = r
942 return r
949 return r
943
950
944 def _deltachain(self, rev, stoprev=None):
951 def _deltachain(self, rev, stoprev=None):
945 """Obtain the delta chain for a revision.
952 """Obtain the delta chain for a revision.
946
953
947 ``stoprev`` specifies a revision to stop at. If not specified, we
954 ``stoprev`` specifies a revision to stop at. If not specified, we
948 stop at the base of the chain.
955 stop at the base of the chain.
949
956
950 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
957 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
951 revs in ascending order and ``stopped`` is a bool indicating whether
958 revs in ascending order and ``stopped`` is a bool indicating whether
952 ``stoprev`` was hit.
959 ``stoprev`` was hit.
953 """
960 """
954 # Try C implementation.
961 # Try C implementation.
955 try:
962 try:
956 return self.index.deltachain(rev, stoprev, self._generaldelta)
963 return self.index.deltachain(rev, stoprev, self._generaldelta)
957 except AttributeError:
964 except AttributeError:
958 pass
965 pass
959
966
960 chain = []
967 chain = []
961
968
962 # Alias to prevent attribute lookup in tight loop.
969 # Alias to prevent attribute lookup in tight loop.
963 index = self.index
970 index = self.index
964 generaldelta = self._generaldelta
971 generaldelta = self._generaldelta
965
972
966 iterrev = rev
973 iterrev = rev
967 e = index[iterrev]
974 e = index[iterrev]
968 while iterrev != e[3] and iterrev != stoprev:
975 while iterrev != e[3] and iterrev != stoprev:
969 chain.append(iterrev)
976 chain.append(iterrev)
970 if generaldelta:
977 if generaldelta:
971 iterrev = e[3]
978 iterrev = e[3]
972 else:
979 else:
973 iterrev -= 1
980 iterrev -= 1
974 e = index[iterrev]
981 e = index[iterrev]
975
982
976 if iterrev == stoprev:
983 if iterrev == stoprev:
977 stopped = True
984 stopped = True
978 else:
985 else:
979 chain.append(iterrev)
986 chain.append(iterrev)
980 stopped = False
987 stopped = False
981
988
982 chain.reverse()
989 chain.reverse()
983 return chain, stopped
990 return chain, stopped
984
991
985 def ancestors(self, revs, stoprev=0, inclusive=False):
992 def ancestors(self, revs, stoprev=0, inclusive=False):
986 """Generate the ancestors of 'revs' in reverse topological order.
993 """Generate the ancestors of 'revs' in reverse topological order.
987 Does not generate revs lower than stoprev.
994 Does not generate revs lower than stoprev.
988
995
989 See the documentation for ancestor.lazyancestors for more details."""
996 See the documentation for ancestor.lazyancestors for more details."""
990
997
991 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
998 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
992 inclusive=inclusive)
999 inclusive=inclusive)
993
1000
994 def descendants(self, revs):
1001 def descendants(self, revs):
995 """Generate the descendants of 'revs' in revision order.
1002 """Generate the descendants of 'revs' in revision order.
996
1003
997 Yield a sequence of revision numbers starting with a child of
1004 Yield a sequence of revision numbers starting with a child of
998 some rev in revs, i.e., each revision is *not* considered a
1005 some rev in revs, i.e., each revision is *not* considered a
999 descendant of itself. Results are ordered by revision number (a
1006 descendant of itself. Results are ordered by revision number (a
1000 topological sort)."""
1007 topological sort)."""
1001 first = min(revs)
1008 first = min(revs)
1002 if first == nullrev:
1009 if first == nullrev:
1003 for i in self:
1010 for i in self:
1004 yield i
1011 yield i
1005 return
1012 return
1006
1013
1007 seen = set(revs)
1014 seen = set(revs)
1008 for i in self.revs(start=first + 1):
1015 for i in self.revs(start=first + 1):
1009 for x in self.parentrevs(i):
1016 for x in self.parentrevs(i):
1010 if x != nullrev and x in seen:
1017 if x != nullrev and x in seen:
1011 seen.add(i)
1018 seen.add(i)
1012 yield i
1019 yield i
1013 break
1020 break
1014
1021
1015 def findcommonmissing(self, common=None, heads=None):
1022 def findcommonmissing(self, common=None, heads=None):
1016 """Return a tuple of the ancestors of common and the ancestors of heads
1023 """Return a tuple of the ancestors of common and the ancestors of heads
1017 that are not ancestors of common. In revset terminology, we return the
1024 that are not ancestors of common. In revset terminology, we return the
1018 tuple:
1025 tuple:
1019
1026
1020 ::common, (::heads) - (::common)
1027 ::common, (::heads) - (::common)
1021
1028
1022 The list is sorted by revision number, meaning it is
1029 The list is sorted by revision number, meaning it is
1023 topologically sorted.
1030 topologically sorted.
1024
1031
1025 'heads' and 'common' are both lists of node IDs. If heads is
1032 'heads' and 'common' are both lists of node IDs. If heads is
1026 not supplied, uses all of the revlog's heads. If common is not
1033 not supplied, uses all of the revlog's heads. If common is not
1027 supplied, uses nullid."""
1034 supplied, uses nullid."""
1028 if common is None:
1035 if common is None:
1029 common = [nullid]
1036 common = [nullid]
1030 if heads is None:
1037 if heads is None:
1031 heads = self.heads()
1038 heads = self.heads()
1032
1039
1033 common = [self.rev(n) for n in common]
1040 common = [self.rev(n) for n in common]
1034 heads = [self.rev(n) for n in heads]
1041 heads = [self.rev(n) for n in heads]
1035
1042
1036 # we want the ancestors, but inclusive
1043 # we want the ancestors, but inclusive
1037 class lazyset(object):
1044 class lazyset(object):
1038 def __init__(self, lazyvalues):
1045 def __init__(self, lazyvalues):
1039 self.addedvalues = set()
1046 self.addedvalues = set()
1040 self.lazyvalues = lazyvalues
1047 self.lazyvalues = lazyvalues
1041
1048
1042 def __contains__(self, value):
1049 def __contains__(self, value):
1043 return value in self.addedvalues or value in self.lazyvalues
1050 return value in self.addedvalues or value in self.lazyvalues
1044
1051
1045 def __iter__(self):
1052 def __iter__(self):
1046 added = self.addedvalues
1053 added = self.addedvalues
1047 for r in added:
1054 for r in added:
1048 yield r
1055 yield r
1049 for r in self.lazyvalues:
1056 for r in self.lazyvalues:
1050 if not r in added:
1057 if not r in added:
1051 yield r
1058 yield r
1052
1059
1053 def add(self, value):
1060 def add(self, value):
1054 self.addedvalues.add(value)
1061 self.addedvalues.add(value)
1055
1062
1056 def update(self, values):
1063 def update(self, values):
1057 self.addedvalues.update(values)
1064 self.addedvalues.update(values)
1058
1065
1059 has = lazyset(self.ancestors(common))
1066 has = lazyset(self.ancestors(common))
1060 has.add(nullrev)
1067 has.add(nullrev)
1061 has.update(common)
1068 has.update(common)
1062
1069
1063 # take all ancestors from heads that aren't in has
1070 # take all ancestors from heads that aren't in has
1064 missing = set()
1071 missing = set()
1065 visit = collections.deque(r for r in heads if r not in has)
1072 visit = collections.deque(r for r in heads if r not in has)
1066 while visit:
1073 while visit:
1067 r = visit.popleft()
1074 r = visit.popleft()
1068 if r in missing:
1075 if r in missing:
1069 continue
1076 continue
1070 else:
1077 else:
1071 missing.add(r)
1078 missing.add(r)
1072 for p in self.parentrevs(r):
1079 for p in self.parentrevs(r):
1073 if p not in has:
1080 if p not in has:
1074 visit.append(p)
1081 visit.append(p)
1075 missing = list(missing)
1082 missing = list(missing)
1076 missing.sort()
1083 missing.sort()
1077 return has, [self.node(miss) for miss in missing]
1084 return has, [self.node(miss) for miss in missing]
1078
1085
1079 def incrementalmissingrevs(self, common=None):
1086 def incrementalmissingrevs(self, common=None):
1080 """Return an object that can be used to incrementally compute the
1087 """Return an object that can be used to incrementally compute the
1081 revision numbers of the ancestors of arbitrary sets that are not
1088 revision numbers of the ancestors of arbitrary sets that are not
1082 ancestors of common. This is an ancestor.incrementalmissingancestors
1089 ancestors of common. This is an ancestor.incrementalmissingancestors
1083 object.
1090 object.
1084
1091
1085 'common' is a list of revision numbers. If common is not supplied, uses
1092 'common' is a list of revision numbers. If common is not supplied, uses
1086 nullrev.
1093 nullrev.
1087 """
1094 """
1088 if common is None:
1095 if common is None:
1089 common = [nullrev]
1096 common = [nullrev]
1090
1097
1091 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1098 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1092
1099
1093 def findmissingrevs(self, common=None, heads=None):
1100 def findmissingrevs(self, common=None, heads=None):
1094 """Return the revision numbers of the ancestors of heads that
1101 """Return the revision numbers of the ancestors of heads that
1095 are not ancestors of common.
1102 are not ancestors of common.
1096
1103
1097 More specifically, return a list of revision numbers corresponding to
1104 More specifically, return a list of revision numbers corresponding to
1098 nodes N such that every N satisfies the following constraints:
1105 nodes N such that every N satisfies the following constraints:
1099
1106
1100 1. N is an ancestor of some node in 'heads'
1107 1. N is an ancestor of some node in 'heads'
1101 2. N is not an ancestor of any node in 'common'
1108 2. N is not an ancestor of any node in 'common'
1102
1109
1103 The list is sorted by revision number, meaning it is
1110 The list is sorted by revision number, meaning it is
1104 topologically sorted.
1111 topologically sorted.
1105
1112
1106 'heads' and 'common' are both lists of revision numbers. If heads is
1113 'heads' and 'common' are both lists of revision numbers. If heads is
1107 not supplied, uses all of the revlog's heads. If common is not
1114 not supplied, uses all of the revlog's heads. If common is not
1108 supplied, uses nullid."""
1115 supplied, uses nullid."""
1109 if common is None:
1116 if common is None:
1110 common = [nullrev]
1117 common = [nullrev]
1111 if heads is None:
1118 if heads is None:
1112 heads = self.headrevs()
1119 heads = self.headrevs()
1113
1120
1114 inc = self.incrementalmissingrevs(common=common)
1121 inc = self.incrementalmissingrevs(common=common)
1115 return inc.missingancestors(heads)
1122 return inc.missingancestors(heads)
1116
1123
1117 def findmissing(self, common=None, heads=None):
1124 def findmissing(self, common=None, heads=None):
1118 """Return the ancestors of heads that are not ancestors of common.
1125 """Return the ancestors of heads that are not ancestors of common.
1119
1126
1120 More specifically, return a list of nodes N such that every N
1127 More specifically, return a list of nodes N such that every N
1121 satisfies the following constraints:
1128 satisfies the following constraints:
1122
1129
1123 1. N is an ancestor of some node in 'heads'
1130 1. N is an ancestor of some node in 'heads'
1124 2. N is not an ancestor of any node in 'common'
1131 2. N is not an ancestor of any node in 'common'
1125
1132
1126 The list is sorted by revision number, meaning it is
1133 The list is sorted by revision number, meaning it is
1127 topologically sorted.
1134 topologically sorted.
1128
1135
1129 'heads' and 'common' are both lists of node IDs. If heads is
1136 'heads' and 'common' are both lists of node IDs. If heads is
1130 not supplied, uses all of the revlog's heads. If common is not
1137 not supplied, uses all of the revlog's heads. If common is not
1131 supplied, uses nullid."""
1138 supplied, uses nullid."""
1132 if common is None:
1139 if common is None:
1133 common = [nullid]
1140 common = [nullid]
1134 if heads is None:
1141 if heads is None:
1135 heads = self.heads()
1142 heads = self.heads()
1136
1143
1137 common = [self.rev(n) for n in common]
1144 common = [self.rev(n) for n in common]
1138 heads = [self.rev(n) for n in heads]
1145 heads = [self.rev(n) for n in heads]
1139
1146
1140 inc = self.incrementalmissingrevs(common=common)
1147 inc = self.incrementalmissingrevs(common=common)
1141 return [self.node(r) for r in inc.missingancestors(heads)]
1148 return [self.node(r) for r in inc.missingancestors(heads)]
1142
1149
1143 def nodesbetween(self, roots=None, heads=None):
1150 def nodesbetween(self, roots=None, heads=None):
1144 """Return a topological path from 'roots' to 'heads'.
1151 """Return a topological path from 'roots' to 'heads'.
1145
1152
1146 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1153 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1147 topologically sorted list of all nodes N that satisfy both of
1154 topologically sorted list of all nodes N that satisfy both of
1148 these constraints:
1155 these constraints:
1149
1156
1150 1. N is a descendant of some node in 'roots'
1157 1. N is a descendant of some node in 'roots'
1151 2. N is an ancestor of some node in 'heads'
1158 2. N is an ancestor of some node in 'heads'
1152
1159
1153 Every node is considered to be both a descendant and an ancestor
1160 Every node is considered to be both a descendant and an ancestor
1154 of itself, so every reachable node in 'roots' and 'heads' will be
1161 of itself, so every reachable node in 'roots' and 'heads' will be
1155 included in 'nodes'.
1162 included in 'nodes'.
1156
1163
1157 'outroots' is the list of reachable nodes in 'roots', i.e., the
1164 'outroots' is the list of reachable nodes in 'roots', i.e., the
1158 subset of 'roots' that is returned in 'nodes'. Likewise,
1165 subset of 'roots' that is returned in 'nodes'. Likewise,
1159 'outheads' is the subset of 'heads' that is also in 'nodes'.
1166 'outheads' is the subset of 'heads' that is also in 'nodes'.
1160
1167
1161 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1168 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1162 unspecified, uses nullid as the only root. If 'heads' is
1169 unspecified, uses nullid as the only root. If 'heads' is
1163 unspecified, uses list of all of the revlog's heads."""
1170 unspecified, uses list of all of the revlog's heads."""
1164 nonodes = ([], [], [])
1171 nonodes = ([], [], [])
1165 if roots is not None:
1172 if roots is not None:
1166 roots = list(roots)
1173 roots = list(roots)
1167 if not roots:
1174 if not roots:
1168 return nonodes
1175 return nonodes
1169 lowestrev = min([self.rev(n) for n in roots])
1176 lowestrev = min([self.rev(n) for n in roots])
1170 else:
1177 else:
1171 roots = [nullid] # Everybody's a descendant of nullid
1178 roots = [nullid] # Everybody's a descendant of nullid
1172 lowestrev = nullrev
1179 lowestrev = nullrev
1173 if (lowestrev == nullrev) and (heads is None):
1180 if (lowestrev == nullrev) and (heads is None):
1174 # We want _all_ the nodes!
1181 # We want _all_ the nodes!
1175 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1182 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1176 if heads is None:
1183 if heads is None:
1177 # All nodes are ancestors, so the latest ancestor is the last
1184 # All nodes are ancestors, so the latest ancestor is the last
1178 # node.
1185 # node.
1179 highestrev = len(self) - 1
1186 highestrev = len(self) - 1
1180 # Set ancestors to None to signal that every node is an ancestor.
1187 # Set ancestors to None to signal that every node is an ancestor.
1181 ancestors = None
1188 ancestors = None
1182 # Set heads to an empty dictionary for later discovery of heads
1189 # Set heads to an empty dictionary for later discovery of heads
1183 heads = {}
1190 heads = {}
1184 else:
1191 else:
1185 heads = list(heads)
1192 heads = list(heads)
1186 if not heads:
1193 if not heads:
1187 return nonodes
1194 return nonodes
1188 ancestors = set()
1195 ancestors = set()
1189 # Turn heads into a dictionary so we can remove 'fake' heads.
1196 # Turn heads into a dictionary so we can remove 'fake' heads.
1190 # Also, later we will be using it to filter out the heads we can't
1197 # Also, later we will be using it to filter out the heads we can't
1191 # find from roots.
1198 # find from roots.
1192 heads = dict.fromkeys(heads, False)
1199 heads = dict.fromkeys(heads, False)
1193 # Start at the top and keep marking parents until we're done.
1200 # Start at the top and keep marking parents until we're done.
1194 nodestotag = set(heads)
1201 nodestotag = set(heads)
1195 # Remember where the top was so we can use it as a limit later.
1202 # Remember where the top was so we can use it as a limit later.
1196 highestrev = max([self.rev(n) for n in nodestotag])
1203 highestrev = max([self.rev(n) for n in nodestotag])
1197 while nodestotag:
1204 while nodestotag:
1198 # grab a node to tag
1205 # grab a node to tag
1199 n = nodestotag.pop()
1206 n = nodestotag.pop()
1200 # Never tag nullid
1207 # Never tag nullid
1201 if n == nullid:
1208 if n == nullid:
1202 continue
1209 continue
1203 # A node's revision number represents its place in a
1210 # A node's revision number represents its place in a
1204 # topologically sorted list of nodes.
1211 # topologically sorted list of nodes.
1205 r = self.rev(n)
1212 r = self.rev(n)
1206 if r >= lowestrev:
1213 if r >= lowestrev:
1207 if n not in ancestors:
1214 if n not in ancestors:
1208 # If we are possibly a descendant of one of the roots
1215 # If we are possibly a descendant of one of the roots
1209 # and we haven't already been marked as an ancestor
1216 # and we haven't already been marked as an ancestor
1210 ancestors.add(n) # Mark as ancestor
1217 ancestors.add(n) # Mark as ancestor
1211 # Add non-nullid parents to list of nodes to tag.
1218 # Add non-nullid parents to list of nodes to tag.
1212 nodestotag.update([p for p in self.parents(n) if
1219 nodestotag.update([p for p in self.parents(n) if
1213 p != nullid])
1220 p != nullid])
1214 elif n in heads: # We've seen it before, is it a fake head?
1221 elif n in heads: # We've seen it before, is it a fake head?
1215 # So it is, real heads should not be the ancestors of
1222 # So it is, real heads should not be the ancestors of
1216 # any other heads.
1223 # any other heads.
1217 heads.pop(n)
1224 heads.pop(n)
1218 if not ancestors:
1225 if not ancestors:
1219 return nonodes
1226 return nonodes
1220 # Now that we have our set of ancestors, we want to remove any
1227 # Now that we have our set of ancestors, we want to remove any
1221 # roots that are not ancestors.
1228 # roots that are not ancestors.
1222
1229
1223 # If one of the roots was nullid, everything is included anyway.
1230 # If one of the roots was nullid, everything is included anyway.
1224 if lowestrev > nullrev:
1231 if lowestrev > nullrev:
1225 # But, since we weren't, let's recompute the lowest rev to not
1232 # But, since we weren't, let's recompute the lowest rev to not
1226 # include roots that aren't ancestors.
1233 # include roots that aren't ancestors.
1227
1234
1228 # Filter out roots that aren't ancestors of heads
1235 # Filter out roots that aren't ancestors of heads
1229 roots = [root for root in roots if root in ancestors]
1236 roots = [root for root in roots if root in ancestors]
1230 # Recompute the lowest revision
1237 # Recompute the lowest revision
1231 if roots:
1238 if roots:
1232 lowestrev = min([self.rev(root) for root in roots])
1239 lowestrev = min([self.rev(root) for root in roots])
1233 else:
1240 else:
1234 # No more roots? Return empty list
1241 # No more roots? Return empty list
1235 return nonodes
1242 return nonodes
1236 else:
1243 else:
1237 # We are descending from nullid, and don't need to care about
1244 # We are descending from nullid, and don't need to care about
1238 # any other roots.
1245 # any other roots.
1239 lowestrev = nullrev
1246 lowestrev = nullrev
1240 roots = [nullid]
1247 roots = [nullid]
1241 # Transform our roots list into a set.
1248 # Transform our roots list into a set.
1242 descendants = set(roots)
1249 descendants = set(roots)
1243 # Also, keep the original roots so we can filter out roots that aren't
1250 # Also, keep the original roots so we can filter out roots that aren't
1244 # 'real' roots (i.e. are descended from other roots).
1251 # 'real' roots (i.e. are descended from other roots).
1245 roots = descendants.copy()
1252 roots = descendants.copy()
1246 # Our topologically sorted list of output nodes.
1253 # Our topologically sorted list of output nodes.
1247 orderedout = []
1254 orderedout = []
1248 # Don't start at nullid since we don't want nullid in our output list,
1255 # Don't start at nullid since we don't want nullid in our output list,
1249 # and if nullid shows up in descendants, empty parents will look like
1256 # and if nullid shows up in descendants, empty parents will look like
1250 # they're descendants.
1257 # they're descendants.
1251 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1258 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1252 n = self.node(r)
1259 n = self.node(r)
1253 isdescendant = False
1260 isdescendant = False
1254 if lowestrev == nullrev: # Everybody is a descendant of nullid
1261 if lowestrev == nullrev: # Everybody is a descendant of nullid
1255 isdescendant = True
1262 isdescendant = True
1256 elif n in descendants:
1263 elif n in descendants:
1257 # n is already a descendant
1264 # n is already a descendant
1258 isdescendant = True
1265 isdescendant = True
1259 # This check only needs to be done here because all the roots
1266 # This check only needs to be done here because all the roots
1260 # will start being marked is descendants before the loop.
1267 # will start being marked is descendants before the loop.
1261 if n in roots:
1268 if n in roots:
1262 # If n was a root, check if it's a 'real' root.
1269 # If n was a root, check if it's a 'real' root.
1263 p = tuple(self.parents(n))
1270 p = tuple(self.parents(n))
1264 # If any of its parents are descendants, it's not a root.
1271 # If any of its parents are descendants, it's not a root.
1265 if (p[0] in descendants) or (p[1] in descendants):
1272 if (p[0] in descendants) or (p[1] in descendants):
1266 roots.remove(n)
1273 roots.remove(n)
1267 else:
1274 else:
1268 p = tuple(self.parents(n))
1275 p = tuple(self.parents(n))
1269 # A node is a descendant if either of its parents are
1276 # A node is a descendant if either of its parents are
1270 # descendants. (We seeded the dependents list with the roots
1277 # descendants. (We seeded the dependents list with the roots
1271 # up there, remember?)
1278 # up there, remember?)
1272 if (p[0] in descendants) or (p[1] in descendants):
1279 if (p[0] in descendants) or (p[1] in descendants):
1273 descendants.add(n)
1280 descendants.add(n)
1274 isdescendant = True
1281 isdescendant = True
1275 if isdescendant and ((ancestors is None) or (n in ancestors)):
1282 if isdescendant and ((ancestors is None) or (n in ancestors)):
1276 # Only include nodes that are both descendants and ancestors.
1283 # Only include nodes that are both descendants and ancestors.
1277 orderedout.append(n)
1284 orderedout.append(n)
1278 if (ancestors is not None) and (n in heads):
1285 if (ancestors is not None) and (n in heads):
1279 # We're trying to figure out which heads are reachable
1286 # We're trying to figure out which heads are reachable
1280 # from roots.
1287 # from roots.
1281 # Mark this head as having been reached
1288 # Mark this head as having been reached
1282 heads[n] = True
1289 heads[n] = True
1283 elif ancestors is None:
1290 elif ancestors is None:
1284 # Otherwise, we're trying to discover the heads.
1291 # Otherwise, we're trying to discover the heads.
1285 # Assume this is a head because if it isn't, the next step
1292 # Assume this is a head because if it isn't, the next step
1286 # will eventually remove it.
1293 # will eventually remove it.
1287 heads[n] = True
1294 heads[n] = True
1288 # But, obviously its parents aren't.
1295 # But, obviously its parents aren't.
1289 for p in self.parents(n):
1296 for p in self.parents(n):
1290 heads.pop(p, None)
1297 heads.pop(p, None)
1291 heads = [head for head, flag in heads.iteritems() if flag]
1298 heads = [head for head, flag in heads.iteritems() if flag]
1292 roots = list(roots)
1299 roots = list(roots)
1293 assert orderedout
1300 assert orderedout
1294 assert roots
1301 assert roots
1295 assert heads
1302 assert heads
1296 return (orderedout, roots, heads)
1303 return (orderedout, roots, heads)
1297
1304
1298 def headrevs(self):
1305 def headrevs(self):
1299 try:
1306 try:
1300 return self.index.headrevs()
1307 return self.index.headrevs()
1301 except AttributeError:
1308 except AttributeError:
1302 return self._headrevs()
1309 return self._headrevs()
1303
1310
1304 def computephases(self, roots):
1311 def computephases(self, roots):
1305 return self.index.computephasesmapsets(roots)
1312 return self.index.computephasesmapsets(roots)
1306
1313
1307 def _headrevs(self):
1314 def _headrevs(self):
1308 count = len(self)
1315 count = len(self)
1309 if not count:
1316 if not count:
1310 return [nullrev]
1317 return [nullrev]
1311 # we won't iter over filtered rev so nobody is a head at start
1318 # we won't iter over filtered rev so nobody is a head at start
1312 ishead = [0] * (count + 1)
1319 ishead = [0] * (count + 1)
1313 index = self.index
1320 index = self.index
1314 for r in self:
1321 for r in self:
1315 ishead[r] = 1 # I may be an head
1322 ishead[r] = 1 # I may be an head
1316 e = index[r]
1323 e = index[r]
1317 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1324 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1318 return [r for r, val in enumerate(ishead) if val]
1325 return [r for r, val in enumerate(ishead) if val]
1319
1326
1320 def heads(self, start=None, stop=None):
1327 def heads(self, start=None, stop=None):
1321 """return the list of all nodes that have no children
1328 """return the list of all nodes that have no children
1322
1329
1323 if start is specified, only heads that are descendants of
1330 if start is specified, only heads that are descendants of
1324 start will be returned
1331 start will be returned
1325 if stop is specified, it will consider all the revs from stop
1332 if stop is specified, it will consider all the revs from stop
1326 as if they had no children
1333 as if they had no children
1327 """
1334 """
1328 if start is None and stop is None:
1335 if start is None and stop is None:
1329 if not len(self):
1336 if not len(self):
1330 return [nullid]
1337 return [nullid]
1331 return [self.node(r) for r in self.headrevs()]
1338 return [self.node(r) for r in self.headrevs()]
1332
1339
1333 if start is None:
1340 if start is None:
1334 start = nullid
1341 start = nullid
1335 if stop is None:
1342 if stop is None:
1336 stop = []
1343 stop = []
1337 stoprevs = set([self.rev(n) for n in stop])
1344 stoprevs = set([self.rev(n) for n in stop])
1338 startrev = self.rev(start)
1345 startrev = self.rev(start)
1339 reachable = {startrev}
1346 reachable = {startrev}
1340 heads = {startrev}
1347 heads = {startrev}
1341
1348
1342 parentrevs = self.parentrevs
1349 parentrevs = self.parentrevs
1343 for r in self.revs(start=startrev + 1):
1350 for r in self.revs(start=startrev + 1):
1344 for p in parentrevs(r):
1351 for p in parentrevs(r):
1345 if p in reachable:
1352 if p in reachable:
1346 if r not in stoprevs:
1353 if r not in stoprevs:
1347 reachable.add(r)
1354 reachable.add(r)
1348 heads.add(r)
1355 heads.add(r)
1349 if p in heads and p not in stoprevs:
1356 if p in heads and p not in stoprevs:
1350 heads.remove(p)
1357 heads.remove(p)
1351
1358
1352 return [self.node(r) for r in heads]
1359 return [self.node(r) for r in heads]
1353
1360
1354 def children(self, node):
1361 def children(self, node):
1355 """find the children of a given node"""
1362 """find the children of a given node"""
1356 c = []
1363 c = []
1357 p = self.rev(node)
1364 p = self.rev(node)
1358 for r in self.revs(start=p + 1):
1365 for r in self.revs(start=p + 1):
1359 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1366 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1360 if prevs:
1367 if prevs:
1361 for pr in prevs:
1368 for pr in prevs:
1362 if pr == p:
1369 if pr == p:
1363 c.append(self.node(r))
1370 c.append(self.node(r))
1364 elif p == nullrev:
1371 elif p == nullrev:
1365 c.append(self.node(r))
1372 c.append(self.node(r))
1366 return c
1373 return c
1367
1374
1368 def descendant(self, start, end):
1375 def descendant(self, start, end):
1369 if start == nullrev:
1376 if start == nullrev:
1370 return True
1377 return True
1371 for i in self.descendants([start]):
1378 for i in self.descendants([start]):
1372 if i == end:
1379 if i == end:
1373 return True
1380 return True
1374 elif i > end:
1381 elif i > end:
1375 break
1382 break
1376 return False
1383 return False
1377
1384
1378 def commonancestorsheads(self, a, b):
1385 def commonancestorsheads(self, a, b):
1379 """calculate all the heads of the common ancestors of nodes a and b"""
1386 """calculate all the heads of the common ancestors of nodes a and b"""
1380 a, b = self.rev(a), self.rev(b)
1387 a, b = self.rev(a), self.rev(b)
1381 try:
1388 try:
1382 ancs = self.index.commonancestorsheads(a, b)
1389 ancs = self.index.commonancestorsheads(a, b)
1383 except (AttributeError, OverflowError): # C implementation failed
1390 except (AttributeError, OverflowError): # C implementation failed
1384 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1391 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1385 return pycompat.maplist(self.node, ancs)
1392 return pycompat.maplist(self.node, ancs)
1386
1393
1387 def isancestor(self, a, b):
1394 def isancestor(self, a, b):
1388 """return True if node a is an ancestor of node b
1395 """return True if node a is an ancestor of node b
1389
1396
1390 The implementation of this is trivial but the use of
1397 The implementation of this is trivial but the use of
1391 commonancestorsheads is not."""
1398 commonancestorsheads is not."""
1392 return a in self.commonancestorsheads(a, b)
1399 return a in self.commonancestorsheads(a, b)
1393
1400
1394 def ancestor(self, a, b):
1401 def ancestor(self, a, b):
1395 """calculate the "best" common ancestor of nodes a and b"""
1402 """calculate the "best" common ancestor of nodes a and b"""
1396
1403
1397 a, b = self.rev(a), self.rev(b)
1404 a, b = self.rev(a), self.rev(b)
1398 try:
1405 try:
1399 ancs = self.index.ancestors(a, b)
1406 ancs = self.index.ancestors(a, b)
1400 except (AttributeError, OverflowError):
1407 except (AttributeError, OverflowError):
1401 ancs = ancestor.ancestors(self.parentrevs, a, b)
1408 ancs = ancestor.ancestors(self.parentrevs, a, b)
1402 if ancs:
1409 if ancs:
1403 # choose a consistent winner when there's a tie
1410 # choose a consistent winner when there's a tie
1404 return min(map(self.node, ancs))
1411 return min(map(self.node, ancs))
1405 return nullid
1412 return nullid
1406
1413
1407 def _match(self, id):
1414 def _match(self, id):
1408 if isinstance(id, int):
1415 if isinstance(id, int):
1409 # rev
1416 # rev
1410 return self.node(id)
1417 return self.node(id)
1411 if len(id) == 20:
1418 if len(id) == 20:
1412 # possibly a binary node
1419 # possibly a binary node
1413 # odds of a binary node being all hex in ASCII are 1 in 10**25
1420 # odds of a binary node being all hex in ASCII are 1 in 10**25
1414 try:
1421 try:
1415 node = id
1422 node = id
1416 self.rev(node) # quick search the index
1423 self.rev(node) # quick search the index
1417 return node
1424 return node
1418 except LookupError:
1425 except LookupError:
1419 pass # may be partial hex id
1426 pass # may be partial hex id
1420 try:
1427 try:
1421 # str(rev)
1428 # str(rev)
1422 rev = int(id)
1429 rev = int(id)
1423 if "%d" % rev != id:
1430 if "%d" % rev != id:
1424 raise ValueError
1431 raise ValueError
1425 if rev < 0:
1432 if rev < 0:
1426 rev = len(self) + rev
1433 rev = len(self) + rev
1427 if rev < 0 or rev >= len(self):
1434 if rev < 0 or rev >= len(self):
1428 raise ValueError
1435 raise ValueError
1429 return self.node(rev)
1436 return self.node(rev)
1430 except (ValueError, OverflowError):
1437 except (ValueError, OverflowError):
1431 pass
1438 pass
1432 if len(id) == 40:
1439 if len(id) == 40:
1433 try:
1440 try:
1434 # a full hex nodeid?
1441 # a full hex nodeid?
1435 node = bin(id)
1442 node = bin(id)
1436 self.rev(node)
1443 self.rev(node)
1437 return node
1444 return node
1438 except (TypeError, LookupError):
1445 except (TypeError, LookupError):
1439 pass
1446 pass
1440
1447
1441 def _partialmatch(self, id):
1448 def _partialmatch(self, id):
1442 # we don't care wdirfilenodeids as they should be always full hash
1449 # we don't care wdirfilenodeids as they should be always full hash
1443 maybewdir = wdirhex.startswith(id)
1450 maybewdir = wdirhex.startswith(id)
1444 try:
1451 try:
1445 partial = self.index.partialmatch(id)
1452 partial = self.index.partialmatch(id)
1446 if partial and self.hasnode(partial):
1453 if partial and self.hasnode(partial):
1447 if maybewdir:
1454 if maybewdir:
1448 # single 'ff...' match in radix tree, ambiguous with wdir
1455 # single 'ff...' match in radix tree, ambiguous with wdir
1449 raise RevlogError
1456 raise RevlogError
1450 return partial
1457 return partial
1451 if maybewdir:
1458 if maybewdir:
1452 # no 'ff...' match in radix tree, wdir identified
1459 # no 'ff...' match in radix tree, wdir identified
1453 raise error.WdirUnsupported
1460 raise error.WdirUnsupported
1454 return None
1461 return None
1455 except RevlogError:
1462 except RevlogError:
1456 # parsers.c radix tree lookup gave multiple matches
1463 # parsers.c radix tree lookup gave multiple matches
1457 # fast path: for unfiltered changelog, radix tree is accurate
1464 # fast path: for unfiltered changelog, radix tree is accurate
1458 if not getattr(self, 'filteredrevs', None):
1465 if not getattr(self, 'filteredrevs', None):
1459 raise LookupError(id, self.indexfile,
1466 raise LookupError(id, self.indexfile,
1460 _('ambiguous identifier'))
1467 _('ambiguous identifier'))
1461 # fall through to slow path that filters hidden revisions
1468 # fall through to slow path that filters hidden revisions
1462 except (AttributeError, ValueError):
1469 except (AttributeError, ValueError):
1463 # we are pure python, or key was too short to search radix tree
1470 # we are pure python, or key was too short to search radix tree
1464 pass
1471 pass
1465
1472
1466 if id in self._pcache:
1473 if id in self._pcache:
1467 return self._pcache[id]
1474 return self._pcache[id]
1468
1475
1469 if len(id) <= 40:
1476 if len(id) <= 40:
1470 try:
1477 try:
1471 # hex(node)[:...]
1478 # hex(node)[:...]
1472 l = len(id) // 2 # grab an even number of digits
1479 l = len(id) // 2 # grab an even number of digits
1473 prefix = bin(id[:l * 2])
1480 prefix = bin(id[:l * 2])
1474 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1481 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1475 nl = [n for n in nl if hex(n).startswith(id) and
1482 nl = [n for n in nl if hex(n).startswith(id) and
1476 self.hasnode(n)]
1483 self.hasnode(n)]
1477 if len(nl) > 0:
1484 if len(nl) > 0:
1478 if len(nl) == 1 and not maybewdir:
1485 if len(nl) == 1 and not maybewdir:
1479 self._pcache[id] = nl[0]
1486 self._pcache[id] = nl[0]
1480 return nl[0]
1487 return nl[0]
1481 raise LookupError(id, self.indexfile,
1488 raise LookupError(id, self.indexfile,
1482 _('ambiguous identifier'))
1489 _('ambiguous identifier'))
1483 if maybewdir:
1490 if maybewdir:
1484 raise error.WdirUnsupported
1491 raise error.WdirUnsupported
1485 return None
1492 return None
1486 except TypeError:
1493 except TypeError:
1487 pass
1494 pass
1488
1495
1489 def lookup(self, id):
1496 def lookup(self, id):
1490 """locate a node based on:
1497 """locate a node based on:
1491 - revision number or str(revision number)
1498 - revision number or str(revision number)
1492 - nodeid or subset of hex nodeid
1499 - nodeid or subset of hex nodeid
1493 """
1500 """
1494 n = self._match(id)
1501 n = self._match(id)
1495 if n is not None:
1502 if n is not None:
1496 return n
1503 return n
1497 n = self._partialmatch(id)
1504 n = self._partialmatch(id)
1498 if n:
1505 if n:
1499 return n
1506 return n
1500
1507
1501 raise LookupError(id, self.indexfile, _('no match found'))
1508 raise LookupError(id, self.indexfile, _('no match found'))
1502
1509
1503 def shortest(self, node, minlength=1):
1510 def shortest(self, node, minlength=1):
1504 """Find the shortest unambiguous prefix that matches node."""
1511 """Find the shortest unambiguous prefix that matches node."""
1505 def isvalid(prefix):
1512 def isvalid(prefix):
1506 try:
1513 try:
1507 node = self._partialmatch(prefix)
1514 node = self._partialmatch(prefix)
1508 except error.RevlogError:
1515 except error.RevlogError:
1509 return False
1516 return False
1510 except error.WdirUnsupported:
1517 except error.WdirUnsupported:
1511 # single 'ff...' match
1518 # single 'ff...' match
1512 return True
1519 return True
1513 if node is None:
1520 if node is None:
1514 raise LookupError(node, self.indexfile, _('no node'))
1521 raise LookupError(node, self.indexfile, _('no node'))
1515 return True
1522 return True
1516
1523
1517 def maybewdir(prefix):
1524 def maybewdir(prefix):
1518 return all(c == 'f' for c in prefix)
1525 return all(c == 'f' for c in prefix)
1519
1526
1520 hexnode = hex(node)
1527 hexnode = hex(node)
1521
1528
1522 def disambiguate(hexnode, minlength):
1529 def disambiguate(hexnode, minlength):
1523 """Disambiguate against wdirid."""
1530 """Disambiguate against wdirid."""
1524 for length in range(minlength, 41):
1531 for length in range(minlength, 41):
1525 prefix = hexnode[:length]
1532 prefix = hexnode[:length]
1526 if not maybewdir(prefix):
1533 if not maybewdir(prefix):
1527 return prefix
1534 return prefix
1528
1535
1529 if not getattr(self, 'filteredrevs', None):
1536 if not getattr(self, 'filteredrevs', None):
1530 try:
1537 try:
1531 length = max(self.index.shortest(node), minlength)
1538 length = max(self.index.shortest(node), minlength)
1532 return disambiguate(hexnode, length)
1539 return disambiguate(hexnode, length)
1533 except RevlogError:
1540 except RevlogError:
1534 if node != wdirid:
1541 if node != wdirid:
1535 raise LookupError(node, self.indexfile, _('no node'))
1542 raise LookupError(node, self.indexfile, _('no node'))
1536 except AttributeError:
1543 except AttributeError:
1537 # Fall through to pure code
1544 # Fall through to pure code
1538 pass
1545 pass
1539
1546
1540 if node == wdirid:
1547 if node == wdirid:
1541 for length in range(minlength, 41):
1548 for length in range(minlength, 41):
1542 prefix = hexnode[:length]
1549 prefix = hexnode[:length]
1543 if isvalid(prefix):
1550 if isvalid(prefix):
1544 return prefix
1551 return prefix
1545
1552
1546 for length in range(minlength, 41):
1553 for length in range(minlength, 41):
1547 prefix = hexnode[:length]
1554 prefix = hexnode[:length]
1548 if isvalid(prefix):
1555 if isvalid(prefix):
1549 return disambiguate(hexnode, length)
1556 return disambiguate(hexnode, length)
1550
1557
1551 def cmp(self, node, text):
1558 def cmp(self, node, text):
1552 """compare text with a given file revision
1559 """compare text with a given file revision
1553
1560
1554 returns True if text is different than what is stored.
1561 returns True if text is different than what is stored.
1555 """
1562 """
1556 p1, p2 = self.parents(node)
1563 p1, p2 = self.parents(node)
1557 return hash(text, p1, p2) != node
1564 return hash(text, p1, p2) != node
1558
1565
1559 def _cachesegment(self, offset, data):
1566 def _cachesegment(self, offset, data):
1560 """Add a segment to the revlog cache.
1567 """Add a segment to the revlog cache.
1561
1568
1562 Accepts an absolute offset and the data that is at that location.
1569 Accepts an absolute offset and the data that is at that location.
1563 """
1570 """
1564 o, d = self._chunkcache
1571 o, d = self._chunkcache
1565 # try to add to existing cache
1572 # try to add to existing cache
1566 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1573 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1567 self._chunkcache = o, d + data
1574 self._chunkcache = o, d + data
1568 else:
1575 else:
1569 self._chunkcache = offset, data
1576 self._chunkcache = offset, data
1570
1577
1571 def _readsegment(self, offset, length, df=None):
1578 def _readsegment(self, offset, length, df=None):
1572 """Load a segment of raw data from the revlog.
1579 """Load a segment of raw data from the revlog.
1573
1580
1574 Accepts an absolute offset, length to read, and an optional existing
1581 Accepts an absolute offset, length to read, and an optional existing
1575 file handle to read from.
1582 file handle to read from.
1576
1583
1577 If an existing file handle is passed, it will be seeked and the
1584 If an existing file handle is passed, it will be seeked and the
1578 original seek position will NOT be restored.
1585 original seek position will NOT be restored.
1579
1586
1580 Returns a str or buffer of raw byte data.
1587 Returns a str or buffer of raw byte data.
1581 """
1588 """
1582 # Cache data both forward and backward around the requested
1589 # Cache data both forward and backward around the requested
1583 # data, in a fixed size window. This helps speed up operations
1590 # data, in a fixed size window. This helps speed up operations
1584 # involving reading the revlog backwards.
1591 # involving reading the revlog backwards.
1585 cachesize = self._chunkcachesize
1592 cachesize = self._chunkcachesize
1586 realoffset = offset & ~(cachesize - 1)
1593 realoffset = offset & ~(cachesize - 1)
1587 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1594 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1588 - realoffset)
1595 - realoffset)
1589 with self._datareadfp(df) as df:
1596 with self._datareadfp(df) as df:
1590 df.seek(realoffset)
1597 df.seek(realoffset)
1591 d = df.read(reallength)
1598 d = df.read(reallength)
1592 self._cachesegment(realoffset, d)
1599 self._cachesegment(realoffset, d)
1593 if offset != realoffset or reallength != length:
1600 if offset != realoffset or reallength != length:
1594 return util.buffer(d, offset - realoffset, length)
1601 return util.buffer(d, offset - realoffset, length)
1595 return d
1602 return d
1596
1603
1597 def _getsegment(self, offset, length, df=None):
1604 def _getsegment(self, offset, length, df=None):
1598 """Obtain a segment of raw data from the revlog.
1605 """Obtain a segment of raw data from the revlog.
1599
1606
1600 Accepts an absolute offset, length of bytes to obtain, and an
1607 Accepts an absolute offset, length of bytes to obtain, and an
1601 optional file handle to the already-opened revlog. If the file
1608 optional file handle to the already-opened revlog. If the file
1602 handle is used, it's original seek position will not be preserved.
1609 handle is used, it's original seek position will not be preserved.
1603
1610
1604 Requests for data may be returned from a cache.
1611 Requests for data may be returned from a cache.
1605
1612
1606 Returns a str or a buffer instance of raw byte data.
1613 Returns a str or a buffer instance of raw byte data.
1607 """
1614 """
1608 o, d = self._chunkcache
1615 o, d = self._chunkcache
1609 l = len(d)
1616 l = len(d)
1610
1617
1611 # is it in the cache?
1618 # is it in the cache?
1612 cachestart = offset - o
1619 cachestart = offset - o
1613 cacheend = cachestart + length
1620 cacheend = cachestart + length
1614 if cachestart >= 0 and cacheend <= l:
1621 if cachestart >= 0 and cacheend <= l:
1615 if cachestart == 0 and cacheend == l:
1622 if cachestart == 0 and cacheend == l:
1616 return d # avoid a copy
1623 return d # avoid a copy
1617 return util.buffer(d, cachestart, cacheend - cachestart)
1624 return util.buffer(d, cachestart, cacheend - cachestart)
1618
1625
1619 return self._readsegment(offset, length, df=df)
1626 return self._readsegment(offset, length, df=df)
1620
1627
1621 def _getsegmentforrevs(self, startrev, endrev, df=None):
1628 def _getsegmentforrevs(self, startrev, endrev, df=None):
1622 """Obtain a segment of raw data corresponding to a range of revisions.
1629 """Obtain a segment of raw data corresponding to a range of revisions.
1623
1630
1624 Accepts the start and end revisions and an optional already-open
1631 Accepts the start and end revisions and an optional already-open
1625 file handle to be used for reading. If the file handle is read, its
1632 file handle to be used for reading. If the file handle is read, its
1626 seek position will not be preserved.
1633 seek position will not be preserved.
1627
1634
1628 Requests for data may be satisfied by a cache.
1635 Requests for data may be satisfied by a cache.
1629
1636
1630 Returns a 2-tuple of (offset, data) for the requested range of
1637 Returns a 2-tuple of (offset, data) for the requested range of
1631 revisions. Offset is the integer offset from the beginning of the
1638 revisions. Offset is the integer offset from the beginning of the
1632 revlog and data is a str or buffer of the raw byte data.
1639 revlog and data is a str or buffer of the raw byte data.
1633
1640
1634 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1641 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1635 to determine where each revision's data begins and ends.
1642 to determine where each revision's data begins and ends.
1636 """
1643 """
1637 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1644 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1638 # (functions are expensive).
1645 # (functions are expensive).
1639 index = self.index
1646 index = self.index
1640 istart = index[startrev]
1647 istart = index[startrev]
1641 start = int(istart[0] >> 16)
1648 start = int(istart[0] >> 16)
1642 if startrev == endrev:
1649 if startrev == endrev:
1643 end = start + istart[1]
1650 end = start + istart[1]
1644 else:
1651 else:
1645 iend = index[endrev]
1652 iend = index[endrev]
1646 end = int(iend[0] >> 16) + iend[1]
1653 end = int(iend[0] >> 16) + iend[1]
1647
1654
1648 if self._inline:
1655 if self._inline:
1649 start += (startrev + 1) * self._io.size
1656 start += (startrev + 1) * self._io.size
1650 end += (endrev + 1) * self._io.size
1657 end += (endrev + 1) * self._io.size
1651 length = end - start
1658 length = end - start
1652
1659
1653 return start, self._getsegment(start, length, df=df)
1660 return start, self._getsegment(start, length, df=df)
1654
1661
1655 def _chunk(self, rev, df=None):
1662 def _chunk(self, rev, df=None):
1656 """Obtain a single decompressed chunk for a revision.
1663 """Obtain a single decompressed chunk for a revision.
1657
1664
1658 Accepts an integer revision and an optional already-open file handle
1665 Accepts an integer revision and an optional already-open file handle
1659 to be used for reading. If used, the seek position of the file will not
1666 to be used for reading. If used, the seek position of the file will not
1660 be preserved.
1667 be preserved.
1661
1668
1662 Returns a str holding uncompressed data for the requested revision.
1669 Returns a str holding uncompressed data for the requested revision.
1663 """
1670 """
1664 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1671 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1665
1672
1666 def _chunks(self, revs, df=None):
1673 def _chunks(self, revs, df=None):
1667 """Obtain decompressed chunks for the specified revisions.
1674 """Obtain decompressed chunks for the specified revisions.
1668
1675
1669 Accepts an iterable of numeric revisions that are assumed to be in
1676 Accepts an iterable of numeric revisions that are assumed to be in
1670 ascending order. Also accepts an optional already-open file handle
1677 ascending order. Also accepts an optional already-open file handle
1671 to be used for reading. If used, the seek position of the file will
1678 to be used for reading. If used, the seek position of the file will
1672 not be preserved.
1679 not be preserved.
1673
1680
1674 This function is similar to calling ``self._chunk()`` multiple times,
1681 This function is similar to calling ``self._chunk()`` multiple times,
1675 but is faster.
1682 but is faster.
1676
1683
1677 Returns a list with decompressed data for each requested revision.
1684 Returns a list with decompressed data for each requested revision.
1678 """
1685 """
1679 if not revs:
1686 if not revs:
1680 return []
1687 return []
1681 start = self.start
1688 start = self.start
1682 length = self.length
1689 length = self.length
1683 inline = self._inline
1690 inline = self._inline
1684 iosize = self._io.size
1691 iosize = self._io.size
1685 buffer = util.buffer
1692 buffer = util.buffer
1686
1693
1687 l = []
1694 l = []
1688 ladd = l.append
1695 ladd = l.append
1689
1696
1690 if not self._withsparseread:
1697 if not self._withsparseread:
1691 slicedchunks = (revs,)
1698 slicedchunks = (revs,)
1692 else:
1699 else:
1693 slicedchunks = _slicechunk(self, revs)
1700 slicedchunks = _slicechunk(self, revs)
1694
1701
1695 for revschunk in slicedchunks:
1702 for revschunk in slicedchunks:
1696 firstrev = revschunk[0]
1703 firstrev = revschunk[0]
1697 # Skip trailing revisions with empty diff
1704 # Skip trailing revisions with empty diff
1698 for lastrev in revschunk[::-1]:
1705 for lastrev in revschunk[::-1]:
1699 if length(lastrev) != 0:
1706 if length(lastrev) != 0:
1700 break
1707 break
1701
1708
1702 try:
1709 try:
1703 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1710 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1704 except OverflowError:
1711 except OverflowError:
1705 # issue4215 - we can't cache a run of chunks greater than
1712 # issue4215 - we can't cache a run of chunks greater than
1706 # 2G on Windows
1713 # 2G on Windows
1707 return [self._chunk(rev, df=df) for rev in revschunk]
1714 return [self._chunk(rev, df=df) for rev in revschunk]
1708
1715
1709 decomp = self.decompress
1716 decomp = self.decompress
1710 for rev in revschunk:
1717 for rev in revschunk:
1711 chunkstart = start(rev)
1718 chunkstart = start(rev)
1712 if inline:
1719 if inline:
1713 chunkstart += (rev + 1) * iosize
1720 chunkstart += (rev + 1) * iosize
1714 chunklength = length(rev)
1721 chunklength = length(rev)
1715 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1722 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1716
1723
1717 return l
1724 return l
1718
1725
1719 def _chunkclear(self):
1726 def _chunkclear(self):
1720 """Clear the raw chunk cache."""
1727 """Clear the raw chunk cache."""
1721 self._chunkcache = (0, '')
1728 self._chunkcache = (0, '')
1722
1729
1723 def deltaparent(self, rev):
1730 def deltaparent(self, rev):
1724 """return deltaparent of the given revision"""
1731 """return deltaparent of the given revision"""
1725 base = self.index[rev][3]
1732 base = self.index[rev][3]
1726 if base == rev:
1733 if base == rev:
1727 return nullrev
1734 return nullrev
1728 elif self._generaldelta:
1735 elif self._generaldelta:
1729 return base
1736 return base
1730 else:
1737 else:
1731 return rev - 1
1738 return rev - 1
1732
1739
1733 def revdiff(self, rev1, rev2):
1740 def revdiff(self, rev1, rev2):
1734 """return or calculate a delta between two revisions
1741 """return or calculate a delta between two revisions
1735
1742
1736 The delta calculated is in binary form and is intended to be written to
1743 The delta calculated is in binary form and is intended to be written to
1737 revlog data directly. So this function needs raw revision data.
1744 revlog data directly. So this function needs raw revision data.
1738 """
1745 """
1739 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1746 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1740 return bytes(self._chunk(rev2))
1747 return bytes(self._chunk(rev2))
1741
1748
1742 return mdiff.textdiff(self.revision(rev1, raw=True),
1749 return mdiff.textdiff(self.revision(rev1, raw=True),
1743 self.revision(rev2, raw=True))
1750 self.revision(rev2, raw=True))
1744
1751
1745 def revision(self, nodeorrev, _df=None, raw=False):
1752 def revision(self, nodeorrev, _df=None, raw=False):
1746 """return an uncompressed revision of a given node or revision
1753 """return an uncompressed revision of a given node or revision
1747 number.
1754 number.
1748
1755
1749 _df - an existing file handle to read from. (internal-only)
1756 _df - an existing file handle to read from. (internal-only)
1750 raw - an optional argument specifying if the revision data is to be
1757 raw - an optional argument specifying if the revision data is to be
1751 treated as raw data when applying flag transforms. 'raw' should be set
1758 treated as raw data when applying flag transforms. 'raw' should be set
1752 to True when generating changegroups or in debug commands.
1759 to True when generating changegroups or in debug commands.
1753 """
1760 """
1754 if isinstance(nodeorrev, int):
1761 if isinstance(nodeorrev, int):
1755 rev = nodeorrev
1762 rev = nodeorrev
1756 node = self.node(rev)
1763 node = self.node(rev)
1757 else:
1764 else:
1758 node = nodeorrev
1765 node = nodeorrev
1759 rev = None
1766 rev = None
1760
1767
1761 cachedrev = None
1768 cachedrev = None
1762 flags = None
1769 flags = None
1763 rawtext = None
1770 rawtext = None
1764 if node == nullid:
1771 if node == nullid:
1765 return ""
1772 return ""
1766 if self._cache:
1773 if self._cache:
1767 if self._cache[0] == node:
1774 if self._cache[0] == node:
1768 # _cache only stores rawtext
1775 # _cache only stores rawtext
1769 if raw:
1776 if raw:
1770 return self._cache[2]
1777 return self._cache[2]
1771 # duplicated, but good for perf
1778 # duplicated, but good for perf
1772 if rev is None:
1779 if rev is None:
1773 rev = self.rev(node)
1780 rev = self.rev(node)
1774 if flags is None:
1781 if flags is None:
1775 flags = self.flags(rev)
1782 flags = self.flags(rev)
1776 # no extra flags set, no flag processor runs, text = rawtext
1783 # no extra flags set, no flag processor runs, text = rawtext
1777 if flags == REVIDX_DEFAULT_FLAGS:
1784 if flags == REVIDX_DEFAULT_FLAGS:
1778 return self._cache[2]
1785 return self._cache[2]
1779 # rawtext is reusable. need to run flag processor
1786 # rawtext is reusable. need to run flag processor
1780 rawtext = self._cache[2]
1787 rawtext = self._cache[2]
1781
1788
1782 cachedrev = self._cache[1]
1789 cachedrev = self._cache[1]
1783
1790
1784 # look up what we need to read
1791 # look up what we need to read
1785 if rawtext is None:
1792 if rawtext is None:
1786 if rev is None:
1793 if rev is None:
1787 rev = self.rev(node)
1794 rev = self.rev(node)
1788
1795
1789 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1796 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1790 if stopped:
1797 if stopped:
1791 rawtext = self._cache[2]
1798 rawtext = self._cache[2]
1792
1799
1793 # drop cache to save memory
1800 # drop cache to save memory
1794 self._cache = None
1801 self._cache = None
1795
1802
1796 bins = self._chunks(chain, df=_df)
1803 bins = self._chunks(chain, df=_df)
1797 if rawtext is None:
1804 if rawtext is None:
1798 rawtext = bytes(bins[0])
1805 rawtext = bytes(bins[0])
1799 bins = bins[1:]
1806 bins = bins[1:]
1800
1807
1801 rawtext = mdiff.patches(rawtext, bins)
1808 rawtext = mdiff.patches(rawtext, bins)
1802 self._cache = (node, rev, rawtext)
1809 self._cache = (node, rev, rawtext)
1803
1810
1804 if flags is None:
1811 if flags is None:
1805 if rev is None:
1812 if rev is None:
1806 rev = self.rev(node)
1813 rev = self.rev(node)
1807 flags = self.flags(rev)
1814 flags = self.flags(rev)
1808
1815
1809 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1816 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1810 if validatehash:
1817 if validatehash:
1811 self.checkhash(text, node, rev=rev)
1818 self.checkhash(text, node, rev=rev)
1812
1819
1813 return text
1820 return text
1814
1821
1815 def hash(self, text, p1, p2):
1822 def hash(self, text, p1, p2):
1816 """Compute a node hash.
1823 """Compute a node hash.
1817
1824
1818 Available as a function so that subclasses can replace the hash
1825 Available as a function so that subclasses can replace the hash
1819 as needed.
1826 as needed.
1820 """
1827 """
1821 return hash(text, p1, p2)
1828 return hash(text, p1, p2)
1822
1829
1823 def _processflags(self, text, flags, operation, raw=False):
1830 def _processflags(self, text, flags, operation, raw=False):
1824 """Inspect revision data flags and applies transforms defined by
1831 """Inspect revision data flags and applies transforms defined by
1825 registered flag processors.
1832 registered flag processors.
1826
1833
1827 ``text`` - the revision data to process
1834 ``text`` - the revision data to process
1828 ``flags`` - the revision flags
1835 ``flags`` - the revision flags
1829 ``operation`` - the operation being performed (read or write)
1836 ``operation`` - the operation being performed (read or write)
1830 ``raw`` - an optional argument describing if the raw transform should be
1837 ``raw`` - an optional argument describing if the raw transform should be
1831 applied.
1838 applied.
1832
1839
1833 This method processes the flags in the order (or reverse order if
1840 This method processes the flags in the order (or reverse order if
1834 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1841 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1835 flag processors registered for present flags. The order of flags defined
1842 flag processors registered for present flags. The order of flags defined
1836 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1843 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1837
1844
1838 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1845 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1839 processed text and ``validatehash`` is a bool indicating whether the
1846 processed text and ``validatehash`` is a bool indicating whether the
1840 returned text should be checked for hash integrity.
1847 returned text should be checked for hash integrity.
1841
1848
1842 Note: If the ``raw`` argument is set, it has precedence over the
1849 Note: If the ``raw`` argument is set, it has precedence over the
1843 operation and will only update the value of ``validatehash``.
1850 operation and will only update the value of ``validatehash``.
1844 """
1851 """
1845 # fast path: no flag processors will run
1852 # fast path: no flag processors will run
1846 if flags == 0:
1853 if flags == 0:
1847 return text, True
1854 return text, True
1848 if not operation in ('read', 'write'):
1855 if not operation in ('read', 'write'):
1849 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1856 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1850 # Check all flags are known.
1857 # Check all flags are known.
1851 if flags & ~REVIDX_KNOWN_FLAGS:
1858 if flags & ~REVIDX_KNOWN_FLAGS:
1852 raise RevlogError(_("incompatible revision flag '%#x'") %
1859 raise RevlogError(_("incompatible revision flag '%#x'") %
1853 (flags & ~REVIDX_KNOWN_FLAGS))
1860 (flags & ~REVIDX_KNOWN_FLAGS))
1854 validatehash = True
1861 validatehash = True
1855 # Depending on the operation (read or write), the order might be
1862 # Depending on the operation (read or write), the order might be
1856 # reversed due to non-commutative transforms.
1863 # reversed due to non-commutative transforms.
1857 orderedflags = REVIDX_FLAGS_ORDER
1864 orderedflags = REVIDX_FLAGS_ORDER
1858 if operation == 'write':
1865 if operation == 'write':
1859 orderedflags = reversed(orderedflags)
1866 orderedflags = reversed(orderedflags)
1860
1867
1861 for flag in orderedflags:
1868 for flag in orderedflags:
1862 # If a flagprocessor has been registered for a known flag, apply the
1869 # If a flagprocessor has been registered for a known flag, apply the
1863 # related operation transform and update result tuple.
1870 # related operation transform and update result tuple.
1864 if flag & flags:
1871 if flag & flags:
1865 vhash = True
1872 vhash = True
1866
1873
1867 if flag not in _flagprocessors:
1874 if flag not in _flagprocessors:
1868 message = _("missing processor for flag '%#x'") % (flag)
1875 message = _("missing processor for flag '%#x'") % (flag)
1869 raise RevlogError(message)
1876 raise RevlogError(message)
1870
1877
1871 processor = _flagprocessors[flag]
1878 processor = _flagprocessors[flag]
1872 if processor is not None:
1879 if processor is not None:
1873 readtransform, writetransform, rawtransform = processor
1880 readtransform, writetransform, rawtransform = processor
1874
1881
1875 if raw:
1882 if raw:
1876 vhash = rawtransform(self, text)
1883 vhash = rawtransform(self, text)
1877 elif operation == 'read':
1884 elif operation == 'read':
1878 text, vhash = readtransform(self, text)
1885 text, vhash = readtransform(self, text)
1879 else: # write operation
1886 else: # write operation
1880 text, vhash = writetransform(self, text)
1887 text, vhash = writetransform(self, text)
1881 validatehash = validatehash and vhash
1888 validatehash = validatehash and vhash
1882
1889
1883 return text, validatehash
1890 return text, validatehash
1884
1891
1885 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1892 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1886 """Check node hash integrity.
1893 """Check node hash integrity.
1887
1894
1888 Available as a function so that subclasses can extend hash mismatch
1895 Available as a function so that subclasses can extend hash mismatch
1889 behaviors as needed.
1896 behaviors as needed.
1890 """
1897 """
1891 try:
1898 try:
1892 if p1 is None and p2 is None:
1899 if p1 is None and p2 is None:
1893 p1, p2 = self.parents(node)
1900 p1, p2 = self.parents(node)
1894 if node != self.hash(text, p1, p2):
1901 if node != self.hash(text, p1, p2):
1895 revornode = rev
1902 revornode = rev
1896 if revornode is None:
1903 if revornode is None:
1897 revornode = templatefilters.short(hex(node))
1904 revornode = templatefilters.short(hex(node))
1898 raise RevlogError(_("integrity check failed on %s:%s")
1905 raise RevlogError(_("integrity check failed on %s:%s")
1899 % (self.indexfile, pycompat.bytestr(revornode)))
1906 % (self.indexfile, pycompat.bytestr(revornode)))
1900 except RevlogError:
1907 except RevlogError:
1901 if self._censorable and _censoredtext(text):
1908 if self._censorable and _censoredtext(text):
1902 raise error.CensoredNodeError(self.indexfile, node, text)
1909 raise error.CensoredNodeError(self.indexfile, node, text)
1903 raise
1910 raise
1904
1911
1905 def _enforceinlinesize(self, tr, fp=None):
1912 def _enforceinlinesize(self, tr, fp=None):
1906 """Check if the revlog is too big for inline and convert if so.
1913 """Check if the revlog is too big for inline and convert if so.
1907
1914
1908 This should be called after revisions are added to the revlog. If the
1915 This should be called after revisions are added to the revlog. If the
1909 revlog has grown too large to be an inline revlog, it will convert it
1916 revlog has grown too large to be an inline revlog, it will convert it
1910 to use multiple index and data files.
1917 to use multiple index and data files.
1911 """
1918 """
1912 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1919 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1913 return
1920 return
1914
1921
1915 trinfo = tr.find(self.indexfile)
1922 trinfo = tr.find(self.indexfile)
1916 if trinfo is None:
1923 if trinfo is None:
1917 raise RevlogError(_("%s not found in the transaction")
1924 raise RevlogError(_("%s not found in the transaction")
1918 % self.indexfile)
1925 % self.indexfile)
1919
1926
1920 trindex = trinfo[2]
1927 trindex = trinfo[2]
1921 if trindex is not None:
1928 if trindex is not None:
1922 dataoff = self.start(trindex)
1929 dataoff = self.start(trindex)
1923 else:
1930 else:
1924 # revlog was stripped at start of transaction, use all leftover data
1931 # revlog was stripped at start of transaction, use all leftover data
1925 trindex = len(self) - 1
1932 trindex = len(self) - 1
1926 dataoff = self.end(-2)
1933 dataoff = self.end(-2)
1927
1934
1928 tr.add(self.datafile, dataoff)
1935 tr.add(self.datafile, dataoff)
1929
1936
1930 if fp:
1937 if fp:
1931 fp.flush()
1938 fp.flush()
1932 fp.close()
1939 fp.close()
1933
1940
1934 with self._datafp('w') as df:
1941 with self._datafp('w') as df:
1935 for r in self:
1942 for r in self:
1936 df.write(self._getsegmentforrevs(r, r)[1])
1943 df.write(self._getsegmentforrevs(r, r)[1])
1937
1944
1938 with self._indexfp('w') as fp:
1945 with self._indexfp('w') as fp:
1939 self.version &= ~FLAG_INLINE_DATA
1946 self.version &= ~FLAG_INLINE_DATA
1940 self._inline = False
1947 self._inline = False
1941 io = self._io
1948 io = self._io
1942 for i in self:
1949 for i in self:
1943 e = io.packentry(self.index[i], self.node, self.version, i)
1950 e = io.packentry(self.index[i], self.node, self.version, i)
1944 fp.write(e)
1951 fp.write(e)
1945
1952
1946 # the temp file replace the real index when we exit the context
1953 # the temp file replace the real index when we exit the context
1947 # manager
1954 # manager
1948
1955
1949 tr.replace(self.indexfile, trindex * self._io.size)
1956 tr.replace(self.indexfile, trindex * self._io.size)
1950 self._chunkclear()
1957 self._chunkclear()
1951
1958
1952 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1959 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1953 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1960 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1954 """add a revision to the log
1961 """add a revision to the log
1955
1962
1956 text - the revision data to add
1963 text - the revision data to add
1957 transaction - the transaction object used for rollback
1964 transaction - the transaction object used for rollback
1958 link - the linkrev data to add
1965 link - the linkrev data to add
1959 p1, p2 - the parent nodeids of the revision
1966 p1, p2 - the parent nodeids of the revision
1960 cachedelta - an optional precomputed delta
1967 cachedelta - an optional precomputed delta
1961 node - nodeid of revision; typically node is not specified, and it is
1968 node - nodeid of revision; typically node is not specified, and it is
1962 computed by default as hash(text, p1, p2), however subclasses might
1969 computed by default as hash(text, p1, p2), however subclasses might
1963 use different hashing method (and override checkhash() in such case)
1970 use different hashing method (and override checkhash() in such case)
1964 flags - the known flags to set on the revision
1971 flags - the known flags to set on the revision
1965 deltacomputer - an optional _deltacomputer instance shared between
1972 deltacomputer - an optional _deltacomputer instance shared between
1966 multiple calls
1973 multiple calls
1967 """
1974 """
1968 if link == nullrev:
1975 if link == nullrev:
1969 raise RevlogError(_("attempted to add linkrev -1 to %s")
1976 raise RevlogError(_("attempted to add linkrev -1 to %s")
1970 % self.indexfile)
1977 % self.indexfile)
1971
1978
1972 if flags:
1979 if flags:
1973 node = node or self.hash(text, p1, p2)
1980 node = node or self.hash(text, p1, p2)
1974
1981
1975 rawtext, validatehash = self._processflags(text, flags, 'write')
1982 rawtext, validatehash = self._processflags(text, flags, 'write')
1976
1983
1977 # If the flag processor modifies the revision data, ignore any provided
1984 # If the flag processor modifies the revision data, ignore any provided
1978 # cachedelta.
1985 # cachedelta.
1979 if rawtext != text:
1986 if rawtext != text:
1980 cachedelta = None
1987 cachedelta = None
1981
1988
1982 if len(rawtext) > _maxentrysize:
1989 if len(rawtext) > _maxentrysize:
1983 raise RevlogError(
1990 raise RevlogError(
1984 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1991 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1985 % (self.indexfile, len(rawtext)))
1992 % (self.indexfile, len(rawtext)))
1986
1993
1987 node = node or self.hash(rawtext, p1, p2)
1994 node = node or self.hash(rawtext, p1, p2)
1988 if node in self.nodemap:
1995 if node in self.nodemap:
1989 return node
1996 return node
1990
1997
1991 if validatehash:
1998 if validatehash:
1992 self.checkhash(rawtext, node, p1=p1, p2=p2)
1999 self.checkhash(rawtext, node, p1=p1, p2=p2)
1993
2000
1994 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2001 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1995 flags, cachedelta=cachedelta,
2002 flags, cachedelta=cachedelta,
1996 deltacomputer=deltacomputer)
2003 deltacomputer=deltacomputer)
1997
2004
1998 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2005 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1999 cachedelta=None, deltacomputer=None):
2006 cachedelta=None, deltacomputer=None):
2000 """add a raw revision with known flags, node and parents
2007 """add a raw revision with known flags, node and parents
2001 useful when reusing a revision not stored in this revlog (ex: received
2008 useful when reusing a revision not stored in this revlog (ex: received
2002 over wire, or read from an external bundle).
2009 over wire, or read from an external bundle).
2003 """
2010 """
2004 dfh = None
2011 dfh = None
2005 if not self._inline:
2012 if not self._inline:
2006 dfh = self._datafp("a+")
2013 dfh = self._datafp("a+")
2007 ifh = self._indexfp("a+")
2014 ifh = self._indexfp("a+")
2008 try:
2015 try:
2009 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2016 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2010 flags, cachedelta, ifh, dfh,
2017 flags, cachedelta, ifh, dfh,
2011 deltacomputer=deltacomputer)
2018 deltacomputer=deltacomputer)
2012 finally:
2019 finally:
2013 if dfh:
2020 if dfh:
2014 dfh.close()
2021 dfh.close()
2015 ifh.close()
2022 ifh.close()
2016
2023
2017 def compress(self, data):
2024 def compress(self, data):
2018 """Generate a possibly-compressed representation of data."""
2025 """Generate a possibly-compressed representation of data."""
2019 if not data:
2026 if not data:
2020 return '', data
2027 return '', data
2021
2028
2022 compressed = self._compressor.compress(data)
2029 compressed = self._compressor.compress(data)
2023
2030
2024 if compressed:
2031 if compressed:
2025 # The revlog compressor added the header in the returned data.
2032 # The revlog compressor added the header in the returned data.
2026 return '', compressed
2033 return '', compressed
2027
2034
2028 if data[0:1] == '\0':
2035 if data[0:1] == '\0':
2029 return '', data
2036 return '', data
2030 return 'u', data
2037 return 'u', data
2031
2038
2032 def decompress(self, data):
2039 def decompress(self, data):
2033 """Decompress a revlog chunk.
2040 """Decompress a revlog chunk.
2034
2041
2035 The chunk is expected to begin with a header identifying the
2042 The chunk is expected to begin with a header identifying the
2036 format type so it can be routed to an appropriate decompressor.
2043 format type so it can be routed to an appropriate decompressor.
2037 """
2044 """
2038 if not data:
2045 if not data:
2039 return data
2046 return data
2040
2047
2041 # Revlogs are read much more frequently than they are written and many
2048 # Revlogs are read much more frequently than they are written and many
2042 # chunks only take microseconds to decompress, so performance is
2049 # chunks only take microseconds to decompress, so performance is
2043 # important here.
2050 # important here.
2044 #
2051 #
2045 # We can make a few assumptions about revlogs:
2052 # We can make a few assumptions about revlogs:
2046 #
2053 #
2047 # 1) the majority of chunks will be compressed (as opposed to inline
2054 # 1) the majority of chunks will be compressed (as opposed to inline
2048 # raw data).
2055 # raw data).
2049 # 2) decompressing *any* data will likely by at least 10x slower than
2056 # 2) decompressing *any* data will likely by at least 10x slower than
2050 # returning raw inline data.
2057 # returning raw inline data.
2051 # 3) we want to prioritize common and officially supported compression
2058 # 3) we want to prioritize common and officially supported compression
2052 # engines
2059 # engines
2053 #
2060 #
2054 # It follows that we want to optimize for "decompress compressed data
2061 # It follows that we want to optimize for "decompress compressed data
2055 # when encoded with common and officially supported compression engines"
2062 # when encoded with common and officially supported compression engines"
2056 # case over "raw data" and "data encoded by less common or non-official
2063 # case over "raw data" and "data encoded by less common or non-official
2057 # compression engines." That is why we have the inline lookup first
2064 # compression engines." That is why we have the inline lookup first
2058 # followed by the compengines lookup.
2065 # followed by the compengines lookup.
2059 #
2066 #
2060 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2067 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2061 # compressed chunks. And this matters for changelog and manifest reads.
2068 # compressed chunks. And this matters for changelog and manifest reads.
2062 t = data[0:1]
2069 t = data[0:1]
2063
2070
2064 if t == 'x':
2071 if t == 'x':
2065 try:
2072 try:
2066 return _zlibdecompress(data)
2073 return _zlibdecompress(data)
2067 except zlib.error as e:
2074 except zlib.error as e:
2068 raise RevlogError(_('revlog decompress error: %s') %
2075 raise RevlogError(_('revlog decompress error: %s') %
2069 stringutil.forcebytestr(e))
2076 stringutil.forcebytestr(e))
2070 # '\0' is more common than 'u' so it goes first.
2077 # '\0' is more common than 'u' so it goes first.
2071 elif t == '\0':
2078 elif t == '\0':
2072 return data
2079 return data
2073 elif t == 'u':
2080 elif t == 'u':
2074 return util.buffer(data, 1)
2081 return util.buffer(data, 1)
2075
2082
2076 try:
2083 try:
2077 compressor = self._decompressors[t]
2084 compressor = self._decompressors[t]
2078 except KeyError:
2085 except KeyError:
2079 try:
2086 try:
2080 engine = util.compengines.forrevlogheader(t)
2087 engine = util.compengines.forrevlogheader(t)
2081 compressor = engine.revlogcompressor()
2088 compressor = engine.revlogcompressor()
2082 self._decompressors[t] = compressor
2089 self._decompressors[t] = compressor
2083 except KeyError:
2090 except KeyError:
2084 raise RevlogError(_('unknown compression type %r') % t)
2091 raise RevlogError(_('unknown compression type %r') % t)
2085
2092
2086 return compressor.decompress(data)
2093 return compressor.decompress(data)
2087
2094
2088 def _isgooddeltainfo(self, d, textlen):
2095 def _isgooddeltainfo(self, d, textlen):
2089 """Returns True if the given delta is good. Good means that it is within
2096 """Returns True if the given delta is good. Good means that it is within
2090 the disk span, disk size, and chain length bounds that we know to be
2097 the disk span, disk size, and chain length bounds that we know to be
2091 performant."""
2098 performant."""
2092 if d is None:
2099 if d is None:
2093 return False
2100 return False
2094
2101
2095 # - 'd.distance' is the distance from the base revision -- bounding it
2102 # - 'd.distance' is the distance from the base revision -- bounding it
2096 # limits the amount of I/O we need to do.
2103 # limits the amount of I/O we need to do.
2097 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2104 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2098 # need to apply -- bounding it limits the amount of CPU we consume.
2105 # need to apply -- bounding it limits the amount of CPU we consume.
2099
2106
2100 defaultmax = textlen * 4
2107 defaultmax = textlen * 4
2101 maxdist = self._maxdeltachainspan
2108 maxdist = self._maxdeltachainspan
2102 if not maxdist:
2109 if not maxdist:
2103 maxdist = d.distance # ensure the conditional pass
2110 maxdist = d.distance # ensure the conditional pass
2104 maxdist = max(maxdist, defaultmax)
2111 maxdist = max(maxdist, defaultmax)
2105 if (d.distance > maxdist or d.deltalen > textlen or
2112 if (d.distance > maxdist or d.deltalen > textlen or
2106 d.compresseddeltalen > textlen * 2 or
2113 d.compresseddeltalen > textlen * 2 or
2107 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2114 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2108 return False
2115 return False
2109
2116
2110 return True
2117 return True
2111
2118
2112 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2119 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2113 cachedelta, ifh, dfh, alwayscache=False,
2120 cachedelta, ifh, dfh, alwayscache=False,
2114 deltacomputer=None):
2121 deltacomputer=None):
2115 """internal function to add revisions to the log
2122 """internal function to add revisions to the log
2116
2123
2117 see addrevision for argument descriptions.
2124 see addrevision for argument descriptions.
2118
2125
2119 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2126 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2120
2127
2121 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2128 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2122 be used.
2129 be used.
2123
2130
2124 invariants:
2131 invariants:
2125 - rawtext is optional (can be None); if not set, cachedelta must be set.
2132 - rawtext is optional (can be None); if not set, cachedelta must be set.
2126 if both are set, they must correspond to each other.
2133 if both are set, they must correspond to each other.
2127 """
2134 """
2128 if node == nullid:
2135 if node == nullid:
2129 raise RevlogError(_("%s: attempt to add null revision") %
2136 raise RevlogError(_("%s: attempt to add null revision") %
2130 (self.indexfile))
2137 (self.indexfile))
2131 if node == wdirid or node in wdirfilenodeids:
2138 if node == wdirid or node in wdirfilenodeids:
2132 raise RevlogError(_("%s: attempt to add wdir revision") %
2139 raise RevlogError(_("%s: attempt to add wdir revision") %
2133 (self.indexfile))
2140 (self.indexfile))
2134
2141
2135 if self._inline:
2142 if self._inline:
2136 fh = ifh
2143 fh = ifh
2137 else:
2144 else:
2138 fh = dfh
2145 fh = dfh
2139
2146
2140 btext = [rawtext]
2147 btext = [rawtext]
2141
2148
2142 curr = len(self)
2149 curr = len(self)
2143 prev = curr - 1
2150 prev = curr - 1
2144 offset = self.end(prev)
2151 offset = self.end(prev)
2145 p1r, p2r = self.rev(p1), self.rev(p2)
2152 p1r, p2r = self.rev(p1), self.rev(p2)
2146
2153
2147 # full versions are inserted when the needed deltas
2154 # full versions are inserted when the needed deltas
2148 # become comparable to the uncompressed text
2155 # become comparable to the uncompressed text
2149 if rawtext is None:
2156 if rawtext is None:
2150 # need rawtext size, before changed by flag processors, which is
2157 # need rawtext size, before changed by flag processors, which is
2151 # the non-raw size. use revlog explicitly to avoid filelog's extra
2158 # the non-raw size. use revlog explicitly to avoid filelog's extra
2152 # logic that might remove metadata size.
2159 # logic that might remove metadata size.
2153 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2160 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2154 cachedelta[1])
2161 cachedelta[1])
2155 else:
2162 else:
2156 textlen = len(rawtext)
2163 textlen = len(rawtext)
2157
2164
2158 if deltacomputer is None:
2165 if deltacomputer is None:
2159 deltacomputer = _deltacomputer(self)
2166 deltacomputer = _deltacomputer(self)
2160
2167
2161 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2168 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2162
2169
2163 # no delta for flag processor revision (see "candelta" for why)
2170 # no delta for flag processor revision (see "candelta" for why)
2164 # not calling candelta since only one revision needs test, also to
2171 # not calling candelta since only one revision needs test, also to
2165 # avoid overhead fetching flags again.
2172 # avoid overhead fetching flags again.
2166 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2173 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2167 deltainfo = None
2174 deltainfo = None
2168 else:
2175 else:
2169 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2176 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2170
2177
2171 if deltainfo is not None:
2178 if deltainfo is not None:
2172 base = deltainfo.base
2179 base = deltainfo.base
2173 chainbase = deltainfo.chainbase
2180 chainbase = deltainfo.chainbase
2174 data = deltainfo.data
2181 data = deltainfo.data
2175 l = deltainfo.deltalen
2182 l = deltainfo.deltalen
2176 else:
2183 else:
2177 rawtext = deltacomputer.buildtext(revinfo, fh)
2184 rawtext = deltacomputer.buildtext(revinfo, fh)
2178 data = self.compress(rawtext)
2185 data = self.compress(rawtext)
2179 l = len(data[1]) + len(data[0])
2186 l = len(data[1]) + len(data[0])
2180 base = chainbase = curr
2187 base = chainbase = curr
2181
2188
2182 e = (offset_type(offset, flags), l, textlen,
2189 e = (offset_type(offset, flags), l, textlen,
2183 base, link, p1r, p2r, node)
2190 base, link, p1r, p2r, node)
2184 self.index.insert(-1, e)
2191 self.index.insert(-1, e)
2185 self.nodemap[node] = curr
2192 self.nodemap[node] = curr
2186
2193
2187 entry = self._io.packentry(e, self.node, self.version, curr)
2194 entry = self._io.packentry(e, self.node, self.version, curr)
2188 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2195 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2189
2196
2190 if alwayscache and rawtext is None:
2197 if alwayscache and rawtext is None:
2191 rawtext = deltacomputer._buildtext(revinfo, fh)
2198 rawtext = deltacomputer._buildtext(revinfo, fh)
2192
2199
2193 if type(rawtext) == bytes: # only accept immutable objects
2200 if type(rawtext) == bytes: # only accept immutable objects
2194 self._cache = (node, curr, rawtext)
2201 self._cache = (node, curr, rawtext)
2195 self._chainbasecache[curr] = chainbase
2202 self._chainbasecache[curr] = chainbase
2196 return node
2203 return node
2197
2204
2198 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2205 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2199 # Files opened in a+ mode have inconsistent behavior on various
2206 # Files opened in a+ mode have inconsistent behavior on various
2200 # platforms. Windows requires that a file positioning call be made
2207 # platforms. Windows requires that a file positioning call be made
2201 # when the file handle transitions between reads and writes. See
2208 # when the file handle transitions between reads and writes. See
2202 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2209 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2203 # platforms, Python or the platform itself can be buggy. Some versions
2210 # platforms, Python or the platform itself can be buggy. Some versions
2204 # of Solaris have been observed to not append at the end of the file
2211 # of Solaris have been observed to not append at the end of the file
2205 # if the file was seeked to before the end. See issue4943 for more.
2212 # if the file was seeked to before the end. See issue4943 for more.
2206 #
2213 #
2207 # We work around this issue by inserting a seek() before writing.
2214 # We work around this issue by inserting a seek() before writing.
2208 # Note: This is likely not necessary on Python 3.
2215 # Note: This is likely not necessary on Python 3.
2209 ifh.seek(0, os.SEEK_END)
2216 ifh.seek(0, os.SEEK_END)
2210 if dfh:
2217 if dfh:
2211 dfh.seek(0, os.SEEK_END)
2218 dfh.seek(0, os.SEEK_END)
2212
2219
2213 curr = len(self) - 1
2220 curr = len(self) - 1
2214 if not self._inline:
2221 if not self._inline:
2215 transaction.add(self.datafile, offset)
2222 transaction.add(self.datafile, offset)
2216 transaction.add(self.indexfile, curr * len(entry))
2223 transaction.add(self.indexfile, curr * len(entry))
2217 if data[0]:
2224 if data[0]:
2218 dfh.write(data[0])
2225 dfh.write(data[0])
2219 dfh.write(data[1])
2226 dfh.write(data[1])
2220 ifh.write(entry)
2227 ifh.write(entry)
2221 else:
2228 else:
2222 offset += curr * self._io.size
2229 offset += curr * self._io.size
2223 transaction.add(self.indexfile, offset, curr)
2230 transaction.add(self.indexfile, offset, curr)
2224 ifh.write(entry)
2231 ifh.write(entry)
2225 ifh.write(data[0])
2232 ifh.write(data[0])
2226 ifh.write(data[1])
2233 ifh.write(data[1])
2227 self._enforceinlinesize(transaction, ifh)
2234 self._enforceinlinesize(transaction, ifh)
2228
2235
2229 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2236 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2230 """
2237 """
2231 add a delta group
2238 add a delta group
2232
2239
2233 given a set of deltas, add them to the revision log. the
2240 given a set of deltas, add them to the revision log. the
2234 first delta is against its parent, which should be in our
2241 first delta is against its parent, which should be in our
2235 log, the rest are against the previous delta.
2242 log, the rest are against the previous delta.
2236
2243
2237 If ``addrevisioncb`` is defined, it will be called with arguments of
2244 If ``addrevisioncb`` is defined, it will be called with arguments of
2238 this revlog and the node that was added.
2245 this revlog and the node that was added.
2239 """
2246 """
2240
2247
2241 nodes = []
2248 nodes = []
2242
2249
2243 r = len(self)
2250 r = len(self)
2244 end = 0
2251 end = 0
2245 if r:
2252 if r:
2246 end = self.end(r - 1)
2253 end = self.end(r - 1)
2247 ifh = self._indexfp("a+")
2254 ifh = self._indexfp("a+")
2248 isize = r * self._io.size
2255 isize = r * self._io.size
2249 if self._inline:
2256 if self._inline:
2250 transaction.add(self.indexfile, end + isize, r)
2257 transaction.add(self.indexfile, end + isize, r)
2251 dfh = None
2258 dfh = None
2252 else:
2259 else:
2253 transaction.add(self.indexfile, isize, r)
2260 transaction.add(self.indexfile, isize, r)
2254 transaction.add(self.datafile, end)
2261 transaction.add(self.datafile, end)
2255 dfh = self._datafp("a+")
2262 dfh = self._datafp("a+")
2256 def flush():
2263 def flush():
2257 if dfh:
2264 if dfh:
2258 dfh.flush()
2265 dfh.flush()
2259 ifh.flush()
2266 ifh.flush()
2260 try:
2267 try:
2261 deltacomputer = _deltacomputer(self)
2268 deltacomputer = _deltacomputer(self)
2262 # loop through our set of deltas
2269 # loop through our set of deltas
2263 for data in deltas:
2270 for data in deltas:
2264 node, p1, p2, linknode, deltabase, delta, flags = data
2271 node, p1, p2, linknode, deltabase, delta, flags = data
2265 link = linkmapper(linknode)
2272 link = linkmapper(linknode)
2266 flags = flags or REVIDX_DEFAULT_FLAGS
2273 flags = flags or REVIDX_DEFAULT_FLAGS
2267
2274
2268 nodes.append(node)
2275 nodes.append(node)
2269
2276
2270 if node in self.nodemap:
2277 if node in self.nodemap:
2271 # this can happen if two branches make the same change
2278 # this can happen if two branches make the same change
2272 continue
2279 continue
2273
2280
2274 for p in (p1, p2):
2281 for p in (p1, p2):
2275 if p not in self.nodemap:
2282 if p not in self.nodemap:
2276 raise LookupError(p, self.indexfile,
2283 raise LookupError(p, self.indexfile,
2277 _('unknown parent'))
2284 _('unknown parent'))
2278
2285
2279 if deltabase not in self.nodemap:
2286 if deltabase not in self.nodemap:
2280 raise LookupError(deltabase, self.indexfile,
2287 raise LookupError(deltabase, self.indexfile,
2281 _('unknown delta base'))
2288 _('unknown delta base'))
2282
2289
2283 baserev = self.rev(deltabase)
2290 baserev = self.rev(deltabase)
2284
2291
2285 if baserev != nullrev and self.iscensored(baserev):
2292 if baserev != nullrev and self.iscensored(baserev):
2286 # if base is censored, delta must be full replacement in a
2293 # if base is censored, delta must be full replacement in a
2287 # single patch operation
2294 # single patch operation
2288 hlen = struct.calcsize(">lll")
2295 hlen = struct.calcsize(">lll")
2289 oldlen = self.rawsize(baserev)
2296 oldlen = self.rawsize(baserev)
2290 newlen = len(delta) - hlen
2297 newlen = len(delta) - hlen
2291 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2298 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2292 raise error.CensoredBaseError(self.indexfile,
2299 raise error.CensoredBaseError(self.indexfile,
2293 self.node(baserev))
2300 self.node(baserev))
2294
2301
2295 if not flags and self._peek_iscensored(baserev, delta, flush):
2302 if not flags and self._peek_iscensored(baserev, delta, flush):
2296 flags |= REVIDX_ISCENSORED
2303 flags |= REVIDX_ISCENSORED
2297
2304
2298 # We assume consumers of addrevisioncb will want to retrieve
2305 # We assume consumers of addrevisioncb will want to retrieve
2299 # the added revision, which will require a call to
2306 # the added revision, which will require a call to
2300 # revision(). revision() will fast path if there is a cache
2307 # revision(). revision() will fast path if there is a cache
2301 # hit. So, we tell _addrevision() to always cache in this case.
2308 # hit. So, we tell _addrevision() to always cache in this case.
2302 # We're only using addgroup() in the context of changegroup
2309 # We're only using addgroup() in the context of changegroup
2303 # generation so the revision data can always be handled as raw
2310 # generation so the revision data can always be handled as raw
2304 # by the flagprocessor.
2311 # by the flagprocessor.
2305 self._addrevision(node, None, transaction, link,
2312 self._addrevision(node, None, transaction, link,
2306 p1, p2, flags, (baserev, delta),
2313 p1, p2, flags, (baserev, delta),
2307 ifh, dfh,
2314 ifh, dfh,
2308 alwayscache=bool(addrevisioncb),
2315 alwayscache=bool(addrevisioncb),
2309 deltacomputer=deltacomputer)
2316 deltacomputer=deltacomputer)
2310
2317
2311 if addrevisioncb:
2318 if addrevisioncb:
2312 addrevisioncb(self, node)
2319 addrevisioncb(self, node)
2313
2320
2314 if not dfh and not self._inline:
2321 if not dfh and not self._inline:
2315 # addrevision switched from inline to conventional
2322 # addrevision switched from inline to conventional
2316 # reopen the index
2323 # reopen the index
2317 ifh.close()
2324 ifh.close()
2318 dfh = self._datafp("a+")
2325 dfh = self._datafp("a+")
2319 ifh = self._indexfp("a+")
2326 ifh = self._indexfp("a+")
2320 finally:
2327 finally:
2321 if dfh:
2328 if dfh:
2322 dfh.close()
2329 dfh.close()
2323 ifh.close()
2330 ifh.close()
2324
2331
2325 return nodes
2332 return nodes
2326
2333
2327 def iscensored(self, rev):
2334 def iscensored(self, rev):
2328 """Check if a file revision is censored."""
2335 """Check if a file revision is censored."""
2329 if not self._censorable:
2336 if not self._censorable:
2330 return False
2337 return False
2331
2338
2332 return self.flags(rev) & REVIDX_ISCENSORED
2339 return self.flags(rev) & REVIDX_ISCENSORED
2333
2340
2334 def _peek_iscensored(self, baserev, delta, flush):
2341 def _peek_iscensored(self, baserev, delta, flush):
2335 """Quickly check if a delta produces a censored revision."""
2342 """Quickly check if a delta produces a censored revision."""
2336 if not self._censorable:
2343 if not self._censorable:
2337 return False
2344 return False
2338
2345
2339 # Fragile heuristic: unless new file meta keys are added alphabetically
2346 # Fragile heuristic: unless new file meta keys are added alphabetically
2340 # preceding "censored", all censored revisions are prefixed by
2347 # preceding "censored", all censored revisions are prefixed by
2341 # "\1\ncensored:". A delta producing such a censored revision must be a
2348 # "\1\ncensored:". A delta producing such a censored revision must be a
2342 # full-replacement delta, so we inspect the first and only patch in the
2349 # full-replacement delta, so we inspect the first and only patch in the
2343 # delta for this prefix.
2350 # delta for this prefix.
2344 hlen = struct.calcsize(">lll")
2351 hlen = struct.calcsize(">lll")
2345 if len(delta) <= hlen:
2352 if len(delta) <= hlen:
2346 return False
2353 return False
2347
2354
2348 oldlen = self.rawsize(baserev)
2355 oldlen = self.rawsize(baserev)
2349 newlen = len(delta) - hlen
2356 newlen = len(delta) - hlen
2350 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2357 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2351 return False
2358 return False
2352
2359
2353 add = "\1\ncensored:"
2360 add = "\1\ncensored:"
2354 addlen = len(add)
2361 addlen = len(add)
2355 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2362 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2356
2363
2357 def getstrippoint(self, minlink):
2364 def getstrippoint(self, minlink):
2358 """find the minimum rev that must be stripped to strip the linkrev
2365 """find the minimum rev that must be stripped to strip the linkrev
2359
2366
2360 Returns a tuple containing the minimum rev and a set of all revs that
2367 Returns a tuple containing the minimum rev and a set of all revs that
2361 have linkrevs that will be broken by this strip.
2368 have linkrevs that will be broken by this strip.
2362 """
2369 """
2363 brokenrevs = set()
2370 brokenrevs = set()
2364 strippoint = len(self)
2371 strippoint = len(self)
2365
2372
2366 heads = {}
2373 heads = {}
2367 futurelargelinkrevs = set()
2374 futurelargelinkrevs = set()
2368 for head in self.headrevs():
2375 for head in self.headrevs():
2369 headlinkrev = self.linkrev(head)
2376 headlinkrev = self.linkrev(head)
2370 heads[head] = headlinkrev
2377 heads[head] = headlinkrev
2371 if headlinkrev >= minlink:
2378 if headlinkrev >= minlink:
2372 futurelargelinkrevs.add(headlinkrev)
2379 futurelargelinkrevs.add(headlinkrev)
2373
2380
2374 # This algorithm involves walking down the rev graph, starting at the
2381 # This algorithm involves walking down the rev graph, starting at the
2375 # heads. Since the revs are topologically sorted according to linkrev,
2382 # heads. Since the revs are topologically sorted according to linkrev,
2376 # once all head linkrevs are below the minlink, we know there are
2383 # once all head linkrevs are below the minlink, we know there are
2377 # no more revs that could have a linkrev greater than minlink.
2384 # no more revs that could have a linkrev greater than minlink.
2378 # So we can stop walking.
2385 # So we can stop walking.
2379 while futurelargelinkrevs:
2386 while futurelargelinkrevs:
2380 strippoint -= 1
2387 strippoint -= 1
2381 linkrev = heads.pop(strippoint)
2388 linkrev = heads.pop(strippoint)
2382
2389
2383 if linkrev < minlink:
2390 if linkrev < minlink:
2384 brokenrevs.add(strippoint)
2391 brokenrevs.add(strippoint)
2385 else:
2392 else:
2386 futurelargelinkrevs.remove(linkrev)
2393 futurelargelinkrevs.remove(linkrev)
2387
2394
2388 for p in self.parentrevs(strippoint):
2395 for p in self.parentrevs(strippoint):
2389 if p != nullrev:
2396 if p != nullrev:
2390 plinkrev = self.linkrev(p)
2397 plinkrev = self.linkrev(p)
2391 heads[p] = plinkrev
2398 heads[p] = plinkrev
2392 if plinkrev >= minlink:
2399 if plinkrev >= minlink:
2393 futurelargelinkrevs.add(plinkrev)
2400 futurelargelinkrevs.add(plinkrev)
2394
2401
2395 return strippoint, brokenrevs
2402 return strippoint, brokenrevs
2396
2403
2397 def strip(self, minlink, transaction):
2404 def strip(self, minlink, transaction):
2398 """truncate the revlog on the first revision with a linkrev >= minlink
2405 """truncate the revlog on the first revision with a linkrev >= minlink
2399
2406
2400 This function is called when we're stripping revision minlink and
2407 This function is called when we're stripping revision minlink and
2401 its descendants from the repository.
2408 its descendants from the repository.
2402
2409
2403 We have to remove all revisions with linkrev >= minlink, because
2410 We have to remove all revisions with linkrev >= minlink, because
2404 the equivalent changelog revisions will be renumbered after the
2411 the equivalent changelog revisions will be renumbered after the
2405 strip.
2412 strip.
2406
2413
2407 So we truncate the revlog on the first of these revisions, and
2414 So we truncate the revlog on the first of these revisions, and
2408 trust that the caller has saved the revisions that shouldn't be
2415 trust that the caller has saved the revisions that shouldn't be
2409 removed and that it'll re-add them after this truncation.
2416 removed and that it'll re-add them after this truncation.
2410 """
2417 """
2411 if len(self) == 0:
2418 if len(self) == 0:
2412 return
2419 return
2413
2420
2414 rev, _ = self.getstrippoint(minlink)
2421 rev, _ = self.getstrippoint(minlink)
2415 if rev == len(self):
2422 if rev == len(self):
2416 return
2423 return
2417
2424
2418 # first truncate the files on disk
2425 # first truncate the files on disk
2419 end = self.start(rev)
2426 end = self.start(rev)
2420 if not self._inline:
2427 if not self._inline:
2421 transaction.add(self.datafile, end)
2428 transaction.add(self.datafile, end)
2422 end = rev * self._io.size
2429 end = rev * self._io.size
2423 else:
2430 else:
2424 end += rev * self._io.size
2431 end += rev * self._io.size
2425
2432
2426 transaction.add(self.indexfile, end)
2433 transaction.add(self.indexfile, end)
2427
2434
2428 # then reset internal state in memory to forget those revisions
2435 # then reset internal state in memory to forget those revisions
2429 self._cache = None
2436 self._cache = None
2430 self._chaininfocache = {}
2437 self._chaininfocache = {}
2431 self._chunkclear()
2438 self._chunkclear()
2432 for x in xrange(rev, len(self)):
2439 for x in xrange(rev, len(self)):
2433 del self.nodemap[self.node(x)]
2440 del self.nodemap[self.node(x)]
2434
2441
2435 del self.index[rev:-1]
2442 del self.index[rev:-1]
2436 self._nodepos = None
2443 self._nodepos = None
2437
2444
2438 def checksize(self):
2445 def checksize(self):
2439 expected = 0
2446 expected = 0
2440 if len(self):
2447 if len(self):
2441 expected = max(0, self.end(len(self) - 1))
2448 expected = max(0, self.end(len(self) - 1))
2442
2449
2443 try:
2450 try:
2444 with self._datafp() as f:
2451 with self._datafp() as f:
2445 f.seek(0, 2)
2452 f.seek(0, 2)
2446 actual = f.tell()
2453 actual = f.tell()
2447 dd = actual - expected
2454 dd = actual - expected
2448 except IOError as inst:
2455 except IOError as inst:
2449 if inst.errno != errno.ENOENT:
2456 if inst.errno != errno.ENOENT:
2450 raise
2457 raise
2451 dd = 0
2458 dd = 0
2452
2459
2453 try:
2460 try:
2454 f = self.opener(self.indexfile)
2461 f = self.opener(self.indexfile)
2455 f.seek(0, 2)
2462 f.seek(0, 2)
2456 actual = f.tell()
2463 actual = f.tell()
2457 f.close()
2464 f.close()
2458 s = self._io.size
2465 s = self._io.size
2459 i = max(0, actual // s)
2466 i = max(0, actual // s)
2460 di = actual - (i * s)
2467 di = actual - (i * s)
2461 if self._inline:
2468 if self._inline:
2462 databytes = 0
2469 databytes = 0
2463 for r in self:
2470 for r in self:
2464 databytes += max(0, self.length(r))
2471 databytes += max(0, self.length(r))
2465 dd = 0
2472 dd = 0
2466 di = actual - len(self) * s - databytes
2473 di = actual - len(self) * s - databytes
2467 except IOError as inst:
2474 except IOError as inst:
2468 if inst.errno != errno.ENOENT:
2475 if inst.errno != errno.ENOENT:
2469 raise
2476 raise
2470 di = 0
2477 di = 0
2471
2478
2472 return (dd, di)
2479 return (dd, di)
2473
2480
2474 def files(self):
2481 def files(self):
2475 res = [self.indexfile]
2482 res = [self.indexfile]
2476 if not self._inline:
2483 if not self._inline:
2477 res.append(self.datafile)
2484 res.append(self.datafile)
2478 return res
2485 return res
2479
2486
2480 DELTAREUSEALWAYS = 'always'
2487 DELTAREUSEALWAYS = 'always'
2481 DELTAREUSESAMEREVS = 'samerevs'
2488 DELTAREUSESAMEREVS = 'samerevs'
2482 DELTAREUSENEVER = 'never'
2489 DELTAREUSENEVER = 'never'
2483
2490
2484 DELTAREUSEFULLADD = 'fulladd'
2491 DELTAREUSEFULLADD = 'fulladd'
2485
2492
2486 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2493 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2487
2494
2488 def clone(self, tr, destrevlog, addrevisioncb=None,
2495 def clone(self, tr, destrevlog, addrevisioncb=None,
2489 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2496 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2490 """Copy this revlog to another, possibly with format changes.
2497 """Copy this revlog to another, possibly with format changes.
2491
2498
2492 The destination revlog will contain the same revisions and nodes.
2499 The destination revlog will contain the same revisions and nodes.
2493 However, it may not be bit-for-bit identical due to e.g. delta encoding
2500 However, it may not be bit-for-bit identical due to e.g. delta encoding
2494 differences.
2501 differences.
2495
2502
2496 The ``deltareuse`` argument control how deltas from the existing revlog
2503 The ``deltareuse`` argument control how deltas from the existing revlog
2497 are preserved in the destination revlog. The argument can have the
2504 are preserved in the destination revlog. The argument can have the
2498 following values:
2505 following values:
2499
2506
2500 DELTAREUSEALWAYS
2507 DELTAREUSEALWAYS
2501 Deltas will always be reused (if possible), even if the destination
2508 Deltas will always be reused (if possible), even if the destination
2502 revlog would not select the same revisions for the delta. This is the
2509 revlog would not select the same revisions for the delta. This is the
2503 fastest mode of operation.
2510 fastest mode of operation.
2504 DELTAREUSESAMEREVS
2511 DELTAREUSESAMEREVS
2505 Deltas will be reused if the destination revlog would pick the same
2512 Deltas will be reused if the destination revlog would pick the same
2506 revisions for the delta. This mode strikes a balance between speed
2513 revisions for the delta. This mode strikes a balance between speed
2507 and optimization.
2514 and optimization.
2508 DELTAREUSENEVER
2515 DELTAREUSENEVER
2509 Deltas will never be reused. This is the slowest mode of execution.
2516 Deltas will never be reused. This is the slowest mode of execution.
2510 This mode can be used to recompute deltas (e.g. if the diff/delta
2517 This mode can be used to recompute deltas (e.g. if the diff/delta
2511 algorithm changes).
2518 algorithm changes).
2512
2519
2513 Delta computation can be slow, so the choice of delta reuse policy can
2520 Delta computation can be slow, so the choice of delta reuse policy can
2514 significantly affect run time.
2521 significantly affect run time.
2515
2522
2516 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2523 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2517 two extremes. Deltas will be reused if they are appropriate. But if the
2524 two extremes. Deltas will be reused if they are appropriate. But if the
2518 delta could choose a better revision, it will do so. This means if you
2525 delta could choose a better revision, it will do so. This means if you
2519 are converting a non-generaldelta revlog to a generaldelta revlog,
2526 are converting a non-generaldelta revlog to a generaldelta revlog,
2520 deltas will be recomputed if the delta's parent isn't a parent of the
2527 deltas will be recomputed if the delta's parent isn't a parent of the
2521 revision.
2528 revision.
2522
2529
2523 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2530 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2524 controls whether to compute deltas against both parents for merges.
2531 controls whether to compute deltas against both parents for merges.
2525 By default, the current default is used.
2532 By default, the current default is used.
2526 """
2533 """
2527 if deltareuse not in self.DELTAREUSEALL:
2534 if deltareuse not in self.DELTAREUSEALL:
2528 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2535 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2529
2536
2530 if len(destrevlog):
2537 if len(destrevlog):
2531 raise ValueError(_('destination revlog is not empty'))
2538 raise ValueError(_('destination revlog is not empty'))
2532
2539
2533 if getattr(self, 'filteredrevs', None):
2540 if getattr(self, 'filteredrevs', None):
2534 raise ValueError(_('source revlog has filtered revisions'))
2541 raise ValueError(_('source revlog has filtered revisions'))
2535 if getattr(destrevlog, 'filteredrevs', None):
2542 if getattr(destrevlog, 'filteredrevs', None):
2536 raise ValueError(_('destination revlog has filtered revisions'))
2543 raise ValueError(_('destination revlog has filtered revisions'))
2537
2544
2538 # lazydeltabase controls whether to reuse a cached delta, if possible.
2545 # lazydeltabase controls whether to reuse a cached delta, if possible.
2539 oldlazydeltabase = destrevlog._lazydeltabase
2546 oldlazydeltabase = destrevlog._lazydeltabase
2540 oldamd = destrevlog._aggressivemergedeltas
2547 oldamd = destrevlog._aggressivemergedeltas
2541
2548
2542 try:
2549 try:
2543 if deltareuse == self.DELTAREUSEALWAYS:
2550 if deltareuse == self.DELTAREUSEALWAYS:
2544 destrevlog._lazydeltabase = True
2551 destrevlog._lazydeltabase = True
2545 elif deltareuse == self.DELTAREUSESAMEREVS:
2552 elif deltareuse == self.DELTAREUSESAMEREVS:
2546 destrevlog._lazydeltabase = False
2553 destrevlog._lazydeltabase = False
2547
2554
2548 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2555 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2549
2556
2550 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2557 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2551 self.DELTAREUSESAMEREVS)
2558 self.DELTAREUSESAMEREVS)
2552
2559
2553 deltacomputer = _deltacomputer(destrevlog)
2560 deltacomputer = _deltacomputer(destrevlog)
2554 index = self.index
2561 index = self.index
2555 for rev in self:
2562 for rev in self:
2556 entry = index[rev]
2563 entry = index[rev]
2557
2564
2558 # Some classes override linkrev to take filtered revs into
2565 # Some classes override linkrev to take filtered revs into
2559 # account. Use raw entry from index.
2566 # account. Use raw entry from index.
2560 flags = entry[0] & 0xffff
2567 flags = entry[0] & 0xffff
2561 linkrev = entry[4]
2568 linkrev = entry[4]
2562 p1 = index[entry[5]][7]
2569 p1 = index[entry[5]][7]
2563 p2 = index[entry[6]][7]
2570 p2 = index[entry[6]][7]
2564 node = entry[7]
2571 node = entry[7]
2565
2572
2566 # (Possibly) reuse the delta from the revlog if allowed and
2573 # (Possibly) reuse the delta from the revlog if allowed and
2567 # the revlog chunk is a delta.
2574 # the revlog chunk is a delta.
2568 cachedelta = None
2575 cachedelta = None
2569 rawtext = None
2576 rawtext = None
2570 if populatecachedelta:
2577 if populatecachedelta:
2571 dp = self.deltaparent(rev)
2578 dp = self.deltaparent(rev)
2572 if dp != nullrev:
2579 if dp != nullrev:
2573 cachedelta = (dp, bytes(self._chunk(rev)))
2580 cachedelta = (dp, bytes(self._chunk(rev)))
2574
2581
2575 if not cachedelta:
2582 if not cachedelta:
2576 rawtext = self.revision(rev, raw=True)
2583 rawtext = self.revision(rev, raw=True)
2577
2584
2578
2585
2579 if deltareuse == self.DELTAREUSEFULLADD:
2586 if deltareuse == self.DELTAREUSEFULLADD:
2580 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2587 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2581 cachedelta=cachedelta,
2588 cachedelta=cachedelta,
2582 node=node, flags=flags,
2589 node=node, flags=flags,
2583 deltacomputer=deltacomputer)
2590 deltacomputer=deltacomputer)
2584 else:
2591 else:
2585 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2592 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2586 checkambig=False)
2593 checkambig=False)
2587 dfh = None
2594 dfh = None
2588 if not destrevlog._inline:
2595 if not destrevlog._inline:
2589 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2596 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2590 try:
2597 try:
2591 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2598 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2592 p2, flags, cachedelta, ifh, dfh,
2599 p2, flags, cachedelta, ifh, dfh,
2593 deltacomputer=deltacomputer)
2600 deltacomputer=deltacomputer)
2594 finally:
2601 finally:
2595 if dfh:
2602 if dfh:
2596 dfh.close()
2603 dfh.close()
2597 ifh.close()
2604 ifh.close()
2598
2605
2599 if addrevisioncb:
2606 if addrevisioncb:
2600 addrevisioncb(self, rev, node)
2607 addrevisioncb(self, rev, node)
2601 finally:
2608 finally:
2602 destrevlog._lazydeltabase = oldlazydeltabase
2609 destrevlog._lazydeltabase = oldlazydeltabase
2603 destrevlog._aggressivemergedeltas = oldamd
2610 destrevlog._aggressivemergedeltas = oldamd
General Comments 0
You need to be logged in to leave comments. Login now