revlog: make chainbase cache its result for the correct revision...
Paul Morelle
r38187:fc72beec default
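This commit fixes a cache-poisoning bug in revlog.chainbase(): the old loop reassigned ``rev`` while walking the delta chain, so the final ``self._chainbasecache[rev] = base`` stored the result under the chain base itself rather than under the revision that was queried. The diff below introduces a separate ``iterrev`` walker so the original ``rev`` survives as the cache key. A minimal standalone sketch of both behaviours, using a toy rev-to-base mapping rather than Mercurial's real index structure:

    index = {0: 0, 1: 0, 2: 1}  # toy mapping: rev -> base rev (hypothetical)
    cache = {}

    def chainbase_old(rev):
        base = index[rev]
        while base != rev:
            rev = base              # bug: clobbers the eventual cache key
            base = index[rev]
        cache[rev] = base           # caches under the chain base, not the query
        return base

    def chainbase_new(rev):
        iterrev = rev               # walk with a separate variable
        base = index[iterrev]
        while base != iterrev:
            iterrev = base
            base = index[iterrev]
        cache[rev] = base           # keyed by the revision that was asked for
        return base

    chainbase_old(2)   # leaves cache == {0: 0}; a later chainbase(2) misses
    cache.clear()
    chainbase_new(2)   # leaves cache == {2: 0}; a later chainbase(2) hits

With the old code, lookups for any revision other than a chain base always missed, so the chain walk was repeated on every call.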
@@ -1,2614 +1,2615 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import heapq
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .utils import (
    stringutil,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
# bitmark for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)

def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns a same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor
660 self._srmingapsize = opts['sparse-read-min-gap-size']
660 self._srmingapsize = opts['sparse-read-min-gap-size']
661
661
662 if self._chunkcachesize <= 0:
662 if self._chunkcachesize <= 0:
663 raise RevlogError(_('revlog chunk cache size %r is not greater '
663 raise RevlogError(_('revlog chunk cache size %r is not greater '
664 'than 0') % self._chunkcachesize)
664 'than 0') % self._chunkcachesize)
665 elif self._chunkcachesize & (self._chunkcachesize - 1):
665 elif self._chunkcachesize & (self._chunkcachesize - 1):
666 raise RevlogError(_('revlog chunk cache size %r is not a power '
666 raise RevlogError(_('revlog chunk cache size %r is not a power '
667 'of 2') % self._chunkcachesize)
667 'of 2') % self._chunkcachesize)
668
668
669 indexdata = ''
669 indexdata = ''
670 self._initempty = True
670 self._initempty = True
671 try:
671 try:
672 with self._indexfp() as f:
672 with self._indexfp() as f:
673 if (mmapindexthreshold is not None and
673 if (mmapindexthreshold is not None and
674 self.opener.fstat(f).st_size >= mmapindexthreshold):
674 self.opener.fstat(f).st_size >= mmapindexthreshold):
675 indexdata = util.buffer(util.mmapread(f))
675 indexdata = util.buffer(util.mmapread(f))
676 else:
676 else:
677 indexdata = f.read()
677 indexdata = f.read()
678 if len(indexdata) > 0:
678 if len(indexdata) > 0:
679 v = versionformat_unpack(indexdata[:4])[0]
679 v = versionformat_unpack(indexdata[:4])[0]
680 self._initempty = False
680 self._initempty = False
681 except IOError as inst:
681 except IOError as inst:
682 if inst.errno != errno.ENOENT:
682 if inst.errno != errno.ENOENT:
683 raise
683 raise
684
684
685 self.version = v
685 self.version = v
686 self._inline = v & FLAG_INLINE_DATA
686 self._inline = v & FLAG_INLINE_DATA
687 self._generaldelta = v & FLAG_GENERALDELTA
687 self._generaldelta = v & FLAG_GENERALDELTA
688 flags = v & ~0xFFFF
688 flags = v & ~0xFFFF
689 fmt = v & 0xFFFF
689 fmt = v & 0xFFFF
690 if fmt == REVLOGV0:
690 if fmt == REVLOGV0:
691 if flags:
691 if flags:
692 raise RevlogError(_('unknown flags (%#04x) in version %d '
692 raise RevlogError(_('unknown flags (%#04x) in version %d '
693 'revlog %s') %
693 'revlog %s') %
694 (flags >> 16, fmt, self.indexfile))
694 (flags >> 16, fmt, self.indexfile))
695 elif fmt == REVLOGV1:
695 elif fmt == REVLOGV1:
696 if flags & ~REVLOGV1_FLAGS:
696 if flags & ~REVLOGV1_FLAGS:
697 raise RevlogError(_('unknown flags (%#04x) in version %d '
697 raise RevlogError(_('unknown flags (%#04x) in version %d '
698 'revlog %s') %
698 'revlog %s') %
699 (flags >> 16, fmt, self.indexfile))
699 (flags >> 16, fmt, self.indexfile))
700 elif fmt == REVLOGV2:
700 elif fmt == REVLOGV2:
701 if flags & ~REVLOGV2_FLAGS:
701 if flags & ~REVLOGV2_FLAGS:
702 raise RevlogError(_('unknown flags (%#04x) in version %d '
702 raise RevlogError(_('unknown flags (%#04x) in version %d '
703 'revlog %s') %
703 'revlog %s') %
704 (flags >> 16, fmt, self.indexfile))
704 (flags >> 16, fmt, self.indexfile))
705 else:
705 else:
706 raise RevlogError(_('unknown version (%d) in revlog %s') %
706 raise RevlogError(_('unknown version (%d) in revlog %s') %
707 (fmt, self.indexfile))
707 (fmt, self.indexfile))
708
708
709 self.storedeltachains = True
709 self.storedeltachains = True
710
710
711 self._io = revlogio()
711 self._io = revlogio()
712 if self.version == REVLOGV0:
712 if self.version == REVLOGV0:
713 self._io = revlogoldio()
713 self._io = revlogoldio()
714 try:
714 try:
715 d = self._io.parseindex(indexdata, self._inline)
715 d = self._io.parseindex(indexdata, self._inline)
716 except (ValueError, IndexError):
716 except (ValueError, IndexError):
717 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
717 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
718 self.index, nodemap, self._chunkcache = d
718 self.index, nodemap, self._chunkcache = d
719 if nodemap is not None:
719 if nodemap is not None:
720 self.nodemap = self._nodecache = nodemap
720 self.nodemap = self._nodecache = nodemap
721 if not self._chunkcache:
721 if not self._chunkcache:
722 self._chunkclear()
722 self._chunkclear()
723 # revnum -> (chain-length, sum-delta-length)
723 # revnum -> (chain-length, sum-delta-length)
724 self._chaininfocache = {}
724 self._chaininfocache = {}
725 # revlog header -> revlog compressor
725 # revlog header -> revlog compressor
726 self._decompressors = {}
726 self._decompressors = {}
727
727
728 @util.propertycache
728 @util.propertycache
729 def _compressor(self):
729 def _compressor(self):
730 return util.compengines[self._compengine].revlogcompressor()
730 return util.compengines[self._compengine].revlogcompressor()
731
731
732 def _indexfp(self, mode='r'):
732 def _indexfp(self, mode='r'):
733 """file object for the revlog's index file"""
733 """file object for the revlog's index file"""
734 args = {r'mode': mode}
734 args = {r'mode': mode}
735 if mode != 'r':
735 if mode != 'r':
736 args[r'checkambig'] = self._checkambig
736 args[r'checkambig'] = self._checkambig
737 if mode == 'w':
737 if mode == 'w':
738 args[r'atomictemp'] = True
738 args[r'atomictemp'] = True
739 return self.opener(self.indexfile, **args)
739 return self.opener(self.indexfile, **args)
740
740
741 def _datafp(self, mode='r'):
741 def _datafp(self, mode='r'):
742 """file object for the revlog's data file"""
742 """file object for the revlog's data file"""
743 return self.opener(self.datafile, mode=mode)
743 return self.opener(self.datafile, mode=mode)
744
744
745 @contextlib.contextmanager
745 @contextlib.contextmanager
746 def _datareadfp(self, existingfp=None):
746 def _datareadfp(self, existingfp=None):
747 """file object suitable to read data"""
747 """file object suitable to read data"""
748 if existingfp is not None:
748 if existingfp is not None:
749 yield existingfp
749 yield existingfp
750 else:
750 else:
751 if self._inline:
751 if self._inline:
752 func = self._indexfp
752 func = self._indexfp
753 else:
753 else:
754 func = self._datafp
754 func = self._datafp
755 with func() as fp:
755 with func() as fp:
756 yield fp
756 yield fp
757
757
758 def tip(self):
758 def tip(self):
759 return self.node(len(self.index) - 2)
759 return self.node(len(self.index) - 2)
760 def __contains__(self, rev):
760 def __contains__(self, rev):
761 return 0 <= rev < len(self)
761 return 0 <= rev < len(self)
762 def __len__(self):
762 def __len__(self):
763 return len(self.index) - 1
763 return len(self.index) - 1
764 def __iter__(self):
764 def __iter__(self):
765 return iter(xrange(len(self)))
765 return iter(xrange(len(self)))
766 def revs(self, start=0, stop=None):
766 def revs(self, start=0, stop=None):
767 """iterate over all rev in this revlog (from start to stop)"""
767 """iterate over all rev in this revlog (from start to stop)"""
768 step = 1
768 step = 1
769 if stop is not None:
769 if stop is not None:
770 if start > stop:
770 if start > stop:
771 step = -1
771 step = -1
772 stop += step
772 stop += step
773 else:
773 else:
774 stop = len(self)
774 stop = len(self)
775 return xrange(start, stop, step)
775 return xrange(start, stop, step)
776
776
777 @util.propertycache
777 @util.propertycache
778 def nodemap(self):
778 def nodemap(self):
779 self.rev(self.node(0))
779 self.rev(self.node(0))
780 return self._nodecache
780 return self._nodecache
781
781
782 def hasnode(self, node):
782 def hasnode(self, node):
783 try:
783 try:
784 self.rev(node)
784 self.rev(node)
785 return True
785 return True
786 except KeyError:
786 except KeyError:
787 return False
787 return False
788
788
789 def candelta(self, baserev, rev):
789 def candelta(self, baserev, rev):
790 """whether two revisions (baserev, rev) can be delta-ed or not"""
790 """whether two revisions (baserev, rev) can be delta-ed or not"""
791 # Disable delta if either rev requires a content-changing flag
791 # Disable delta if either rev requires a content-changing flag
792 # processor (ex. LFS). This is because such flag processor can alter
792 # processor (ex. LFS). This is because such flag processor can alter
793 # the rawtext content that the delta will be based on, and two clients
793 # the rawtext content that the delta will be based on, and two clients
794 # could have a same revlog node with different flags (i.e. different
794 # could have a same revlog node with different flags (i.e. different
795 # rawtext contents) and the delta could be incompatible.
795 # rawtext contents) and the delta could be incompatible.
796 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
796 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
797 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
797 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
798 return False
798 return False
799 return True
799 return True
800
800
801 def clearcaches(self):
801 def clearcaches(self):
802 self._cache = None
802 self._cache = None
803 self._chainbasecache.clear()
803 self._chainbasecache.clear()
804 self._chunkcache = (0, '')
804 self._chunkcache = (0, '')
805 self._pcache = {}
805 self._pcache = {}
806
806
807 try:
807 try:
808 self._nodecache.clearcaches()
808 self._nodecache.clearcaches()
809 except AttributeError:
809 except AttributeError:
810 self._nodecache = {nullid: nullrev}
810 self._nodecache = {nullid: nullrev}
811 self._nodepos = None
811 self._nodepos = None
812
812
813 def rev(self, node):
813 def rev(self, node):
814 try:
814 try:
815 return self._nodecache[node]
815 return self._nodecache[node]
816 except TypeError:
816 except TypeError:
817 raise
817 raise
818 except RevlogError:
818 except RevlogError:
819 # parsers.c radix tree lookup failed
819 # parsers.c radix tree lookup failed
820 if node == wdirid or node in wdirfilenodeids:
820 if node == wdirid or node in wdirfilenodeids:
821 raise error.WdirUnsupported
821 raise error.WdirUnsupported
822 raise LookupError(node, self.indexfile, _('no node'))
822 raise LookupError(node, self.indexfile, _('no node'))
823 except KeyError:
823 except KeyError:
824 # pure python cache lookup failed
824 # pure python cache lookup failed
825 n = self._nodecache
825 n = self._nodecache
826 i = self.index
826 i = self.index
827 p = self._nodepos
827 p = self._nodepos
828 if p is None:
828 if p is None:
829 p = len(i) - 2
829 p = len(i) - 2
830 else:
830 else:
831 assert p < len(i)
831 assert p < len(i)
832 for r in xrange(p, -1, -1):
832 for r in xrange(p, -1, -1):
833 v = i[r][7]
833 v = i[r][7]
834 n[v] = r
834 n[v] = r
835 if v == node:
835 if v == node:
836 self._nodepos = r - 1
836 self._nodepos = r - 1
837 return r
837 return r
838 if node == wdirid or node in wdirfilenodeids:
838 if node == wdirid or node in wdirfilenodeids:
839 raise error.WdirUnsupported
839 raise error.WdirUnsupported
840 raise LookupError(node, self.indexfile, _('no node'))
840 raise LookupError(node, self.indexfile, _('no node'))
841
841
842 # Accessors for index entries.
842 # Accessors for index entries.
843
843
844 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
844 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
845 # are flags.
845 # are flags.
846 def start(self, rev):
846 def start(self, rev):
847 return int(self.index[rev][0] >> 16)
847 return int(self.index[rev][0] >> 16)
848
848
849 def flags(self, rev):
849 def flags(self, rev):
850 return self.index[rev][0] & 0xFFFF
850 return self.index[rev][0] & 0xFFFF
851
851
852 def length(self, rev):
852 def length(self, rev):
853 return self.index[rev][1]
853 return self.index[rev][1]
854
854
855 def rawsize(self, rev):
855 def rawsize(self, rev):
856 """return the length of the uncompressed text for a given revision"""
856 """return the length of the uncompressed text for a given revision"""
857 l = self.index[rev][2]
857 l = self.index[rev][2]
858 if l is not None and l >= 0:
858 if l is not None and l >= 0:
859 return l
859 return l
860
860
861 t = self.revision(rev, raw=True)
861 t = self.revision(rev, raw=True)
862 return len(t)
862 return len(t)
863
863
864 def size(self, rev):
864 def size(self, rev):
865 """length of non-raw text (processed by a "read" flag processor)"""
865 """length of non-raw text (processed by a "read" flag processor)"""
866 # fast path: if no "read" flag processor could change the content,
866 # fast path: if no "read" flag processor could change the content,
867 # size is rawsize. note: ELLIPSIS is known to not change the content.
867 # size is rawsize. note: ELLIPSIS is known to not change the content.
868 flags = self.flags(rev)
868 flags = self.flags(rev)
869 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
869 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
870 return self.rawsize(rev)
870 return self.rawsize(rev)
871
871
872 return len(self.revision(rev, raw=False))
872 return len(self.revision(rev, raw=False))
873
873
874 def chainbase(self, rev):
874 def chainbase(self, rev):
875 base = self._chainbasecache.get(rev)
875 base = self._chainbasecache.get(rev)
876 if base is not None:
876 if base is not None:
877 return base
877 return base
878
878
879 index = self.index
879 index = self.index
-        base = index[rev][3]
-        while base != rev:
-            rev = base
-            base = index[rev][3]
+        iterrev = rev
+        base = index[iterrev][3]
+        while base != iterrev:
+            iterrev = base
+            base = index[iterrev][3]
884
885
885 self._chainbasecache[rev] = base
886 self._chainbasecache[rev] = base
886 return base
887 return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

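    # Rough sketch (editorial, not upstream code) of what _chaininfo()
    # computes: for a chain base <- r1 <- r2 under general delta, with
    # hypothetical compressed sizes e[1] of:
    #
    #   sizes = {0: 100, 1: 10, 2: 12}
    #   # _chaininfo(2) -> (2, 12 + 10 + 100) == (2, 122)
    #
    # i.e. two deltas plus the base text that must also be decompressed.
    # Partial results found in _chaininfocache short-circuit the walk.
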
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

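    # Editorial sketch of the pure-Python walk above, assuming general
    # delta and hypothetical bases {7: 4, 4: 2, 2: 2}:
    #
    #   _deltachain(7)            # -> ([2, 4, 7], False), the full chain
    #   _deltachain(7, stoprev=4) # -> ([7], True), halted early
    #
    # When ``stoprev`` is hit, the caller already holds the text of that
    # revision and only needs the deltas that come after it in the chain.
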
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

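    # A small editorial example of the generator above: in a linear
    # repository 0 <- 1 <- 2, descendants([0]) yields 1 then 2, while
    # descendants([2]) yields nothing, since a revision is not considered
    # its own descendant.
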
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

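    # In revset terms (editorial note), the return value above pairs the
    # inclusive ancestor set with the topologically sorted difference:
    #
    #   has, missing = rlog.findcommonmissing(common, heads)
    #   # has     ~ ::common (plus nullrev), lazily membership-testable
    #   # missing ~ sorted((::heads) - (::common)), as node IDs
    #
    # where ``rlog`` is any revlog instance; the names are illustrative.
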
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

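    # Editorial usage sketch: with roots R and heads H, nodesbetween
    # returns every node on a path from R to H, endpoints included:
    #
    #   nodes, outroots, outheads = rlog.nodesbetween([r], [h])
    #   # nodes is topologically sorted; outroots/outheads are the
    #   # members of [r]/[h] actually reachable, i.e. present in nodes
    #
    # (``rlog``, ``r`` and ``h`` are illustrative names, not upstream API.)
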
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

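    # The pure-Python fallback above is a single pass (editorial note):
    # every revision starts as a candidate head and each index entry
    # clears its two parents (nullrev parents land in the extra slot at
    # index -1). For a linear history 0 <- 1 <- 2:
    #
    #   ishead starts [1, 1, 1, 0]; visiting rev 1 clears ishead[0],
    #   visiting rev 2 clears ishead[1], leaving [2] as the only head.
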
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

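    # Resolution order in _match (editorial summary): an int is taken as
    # a revision number, a 20-byte string as a binary nodeid, a decimal
    # string as str(rev), and a 40-byte string as a hex nodeid. E.g.:
    #
    #   rlog._match(0)      # -> rlog.node(0)
    #   rlog._match('-1')   # -> the last revision's node
    #
    # (``rlog`` is an illustrative revlog instance, not upstream naming.)
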
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

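    # Editorial note: in the pure-Python path, shortest() grows the prefix
    # one hex digit at a time until _partialmatch() stops reporting
    # ambiguity, then pads only enough to avoid colliding with the
    # all-'f' wdir prefix, e.g.:
    #
    #   rlog.shortest(node)     # might return 'd1b2'
    #   rlog.shortest(node, 6)  # never shorter than 6: 'd1b2c3'
    #
    # The returned values above are, of course, hypothetical.
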
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
            self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

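    # Window arithmetic sketch (editorial): with a chunk cache size of
    # 65536, a request for offset=70000, length=1000 is widened to the
    # surrounding aligned window before reading:
    #
    #   realoffset = 70000 & ~65535                       # 65536
    #   reallength = ((71000 + 65536) & ~65535) - 65536   # 65536
    #
    # so one read of [65536, 131072) serves nearby requests from cache.
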
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

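    # Inline-revlog arithmetic (editorial sketch): when index and data
    # share one file, each revision's data is preceded by an index entry
    # of self._io.size bytes, hence the (rev + 1) * iosize corrections:
    #
    #   physical_start = data_offset + (startrev + 1) * iosize
    #   physical_end   = data_end    + (endrev + 1) * iosize
    #
    # For separate .i/.d files the raw offsets are used unchanged.
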
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

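    # Editorial example: with generaldelta, entry[3] names the delta
    # parent directly; without it, deltas always chain to the previous
    # revision. So for a snapshot at rev 4 (base == rev) and a
    # non-snapshot rev 5:
    #
    #   deltaparent(4) == nullrev        # full text, no delta parent
    #   deltaparent(5) == index[5][3]    # generaldelta revlogs
    #   deltaparent(5) == 4              # legacy sequential-delta revlogs
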
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

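    # Illustration (hypothetical usage, not part of this module): revision()
    # accepts either a rev number or a node, and raw=True skips the read-side
    # flag transforms:
    #
    #     text = rl.revision(5)                       # flag-processed text
    #     rawtext = rl.revision(rl.node(5), raw=True) # stored rawtext
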
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

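    # Sketch of the default scheme implemented by the module-level hash()
    # (for illustration only): the node is the SHA-1 of the sorted parent
    # nodes followed by the text, which ties content to history:
    #
    #     import hashlib
    #     def nodehash(text, p1, p2):
    #         a, b = sorted([p1, p2])
    #         s = hashlib.sha1(a)
    #         s.update(b)
    #         s.update(text)
    #         return s.digest()
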
    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") % operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

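    # Illustration (hypothetical processor, not part of this module): a flag
    # processor is a (readtransform, writetransform, rawtransform) triple
    # registered for a single flag bit, e.g. with the module-level
    # addflagprocessor() helper:
    #
    #     def _readb64(rl, text):
    #         return text.decode('base64'), True   # (text, validatehash)
    #     def _writeb64(rl, text):
    #         return text.encode('base64'), False
    #     def _rawb64(rl, text):
    #         return False    # rawtext hash does not match for this flag
    #     addflagprocessor(REVIDX_EXAMPLE_FLAG, (_readb64, _writeb64, _rawb64))
    #
    # REVIDX_EXAMPLE_FLAG is hypothetical; real flags must be declared in
    # REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
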
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise RevlogError(_("integrity check failed on %s:%s")
                                  % (self.indexfile, pycompat.bytestr(revornode)))
        except RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

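    # Illustration (hypothetical usage, not part of this module): adding a
    # revision inside an open transaction; 'repo', 'tr' and the parent nodes
    # are assumed context:
    #
    #     with repo.transaction('example') as tr:
    #         node = rl.addrevision('file contents', tr, linkrev,
    #                               p1node, p2node)
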
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') %
                                  stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

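    # Illustration (hypothetical usage, not part of this module) of the chunk
    # header convention shared by compress() and decompress(): 'u' prefixes
    # data stored uncompressed, while 'x' (zlib) and '\0' chunks are stored
    # as-is:
    #
    #     header, body = rl.compress('some revision data')
    #     chunk = header + body    # what lands in the data file
    #     assert bytes(rl.decompress(chunk)) == 'some revision data'
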
    def _isgooddeltainfo(self, deltainfo, revinfo):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if deltainfo is None:
            return False

        # - 'deltainfo.distance' is the distance from the base revision --
        #   bounding it limits the amount of I/O we need to do.
        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
        #   deltas we need to apply -- bounding it limits the amount of CPU
        #   we consume.

        textlen = revinfo.textlen
        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = deltainfo.distance # ensure the conditional passes
        maxdist = max(maxdist, defaultmax)
        if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or
            deltainfo.compresseddeltalen > textlen * 2 or
            (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
            return False

        return True

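    # Worked example (for illustration, default settings assumed): with
    # textlen = 1000, a delta is rejected when it spans more than
    # max(self._maxdeltachainspan, 4000) bytes on disk, when the delta itself
    # exceeds 1000 bytes, when the chain's compressed deltas sum to more than
    # 2000 bytes, or when the chain is longer than self._maxchainlen.
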
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid or node in wdirfilenodeids:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = None
        else:
            deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

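    # Illustration (hypothetical usage, not part of this module): each item
    # in ``deltas`` is a 7-tuple; a minimal group could look like:
    #
    #     deltas = [(node, p1node, p2node, linknode, deltabasenode,
    #                deltabytes, flags)]
    #     rl.addgroup(deltas, cl.rev, tr)  # cl.rev maps linknode -> linkrev
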
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

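    # Note (for illustration): a censored rawtext starts with the file
    # metadata block, e.g. "\1\ncensored:<tombstone>\1\n", and a
    # full-replacement delta carries a single ">lll" patch header
    # (start, end, newlength) -- 12 bytes -- so the replacement text begins
    # right after it, which is where the prefix check above looks.
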
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

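    # Worked example (for illustration): two branches off rev 0, where rev 1
    # has linkrev 2 and rev 2 has linkrev 1. getstrippoint(2) must truncate
    # at rev 1, the first rev whose linkrev >= 2, and reports rev 2 as
    # broken: its linkrev (1) is below minlink, but the revision sits above
    # the truncation point, so the call returns (1, {2}).
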
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

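    # Note (for illustration): the (dd, di) pair reports how far the files on
    # disk diverge from what the index implies -- dd for the data file, di
    # for the index file; (0, 0) means the sizes check out.
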
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing revlog
        are preserved in the destination revlog. The argument can have the
        following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is the
           fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).
        DELTAREUSEFULLADD
           Revisions will be re-added as if they were new, going through the
           full addrevision() path rather than the raw one.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        If unspecified, the destination revlog's current setting is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd
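
    # Illustration (hypothetical usage, not part of this module): recomputing
    # every delta while copying into a fresh revlog:
    #
    #     with repo.transaction('clone') as tr:
    #         rl.clone(tr, destrevlog, deltareuse=rl.DELTAREUSENEVER)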