revlog: efficient implementation of 'descendant'...
Boris Feld, r38532:6db38c9d (default branch)
@@ -1,2622 +1,2620 b''
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import heapq
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .utils import (
    stringutil,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
# bitmask of flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)
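
# Editor's illustration (not part of the original module): ``packmeta`` and
# ``parsemeta`` are inverses. packmeta frames sorted "key: value" lines
# between two "\1\n" markers; parsemeta returns the mapping plus the number
# of bytes the metadata block occupies.
def _example_parsemeta():
    blob = packmeta({'copy': 'a.txt'}, 'file content')
    meta, offset = parsemeta(blob)
    assert meta == {'copy': 'a.txt'}
    assert blob[offset:] == 'file content'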

def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor
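
# Editor's illustration (hypothetical, not part of the module): a processor
# is the (read, write, raw) triple described above. This sketch registers
# identity transforms on REVIDX_EXTSTORED; a real user such as LFS would
# substitute genuine conversions between stored rawtext and user-visible
# text. It is never called here, since a flag admits only one processor.
def _example_flagprocessor():
    def _read(rl, rawtext):
        return rawtext, True   # text, usable for hash verification
    def _write(rl, text):
        return text, True      # rawtext, usable for hash verification
    def _raw(rl, rawtext):
        return True            # rawtext usable for hash verification
    addflagprocessor(REVIDX_EXTSTORED, (_read, _write, _raw))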

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
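
# Editor's illustration: the first field of an index entry packs the byte
# offset into the high bits and the 16-bit flag field into the low bits;
# getoffset/gettype recover the two halves.
def _example_offset_type():
    q = offset_type(4096, REVIDX_ISCENSORED)
    assert getoffset(q) == 4096
    assert gettype(q) == REVIDX_ISCENSORED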

_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()
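
# Editor's illustration: the node is sha1(min(p1, p2) + max(p1, p2) + text).
# Since nullid is twenty NUL bytes it always sorts first, so the cached
# _nullhash fast path above yields the same digest for a single-parent rev.
def _example_hash():
    text, p1 = 'data', '\xaa' * 20
    expected = hashlib.sha1(nullid + p1 + text).digest()
    assert hash(text, p1, nullid) == expected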

def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # Trim empty revs at the end, but never the very first revision of a chain
    while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
        endidx -= 1

    return revs[startidx:endidx]
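
# Editor's illustration with a hypothetical stand-in for a revlog: only the
# ``length`` accessor is consulted. Trailing zero-length revisions are
# trimmed, but the first revision of the slice is always kept.
def _example_trimchunk():
    class fakerevlog(object):
        _lengths = {0: 10, 1: 0, 2: 7, 3: 0, 4: 0}
        def length(self, rev):
            return self._lengths[rev]
    assert _trimchunk(fakerevlog(), [0, 1, 2, 3, 4], 0) == [0, 1, 2]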

def _slicechunk(revlog, revs):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read at once.
    Assume that revs are sorted.
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    startbyte = start(revs[0])
    endbyte = start(revs[-1]) + length(revs[-1])
    readdata = deltachainspan = endbyte - startbyte

    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > revlog._srmingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < revlog._srdensitythreshold:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
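
# Editor's illustration with a hypothetical stand-in for a revlog: two dense
# groups of revisions separated by a large gap get sliced into two reads
# instead of one long read dominated by unrelated bytes.
def _example_slicechunk():
    class fakerevlog(object):
        _srmingapsize = 10
        _srdensitythreshold = 0.5
        # rev -> (start offset, length); revs 0-1 are adjacent, rev 2 starts
        # after a 1000-byte hole.
        _chunks = {0: (0, 50), 1: (50, 50), 2: (1100, 50)}
        def start(self, rev):
            return self._chunks[rev][0]
        def length(self, rev):
            return self._chunks[rev][1]
    assert list(_slicechunk(fakerevlog(), [0, 1, 2])) == [[0, 1], [2]]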

@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()

class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides revisions that are interesting to diff against,
        grouped by level of ease.
        """
        revlog = self.revlog
        gdelta = revlog._generaldelta
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and gdelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if gdelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]

                if not revlog._aggressivemergedeltas and len(parents) == 2:
                    parents.sort()
                    # To minimize the chance of having to build a fulltext,
                    # pick first whichever parent is closest to us (max rev)
                    yield (parents[1],)
                    # then the other one (min rev) if the first did not fit
                    yield (parents[0],)
                    tested.update(parents)
                elif len(parents) > 0:
                    # Test all parents (1 or 2), and keep the best candidate
                    yield parents
                    tested.update(parents)

            if prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                yield (prev,)
                tested.add(prev)

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo
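
# Editor's illustration with a hypothetical stand-in for a revlog: with
# generaldelta enabled, no cached delta, and two real parents, the computer
# proposes the closest parent first, then the other parent, then the
# previous revision as a last resort before a fulltext.
def _example_candidaterevs():
    class fakerevlog(object):
        _generaldelta = True
        _lazydeltabase = False
        _aggressivemergedeltas = False
        storedeltachains = True
        def __len__(self):
            return 5            # revisions 0..4 exist; the new rev will be 5
        def rev(self, node):
            return {'p1': 2, 'p2': 3}[node]
    dc = _deltacomputer(fakerevlog())
    groups = list(dc._getcandidaterevs('p1', 'p2', None))
    assert groups == [(3,), (2,), (4,)]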

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
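
# Editor's illustration: each v0 index entry is a fixed 76 bytes, four 4-byte
# big-endian integers followed by three 20-byte nodeids, matching the field
# list in the comment above.
def _example_indexformatv0():
    assert indexformatv0.size == 4 * 4 + 3 * 20 == 76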

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p
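
# Editor's illustration: a v1 ("ng") index entry is a fixed 64 bytes: 8
# (offset+flags) + 6 * 4 (lengths, base, link, parent revs) + 20 (nodeid)
# + 12 bytes of padding. For the first entry, packentry splices the 4-byte
# version header over the high bytes of the offset field, which is safe
# because rev 0 always starts at offset 0. The entry values here are
# hypothetical.
def _example_indexformatng():
    assert indexformatng.size == 64
    entry = (offset_type(0, 0), 11, 12, 0, 0, -1, -1, '\x11' * 20)
    packed = revlogio().packentry(entry, None, REVLOG_DEFAULT_VERSION, 0)
    assert packed[:4] == versionformat_pack(REVLOG_DEFAULT_VERSION)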

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._srdensitythreshold = 0.25
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._withsparseread = bool(opts.get('with-sparse-read', False))
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                        self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
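            # shift the boundary by one step so ``stop`` itself is included
            # in either direction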
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            else:
                assert p < len(i)
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]
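        # the first revision of a chain has a base field equal to its own
        # revision number, so the walk above terminates without a sentinel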
885
885
886 self._chainbasecache[rev] = base
886 self._chainbasecache[rev] = base
887 return base
887 return base
888
888
    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

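    # Illustrative sketch (hypothetical revisions): with general delta, if
    # rev 5 is stored as a delta against rev 3, and rev 3 against rev 0 (a
    # full snapshot), then _chaininfo(5) returns (2, total), where 2 is the
    # number of deltas to apply and total sums the compressed lengths of
    # both deltas plus the base snapshot.
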
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
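
    # Sketch of how a chain is consumed (this mirrors the revision() read
    # path further down): the chunks for ``chain`` are fetched in one pass
    # and the base text is patched with each following delta, e.g.
    #
    #   chain, stopped = self._deltachain(rev)
    #   bins = self._chunks(chain)
    #   text = mdiff.patches(bytes(bins[0]), bins[1:])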

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

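    # Illustrative usage (hypothetical revlog ``rl`` with linear history
    # 0 <- 1 <- 2):
    #   list(rl.descendants([0])) == [1, 2]   # 0 itself is excluded
    #   list(rl.descendants([2])) == []
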
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

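    # Illustrative note (hypothetical nodes a and b): findmissing([a], [b])
    # yields the nodes of the revset ``::b - ::a``, oldest first, while
    # findmissingrevs is the same computation expressed in revision numbers.
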
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        """True if revision 'end' is a descendant of revision 'start'

        A revision is considered a descendant of itself."""
        if start == nullrev:
            return True
        elif start == end:
            return True
        return start in self._commonancestorsheads(start, end)

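    # Illustrative sketch (hypothetical revlog ``rl`` with linear history
    # 0 <- 1 <- 2):
    #   rl.descendant(0, 2) -> True    # rev 2 descends from rev 0
    #   rl.descendant(2, 0) -> False
    # Testing membership in _commonancestorsheads(start, end) replaces the
    # previous implementation, which scanned every revision yielded by
    # descendants([start]).
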
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

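    # Illustrative sketch (hypothetical DAG with two branches off rev 0,
    # i.e. 0 <- 1 and 0 <- 2): _commonancestorsheads(1, 2) == [0], since
    # rev 0 is the only head among the common ancestors of 1 and 2.
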
    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids here, as they should always
        # be full hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

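    # Illustrative usage (hypothetical revlog ``rl`` and identifiers):
    #   rl.lookup('0')        -> binary node of revision 0
    #   rl.lookup('1e4f2a')   -> node whose hex form starts with '1e4f2a',
    #                            or LookupError if unknown or ambiguous
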
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

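    # Illustrative arithmetic (assuming _chunkcachesize = 65536): a read of
    # 100 bytes at offset 70000 rounds down to realoffset = 65536 and reads
    # reallength = 65536 bytes, i.e. the whole cache-aligned window
    # [65536, 131072) is fetched and cached in one go.
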
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

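    # Note (sketch): for inline revlogs the index entries are interleaved
    # with the revision data in a single file, so every revision's data is
    # pushed back by one index-entry size (self._io.size) per preceding
    # revision -- that is what the (rev + 1) * self._io.size terms above
    # account for.
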
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

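    # Illustrative note: when rev2 is already stored as a delta against
    # rev1, the stored chunk holds exactly the wanted delta and is returned
    # after decompression; otherwise both raw texts are materialized and a
    # fresh binary diff is computed with mdiff.textdiff.
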
    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

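# Editor's sketch (not part of revlog.py): the single-entry cache fast path of
# revision() modelled as a standalone function. The (node, rev, rawtext) cache
# tuple and the flags/default_flags values are hypothetical stand-ins.
def sketch_cachelookup(cache, node, raw, flags, default_flags=0):
    """Return cached text when it can be served without touching the revlog,
    or None when the caller must reconstruct the text from the delta chain."""
    if not cache or cache[0] != node:
        return None
    if raw:
        # callers asking for raw data get the cached rawtext verbatim
        return cache[2]
    if flags == default_flags:
        # no flag processors apply to this revision, so text == rawtext
        return cache[2]
    # rawtext is reusable, but flag processors still have to run
    return None
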
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

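# Editor's sketch (not part of revlog.py): the conventional Mercurial node
# hash that the module-level hash() used above computes -- SHA-1 over both
# parent nodes in sorted order, followed by the text.
import hashlib

def sketch_nodehash(text, p1, p2):
    s = hashlib.sha1(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()

# a root revision hashes against two 20-byte null parents:
# sketch_nodehash(b'hello\n', b'\0' * 20, b'\0' * 20)
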
    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

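# Editor's sketch (not part of revlog.py): a minimal standalone model of the
# flag-processor pipeline above. The flag bits, registry and transforms below
# are hypothetical; real processors also receive the revlog and a raw
# transform, and are registered through the module's registration hook.
SKETCH_FLAG_A = 1 << 0
SKETCH_FLAG_B = 1 << 1
SKETCH_FLAGS_ORDER = [SKETCH_FLAG_A, SKETCH_FLAG_B]

# flag -> (readtransform, writetransform)
SKETCH_PROCESSORS = {
    SKETCH_FLAG_A: (lambda t: (t.upper(), True),
                    lambda t: (t.lower(), True)),
}

def sketch_processflags(text, flags, operation):
    validatehash = True
    order = SKETCH_FLAGS_ORDER
    if operation == 'write':
        # write transforms run in reverse so they undo reads in opposite order
        order = reversed(order)
    for flag in order:
        if flag & flags:
            if flag not in SKETCH_PROCESSORS:
                raise ValueError("missing processor for flag %#x" % flag)
            read, write = SKETCH_PROCESSORS[flag]
            transform = read if operation == 'read' else write
            text, vhash = transform(text)
            validatehash = validatehash and vhash
    return text, validatehash

# sketch_processflags('abc', SKETCH_FLAG_A, 'read') == ('ABC', True)
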
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise RevlogError(_("integrity check failed on %s:%s")
                                  % (self.indexfile, pycompat.bytestr(revornode)))
        except RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

        # the temp file replaces the real index when we exit the context
        # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

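# Editor's sketch (not part of revlog.py): the return convention above, with
# zlib standing in for the configured compressor. A chunk is stored either
# compressed (the zlib stream itself starts with 'x'), verbatim when it
# already starts with '\0', or prefixed with 'u' to mark plain text.
import zlib

def sketch_compress(data):
    if not data:
        return b'', data
    compressed = zlib.compress(data)
    if len(compressed) < len(data):
        # no separate header needed: 'x' is the first byte of a zlib stream
        return b'', compressed
    if data[0:1] == b'\0':
        return b'', data
    return b'u', data
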
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the case of "decompress
        # compressed data encoded with common and officially supported
        # compression engines" over "raw data" and "data encoded by less
        # common or non-official compression engines." That is why we have
        # the inline lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') %
                                  stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

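# Editor's sketch (not part of revlog.py): the header-byte dispatch above,
# limited to the three built-in cases ('x' zlib, '\0' raw, 'u' uncompressed).
# Third-party engines registered under other header bytes are omitted.
import zlib

def sketch_decompress(data):
    if not data:
        return data
    t = data[0:1]
    if t == b'x':
        return zlib.decompress(data)
    if t == b'\0':
        # raw chunk that happened to begin with a NUL byte
        return data
    if t == b'u':
        # stored uncompressed; strip the one-byte marker
        return data[1:]
    raise ValueError('unknown compression type %r' % t)

# round trip with sketch_compress above:
# sketch_decompress(b''.join(sketch_compress(b'x' * 100))) == b'x' * 100
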
    def _isgooddeltainfo(self, deltainfo, revinfo):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if deltainfo is None:
            return False

        # - 'deltainfo.distance' is the distance from the base revision --
        #   bounding it limits the amount of I/O we need to do.
        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
        #   deltas we need to apply -- bounding it limits the amount of CPU
        #   we consume.

        textlen = revinfo.textlen
        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = deltainfo.distance # ensure the conditional below passes
        maxdist = max(maxdist, defaultmax)
        if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or
            deltainfo.compresseddeltalen > textlen * 2 or
            (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
            return False

        return True

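# Editor's sketch (not part of revlog.py): the delta-goodness bounds above as
# a standalone predicate over plain numbers. Parameter names mirror the
# deltainfo fields; the sample values at the bottom are made up.
def sketch_isgooddelta(distance, deltalen, compresseddeltalen, chainlen,
                       textlen, maxdeltachainspan=0, maxchainlen=None):
    defaultmax = textlen * 4
    maxdist = maxdeltachainspan or distance  # 0 disables the span bound
    maxdist = max(maxdist, defaultmax)
    if distance > maxdist:
        return False  # reading the chain spans too much of the data file
    if deltalen > textlen:
        return False  # the delta is no smaller than the full text
    if compresseddeltalen > textlen * 2:
        return False  # applying the chain costs more CPU than it saves
    if maxchainlen and chainlen > maxchainlen:
        return False  # chain exceeds the configured length limit
    return True

# sketch_isgooddelta(distance=500, deltalen=40, compresseddeltalen=120,
#                    chainlen=3, textlen=200) == True
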
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid or node in wdirfilenodeids:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = None
        else:
            deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer._buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

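# Editor's sketch (not part of revlog.py): the shape of one entry in the
# ``deltas`` iterable consumed by addgroup() above, plus a toy linkmapper.
# The node values and the node-to-rev mapping are hypothetical placeholders.
import collections

SketchDelta = collections.namedtuple(
    'SketchDelta', ['node', 'p1', 'p2', 'linknode', 'deltabase',
                    'delta', 'flags'])

def sketch_linkmapper(linknodes):
    # addgroup() receives a callable translating a changelog node into the
    # linkrev to store; here it is modelled as a simple dict lookup
    mapping = {node: rev for rev, node in enumerate(linknodes)}
    return mapping.__getitem__

# each entry unpacks positionally, exactly as addgroup() expects:
# node, p1, p2, linknode, deltabase, delta, flags = SketchDelta(...)
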
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

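# Editor's sketch (not part of revlog.py): the full-replacement heuristic
# above, self-contained. A full-replacement delta is a single patch hunk: a
# ">lll" header (start, end, new length) followed by the replacement text,
# which for a censored revision begins with the "\1\ncensored:" meta key.
import struct

def sketch_peek_iscensored(oldlen, delta):
    hlen = struct.calcsize(">lll")
    if len(delta) <= hlen:
        return False
    newlen = len(delta) - hlen
    # header of a hunk replacing bytes [0, oldlen) with newlen new bytes
    if delta[:hlen] != struct.pack(">lll", 0, oldlen, newlen):
        return False
    add = b"\1\ncensored:"
    return newlen >= len(add) and delta[hlen:hlen + len(add)] == add

# sketch_peek_iscensored(5, struct.pack(">lll", 0, 5, 12) + b"\1\ncensored:x")
# == True
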
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

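# Editor's sketch (not part of revlog.py): the same walk on a toy *linear*
# history, where rev i has parent i - 1 and linkrevs come as a plain list
# (assumed non-empty). In a linear history brokenrevs stays empty; broken
# linkrevs only appear in branchy graphs where low-linkrev revisions sit
# above the strip point.
def sketch_getstrippoint(linkrevs, minlink):
    nullrev = -1
    strippoint = len(linkrevs)
    brokenrevs = set()
    heads = {strippoint - 1: linkrevs[-1]}  # single head: the tip
    futurelarge = set(lr for lr in heads.values() if lr >= minlink)
    while futurelarge:
        strippoint -= 1
        linkrev = heads.pop(strippoint)
        if linkrev < minlink:
            brokenrevs.add(strippoint)
        else:
            futurelarge.remove(linkrev)
        parent = strippoint - 1
        if parent != nullrev:
            heads[parent] = linkrevs[parent]
            if linkrevs[parent] >= minlink:
                futurelarge.add(linkrevs[parent])
    return strippoint, brokenrevs

# sketch_getstrippoint([0, 2, 5, 7], minlink=5) == (2, set())
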
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        If it is unset, the destination revlog's current setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd
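
# Editor's sketch (not part of revlog.py): a hedged usage example of clone(),
# e.g. to recompute every delta when changing storage settings. The src, dest
# and tr names are assumed to be an existing revlog, an empty destination
# revlog and an open transaction.
def sketch_recompute_deltas(src, dest, tr):
    def progress(rl, rev, node):
        # invoked once per copied revision
        pass
    src.clone(tr, dest, addrevisioncb=progress,
              deltareuse=src.DELTAREUSENEVER)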