shortest: make pure code also disambiguate against revnums at end...
Martin von Zweigbergk
r37988:0db7fe7c default
@@ -1,2623 +1,2614 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import heapq
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .utils import (
    stringutil,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

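# Arithmetic note (added for illustration): the version field packs the
# format number into the low 16 bits and the feature flags into the high
# bits, so REVLOG_DEFAULT_VERSION == REVLOGV1 | FLAG_INLINE_DATA
# == 1 | (1 << 16) == 0x10001.
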
# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
# bitmask for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)

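# Doctest-style sketch (added for illustration) of the "\1\n"-delimited
# metadata framing shared by packmeta() and parsemeta():
#
#   >>> packmeta({'copy': 'a'}, 'content')
#   '\x01\ncopy: a\n\x01\ncontent'
#   >>> parsemeta(packmeta({'copy': 'a'}, 'content'))
#   ({'copy': 'a'}, 12)
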
def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate the hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate the hash. Usually, "write" and "read" return
      different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor

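# Hypothetical registration sketch (added for illustration; the helper names
# and transforms are made up): an extension supplies a (read, write, raw)
# 3-tuple matching the signatures documented above, e.g.
#
#   def _readexample(self, rawtext):
#       return transform(rawtext), True    # text, usable for hash checks
#   def _writeexample(self, text):
#       return untransform(text), False    # rawtext; hash over "text"
#   def _rawexample(self, rawtext):
#       return False                       # rawtext not usable for hashing
#   addflagprocessor(REVIDX_EXTSTORED,
#                    (_readexample, _writeexample, _rawexample))
#
# For instance, the LFS extension registers a processor on REVIDX_EXTSTORED
# in this way.
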
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

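# Worked example (added for illustration): offset_type() packs a byte offset
# and flag bits into the first index-tuple field, and the two helpers above
# invert it:
#
#   >>> q = offset_type(4096, REVIDX_ISCENSORED)
#   >>> (getoffset(q), gettype(q)) == (4096, REVIDX_ISCENSORED)
#   True
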
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, it is p2
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()

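# Equivalent formulation (added for illustration): for two non-null parents
# the digest above is SHA-1 over the lexicographically sorted parent nodes
# followed by the text, i.e.
#
#   hashlib.sha1(min(p1, p2) + max(p1, p2) + text).digest()
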
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # Trim empty revs at the end, but never the very first revision of a chain
    while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
        endidx -= 1

    return revs[startidx:endidx]

def _slicechunk(revlog, revs):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one go.
    Assumes that revs are sorted.
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    startbyte = start(revs[0])
    endbyte = start(revs[-1]) + length(revs[-1])
    readdata = deltachainspan = endbyte - startbyte

    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > revlog._srmingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < revlog._srdensitythreshold:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

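# Illustration (not from the original source): with _srdensitythreshold=0.5
# and _srmingapsize=10, two revisions laid out on disk as
#
#   [rev 0: 10 bytes][gap: 100 bytes][rev 1: 10 bytes]
#
# give a density of 20/120, so the 100-byte gap is cut and _slicechunk
# yields [0] and [1] as two separate reads instead of one 120-byte read.
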
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()

class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides revisions that are worth diffing against, grouped by how
        cheaply a good delta is expected to be found.
        """
        revlog = self.revlog
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if revlog._generaldelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not revlog._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                yield parents

            if prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                yield (prev,)

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

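# Size check (added for illustration): a v0 record is four 4-byte integers
# plus three 20-byte nodeids, so indexformatv0.size == 16 + 60 == 76 bytes.
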
class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

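# Size check (added for illustration): ">Qiiiiii20s12x" is 8 + 6*4 + 20 + 12
# == 64 bytes per record; the 12 trailing pad bytes reserve the 32-byte
# nodeid space described above while current hashes only occupy 20 bytes.
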
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._srdensitythreshold = 0.25
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._withsparseread = bool(opts.get('with-sparse-read', False))
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                        self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

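    # Behavior note (added for illustration): ``stop`` is shifted by one so
    # that the bound is inclusive in both directions; e.g. revs(2, 0) yields
    # 2, 1, 0 and revs(0, 2) yields 0, 1, 2.
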
    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            else:
                assert p < len(i)
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

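    # Note (added for illustration): start() and flags() invert the
    # offset_type() packing defined above: index[rev][0] >> 16 recovers the
    # byte offset and index[rev][0] & 0xFFFF recovers the flag bits.
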
843 def length(self, rev):
843 def length(self, rev):
844 return self.index[rev][1]
844 return self.index[rev][1]
845
845
846 def rawsize(self, rev):
846 def rawsize(self, rev):
847 """return the length of the uncompressed text for a given revision"""
847 """return the length of the uncompressed text for a given revision"""
848 l = self.index[rev][2]
848 l = self.index[rev][2]
849 if l >= 0:
849 if l >= 0:
850 return l
850 return l
851
851
852 t = self.revision(rev, raw=True)
852 t = self.revision(rev, raw=True)
853 return len(t)
853 return len(t)
854
854
855 def size(self, rev):
855 def size(self, rev):
856 """length of non-raw text (processed by a "read" flag processor)"""
856 """length of non-raw text (processed by a "read" flag processor)"""
857 # fast path: if no "read" flag processor could change the content,
857 # fast path: if no "read" flag processor could change the content,
858 # size is rawsize. note: ELLIPSIS is known to not change the content.
858 # size is rawsize. note: ELLIPSIS is known to not change the content.
859 flags = self.flags(rev)
859 flags = self.flags(rev)
860 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
860 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
861 return self.rawsize(rev)
861 return self.rawsize(rev)
862
862
863 return len(self.revision(rev, raw=False))
863 return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
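
    # Illustrative sketch (hypothetical chain, not part of the original
    # file): _chaininfo() walks from ``rev`` toward its chain base, counting
    # hops and summing on-disk delta sizes.  If revision 5 deltas against 3,
    # which deltas against its full-text base 1:
    #
    #     clen, compressed = rl._chaininfo(5)
    #     # clen == 2 (two deltas applied on top of the base text)
    #     # compressed == len(delta 5) + len(delta 3) + len(base text 1)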

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
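
    # Illustrative sketch (same hypothetical chain 1 <- 3 <- 5 as above):
    #
    #     rl._deltachain(5)             # -> ([1, 3, 5], False)
    #     rl._deltachain(5, stoprev=3)  # -> ([5], True)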

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]
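
    # Illustrative sketch (hypothetical history, not part of the original
    # file): every revision starts as a head candidate and is knocked out
    # as soon as it appears as somebody's parent.  With a linear run 0-1-2
    # plus a branch 3 whose parent is 1:
    #
    #     parents: 1 -> (0, -1), 2 -> (1, -1), 3 -> (1, -1)
    #     # 0 and 1 get cleared, so _headrevs() returns [2, 3]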

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]
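
    # Illustrative sketch (same hypothetical history as above): with a
    # start node given, the loop grows the set of revisions reachable from
    # it and demotes every reachable parent.  heads(node(1)) visits 2 and 3,
    # drops 1 as the parent of both, and returns the nodes of 2 and 3.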

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass
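
    # Illustrative sketch (not part of the original file): _match() accepts,
    # in order, an integer revision, a 20-byte binary node, a decimal
    # revision string, and a 40-character hex node; anything else falls
    # through and returns None so _partialmatch() can try prefixes:
    #
    #     rl._match(0)         # node of revision 0
    #     rl._match('42')      # node of revision 42, if 42 is a valid rev
    #     rl._match('f' * 40)  # binary node for that hex id, if present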

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isrev(prefix):
            try:
                i = int(prefix)
                # if we are a pure int, then starting with zero will not be
                # confused as a rev; or, obviously, if the int is larger
                # than the value of the tip rev
                if prefix[0] == '0' or i > len(self):
                    return False
                return True
            except ValueError:
                return False

        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return not isrev(prefix)

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not isrev(prefix) and not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
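
    # Illustrative sketch (hypothetical repository, not part of the original
    # file): shortest() lengthens the hex prefix until it is neither a
    # plausible revision number nor ambiguous with another node or the
    # all-'f' wdir id.  In a repository of ~100 revisions, the prefix "12"
    # of a node starting with "12ab..." is rejected because it reads as
    # revision 12, so "12a" is tried next.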

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data
1581
1591 def _readsegment(self, offset, length, df=None):
1582 def _readsegment(self, offset, length, df=None):
1592 """Load a segment of raw data from the revlog.
1583 """Load a segment of raw data from the revlog.
1593
1584
1594 Accepts an absolute offset, length to read, and an optional existing
1585 Accepts an absolute offset, length to read, and an optional existing
1595 file handle to read from.
1586 file handle to read from.
1596
1587
1597 If an existing file handle is passed, it will be seeked and the
1588 If an existing file handle is passed, it will be seeked and the
1598 original seek position will NOT be restored.
1589 original seek position will NOT be restored.
1599
1590
1600 Returns a str or buffer of raw byte data.
1591 Returns a str or buffer of raw byte data.
1601 """
1592 """
1602 # Cache data both forward and backward around the requested
1593 # Cache data both forward and backward around the requested
1603 # data, in a fixed size window. This helps speed up operations
1594 # data, in a fixed size window. This helps speed up operations
1604 # involving reading the revlog backwards.
1595 # involving reading the revlog backwards.
1605 cachesize = self._chunkcachesize
1596 cachesize = self._chunkcachesize
1606 realoffset = offset & ~(cachesize - 1)
1597 realoffset = offset & ~(cachesize - 1)
1607 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1598 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1608 - realoffset)
1599 - realoffset)
1609 with self._datareadfp(df) as df:
1600 with self._datareadfp(df) as df:
1610 df.seek(realoffset)
1601 df.seek(realoffset)
1611 d = df.read(reallength)
1602 d = df.read(reallength)
1612 self._cachesegment(realoffset, d)
1603 self._cachesegment(realoffset, d)
1613 if offset != realoffset or reallength != length:
1604 if offset != realoffset or reallength != length:
1614 return util.buffer(d, offset - realoffset, length)
1605 return util.buffer(d, offset - realoffset, length)
1615 return d
1606 return d
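
    # Illustrative sketch (assuming a 65536-byte chunk cache): a request
    # for offset=70000, length=100 is widened to the surrounding aligned
    # window before the read:
    #
    #     realoffset = 70000 & ~65535                             # 65536
    #     reallength = ((70000 + 100 + 65536) & ~65535) - 65536   # 65536
    #
    # so bytes 65536..131071 are read and cached in a single pass.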

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)
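
    # Illustrative sketch (not part of the original file): in an inline
    # revlog the index entries (self._io.size bytes each) are interleaved
    # with the data, so a logical data offset must skip one index entry per
    # revision up to and including the requested one:
    #
    #     physical = dataoffset + (rev + 1) * self._io.size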

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1
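
    # Illustrative sketch (not part of the original file): base == rev marks
    # a full-text revision, so there is no delta parent.  Otherwise a
    # generaldelta revlog stores the actual base, while a classic revlog
    # always deltas against the previous revision:
    #
    #     rl.deltaparent(rev)  # nullrev, index[rev][3], or rev - 1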

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))
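
    # Illustrative sketch (not part of the original file): when rev2 is
    # already stored as a delta against rev1, the stored chunk is returned
    # as-is; otherwise a fresh binary delta is computed.  Patching rev1's
    # raw text with the delta should reproduce rev2's raw text:
    #
    #     d = rl.revdiff(4, 5)
    #     mdiff.patch(rl.revision(4, raw=True), d) == rl.revision(5, raw=True)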

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text
1825
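    # --- Editor's illustrative sketch (not part of revlog.py) ---
    # revision() reconstructs a fulltext by starting from the base of the
    # delta chain and folding every delta on top of it. The same shape,
    # spelled out against the internal helpers already used above:
    def _revision_reconstruct_sketch(self, rl, rev):
        chain, stopped = rl._deltachain(rev)
        bins = rl._chunks(chain)              # decompressed chunks, base first
        base = bytes(bins[0])                 # full snapshot at the chain base
        return mdiff.patches(base, bins[1:])  # apply the deltas in order
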
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

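    # --- Editor's illustrative sketch (not part of revlog.py) ---
    # The module-level hash() this delegates to is SHA-1 over the two parent
    # nodes in sorted order followed by the text, which makes the node id
    # independent of parent order. A behaviorally equivalent sketch:
    def _nodehash_sketch(self, text, p1, p2):
        s = hashlib.sha1(min(p1, p2))
        s.update(max(p1, p2))
        s.update(text)
        return s.digest()
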
    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

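    # --- Editor's illustrative sketch (not part of revlog.py) ---
    # A flag processor is a (read, write, raw) triple of callables keyed by
    # its revision flag in the _flagprocessors mapping used above. The names
    # and the flag bit below are hypothetical; only the tuple shape and the
    # return conventions mirror the real code: read/write transforms return
    # (newtext, validatehash) while the raw transform returns validatehash.
    def _flagprocessor_sketch(self):
        def _readxor(rl, text):
            return bytes(bytearray(b ^ 0xff for b in bytearray(text))), True
        def _writexor(rl, text):
            return bytes(bytearray(b ^ 0xff for b in bytearray(text))), False
        def _rawxor(rl, text):
            return False   # stored rawtext does not hash to the node
        HYPOTHETICAL_FLAG = 1 << 4   # assumption: an otherwise unused bit
        _flagprocessors[HYPOTHETICAL_FLAG] = (_readxor, _writexor, _rawxor)
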
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise RevlogError(_("integrity check failed on %s:%s")
                                  % (self.indexfile, pycompat.bytestr(revornode)))
        except RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

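    # --- Editor's illustrative sketch (not part of revlog.py) ---
    # An inline revlog interleaves each index entry with its data chunk in
    # the .i file; once the data outgrows _maxinline bytes it is split into a
    # separate .d file. The guard above, restated for a hypothetical revlog
    # 'rl' (start(-2) + length(-2) is the end offset of the last real
    # revision; index[-1] is the null sentinel entry):
    def _inline_threshold_sketch(self, rl):
        if not rl._inline:
            return 'already split: index in .i, data in .d'
        if rl.start(-2) + rl.length(-2) < _maxinline:
            return 'still small enough to stay inline'
        return 'next _enforceinlinesize() call will split the revlog'
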
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

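    # --- Editor's illustrative sketch (not part of revlog.py) ---
    # Typical call shape for addrevision(). The revlog 'rl' and transaction
    # 'tr' are assumptions: a real caller obtains the transaction from the
    # repository and the parents from existing revisions (nullid for a root
    # revision).
    def _addrevision_usage_sketch(self, rl, tr):
        text = b'hello revlog\n'
        node = rl.addrevision(text, tr, 0, nullid, nullid)
        assert rl.revision(node) == text   # round-trips through the store
        return node
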
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') %
                                  stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

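    # --- Editor's illustrative sketch (not part of revlog.py) ---
    # The first byte of a stored chunk routes decompression: 'x' is a zlib
    # stream (zlib output happens to start with 'x' at default settings),
    # '\0' is literal data stored as-is, and 'u' prefixes uncompressed data
    # that would otherwise be ambiguous. A round-trip through both sides,
    # for a hypothetical revlog 'rl' using the default zlib compressor:
    def _chunkheader_sketch(self, rl):
        for payload in (b'some compressible text ' * 50, b'\0binary blob'):
            header, stored = rl.compress(payload)
            assert bytes(rl.decompress(header + stored)) == payload
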
    def _isgooddeltainfo(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if d is None:
            return False

        # - 'd.distance' is the distance from the base revision -- bounding it
        #   limits the amount of I/O we need to do.
        # - 'd.compresseddeltalen' is the sum of the total size of deltas we
        #   need to apply -- bounding it limits the amount of CPU we consume.

        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = d.distance # ensure the conditional passes
        maxdist = max(maxdist, defaultmax)
        if (d.distance > maxdist or d.deltalen > textlen or
            d.compresseddeltalen > textlen * 2 or
            (self._maxchainlen and d.chainlen > self._maxchainlen)):
            return False

        return True

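    # --- Editor's illustrative sketch (not part of revlog.py) ---
    # The acceptance bounds above, made concrete for a 1000-byte fulltext:
    # the delta itself may not exceed the fulltext, the chain's deltas may
    # decompress to at most twice the fulltext, and, when a delta-chain span
    # is configured, the on-disk read span is capped at max(span, 4 *
    # textlen). When no span is configured, 'maxdist = d.distance' above
    # deliberately turns the distance check into a no-op.
    def _deltabounds_sketch(self, d, textlen=1000, span=0):
        maxdist = max(span or d.distance, 4 * textlen)
        return (d.distance <= maxdist                      # bounded I/O
                and d.deltalen <= textlen                  # delta <= fulltext
                and d.compresseddeltalen <= 2 * textlen)   # bounded CPU
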
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid or node in wdirfilenodeids:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = None
        else:
            deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

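    # --- Editor's illustrative sketch (not part of revlog.py) ---
    # Each item consumed by addgroup() is a 7-tuple, unpacked above as
    # (node, p1, p2, linknode, deltabase, delta, flags). A generator shaped
    # like a one-revision changegroup, with hypothetical placeholder values
    # supplied by the caller:
    def _addgroup_input_sketch(self, node, p1, linknode, deltabase, delta):
        def deltas():
            # delta is a binary patch against the revision named by deltabase
            yield (node, p1, nullid, linknode, deltabase, delta, 0)
        return deltas()
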
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

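    # --- Editor's illustrative sketch (not part of revlog.py) ---
    # A full-replacement delta is a single patch hunk: a 12-byte ">lll"
    # header (start, end, newlength) spanning the whole old text, followed by
    # the entire new text. So _peek_iscensored() only has to look just past
    # that header for the "\1\ncensored:" metadata marker. The metadata body
    # below is a hypothetical example, not a real censor payload:
    def _censored_delta_sketch(self, oldlen):
        newtext = "\1\ncensored: reason\n\1\n"
        delta = struct.pack(">lll", 0, oldlen, len(newtext)) + newtext
        hlen = struct.calcsize(">lll")
        assert delta[hlen:hlen + len("\1\ncensored:")] == "\1\ncensored:"
        return delta
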
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

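    # --- Editor's illustrative sketch (not part of revlog.py) ---
    # The walk above computes, in effect: the strip point is the first
    # revision whose linkrev is >= minlink (everything from there on gets
    # truncated), and the "broken" revs are the later ones that are removed
    # even though their own linkrev is < minlink. For a linear revlog this
    # reduces to a plain scan over the linkrev column:
    def _strippoint_linear_sketch(self, linkrevs, minlink):
        # linkrevs: list indexed by rev, e.g. [0, 3, 1, 3] with minlink=2
        doomed = [r for r, lr in enumerate(linkrevs) if lr >= minlink]
        if not doomed:
            return len(linkrevs), set()
        strippoint = doomed[0]   # truncate at the first rev to remove
        broken = set(r for r in range(strippoint, len(linkrevs))
                     if linkrevs[r] < minlink)
        return strippoint, broken
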
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing revlog
        are preserved in the destination revlog. The argument can have the
        following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is the
           fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        If not set, the destination revlog's current setting is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd

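    # --- Editor's illustrative sketch (not part of revlog.py) ---
    # Recomputing every delta while copying, e.g. after changing the delta
    # strategy. 'rl', 'destrevlog' and 'tr' are assumed to come from the
    # caller (destrevlog must be empty and both revlogs unfiltered):
    def _clone_usage_sketch(self, rl, destrevlog, tr):
        copied = []
        def progress(orig, rev, node):
            copied.append(rev)
        rl.clone(tr, destrevlog, addrevisioncb=progress,
                 deltareuse=rl.DELTAREUSENEVER)
        return len(copied)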