##// END OF EJS Templates
revlog: correct type in check to verify rawtext is immutable...
Augie Fackler -
r35863:be923ce4 default
parent child Browse files
Show More
@@ -1,2491 +1,2491
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import errno
18 import errno
19 import hashlib
19 import hashlib
20 import heapq
20 import heapq
21 import os
21 import os
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullid,
29 nullid,
30 nullrev,
30 nullrev,
31 wdirhex,
31 wdirhex,
32 wdirid,
32 wdirid,
33 wdirrev,
33 wdirrev,
34 )
34 )
35 from .i18n import _
35 from .i18n import _
36 from .thirdparty import (
36 from .thirdparty import (
37 attr,
37 attr,
38 )
38 )
39 from . import (
39 from . import (
40 ancestor,
40 ancestor,
41 error,
41 error,
42 mdiff,
42 mdiff,
43 policy,
43 policy,
44 pycompat,
44 pycompat,
45 templatefilters,
45 templatefilters,
46 util,
46 util,
47 )
47 )
48
48
49 parsers = policy.importmod(r'parsers')
49 parsers = policy.importmod(r'parsers')
50
50
51 # Aliased for performance.
51 # Aliased for performance.
52 _zlibdecompress = zlib.decompress
52 _zlibdecompress = zlib.decompress
53
53
54 # revlog header flags
54 # revlog header flags
55 REVLOGV0 = 0
55 REVLOGV0 = 0
56 REVLOGV1 = 1
56 REVLOGV1 = 1
57 # Dummy value until file format is finalized.
57 # Dummy value until file format is finalized.
58 # Reminder: change the bounds check in revlog.__init__ when this is changed.
58 # Reminder: change the bounds check in revlog.__init__ when this is changed.
59 REVLOGV2 = 0xDEAD
59 REVLOGV2 = 0xDEAD
60 FLAG_INLINE_DATA = (1 << 16)
60 FLAG_INLINE_DATA = (1 << 16)
61 FLAG_GENERALDELTA = (1 << 17)
61 FLAG_GENERALDELTA = (1 << 17)
62 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
62 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
63 REVLOG_DEFAULT_FORMAT = REVLOGV1
63 REVLOG_DEFAULT_FORMAT = REVLOGV1
64 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
64 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
65 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
65 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
66 REVLOGV2_FLAGS = REVLOGV1_FLAGS
66 REVLOGV2_FLAGS = REVLOGV1_FLAGS
67
67
68 # revlog index flags
68 # revlog index flags
69 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
69 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
70 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
70 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
71 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
71 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
72 REVIDX_DEFAULT_FLAGS = 0
72 REVIDX_DEFAULT_FLAGS = 0
73 # stable order in which flags need to be processed and their processors applied
73 # stable order in which flags need to be processed and their processors applied
74 REVIDX_FLAGS_ORDER = [
74 REVIDX_FLAGS_ORDER = [
75 REVIDX_ISCENSORED,
75 REVIDX_ISCENSORED,
76 REVIDX_ELLIPSIS,
76 REVIDX_ELLIPSIS,
77 REVIDX_EXTSTORED,
77 REVIDX_EXTSTORED,
78 ]
78 ]
79 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
79 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
80
80
81 # max size of revlog with inline data
81 # max size of revlog with inline data
82 _maxinline = 131072
82 _maxinline = 131072
83 _chunksize = 1048576
83 _chunksize = 1048576
84
84
85 RevlogError = error.RevlogError
85 RevlogError = error.RevlogError
86 LookupError = error.LookupError
86 LookupError = error.LookupError
87 CensoredNodeError = error.CensoredNodeError
87 CensoredNodeError = error.CensoredNodeError
88 ProgrammingError = error.ProgrammingError
88 ProgrammingError = error.ProgrammingError
89
89
90 # Store flag processors (cf. 'addflagprocessor()' to register)
90 # Store flag processors (cf. 'addflagprocessor()' to register)
91 _flagprocessors = {
91 _flagprocessors = {
92 REVIDX_ISCENSORED: None,
92 REVIDX_ISCENSORED: None,
93 }
93 }
94
94
95 def addflagprocessor(flag, processor):
95 def addflagprocessor(flag, processor):
96 """Register a flag processor on a revision data flag.
96 """Register a flag processor on a revision data flag.
97
97
98 Invariant:
98 Invariant:
99 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
99 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
100 - Only one flag processor can be registered on a specific flag.
100 - Only one flag processor can be registered on a specific flag.
101 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
101 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
102 following signatures:
102 following signatures:
103 - (read) f(self, rawtext) -> text, bool
103 - (read) f(self, rawtext) -> text, bool
104 - (write) f(self, text) -> rawtext, bool
104 - (write) f(self, text) -> rawtext, bool
105 - (raw) f(self, rawtext) -> bool
105 - (raw) f(self, rawtext) -> bool
106 "text" is presented to the user. "rawtext" is stored in revlog data, not
106 "text" is presented to the user. "rawtext" is stored in revlog data, not
107 directly visible to the user.
107 directly visible to the user.
108 The boolean returned by these transforms is used to determine whether
108 The boolean returned by these transforms is used to determine whether
109 the returned text can be used for hash integrity checking. For example,
109 the returned text can be used for hash integrity checking. For example,
110 if "write" returns False, then "text" is used to generate hash. If
110 if "write" returns False, then "text" is used to generate hash. If
111 "write" returns True, that basically means "rawtext" returned by "write"
111 "write" returns True, that basically means "rawtext" returned by "write"
112 should be used to generate hash. Usually, "write" and "read" return
112 should be used to generate hash. Usually, "write" and "read" return
113 different booleans. And "raw" returns a same boolean as "write".
113 different booleans. And "raw" returns a same boolean as "write".
114
114
115 Note: The 'raw' transform is used for changegroup generation and in some
115 Note: The 'raw' transform is used for changegroup generation and in some
116 debug commands. In this case the transform only indicates whether the
116 debug commands. In this case the transform only indicates whether the
117 contents can be used for hash integrity checks.
117 contents can be used for hash integrity checks.
118 """
118 """
119 if not flag & REVIDX_KNOWN_FLAGS:
119 if not flag & REVIDX_KNOWN_FLAGS:
120 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
120 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
121 raise ProgrammingError(msg)
121 raise ProgrammingError(msg)
122 if flag not in REVIDX_FLAGS_ORDER:
122 if flag not in REVIDX_FLAGS_ORDER:
123 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
123 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
124 raise ProgrammingError(msg)
124 raise ProgrammingError(msg)
125 if flag in _flagprocessors:
125 if flag in _flagprocessors:
126 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
126 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
127 raise error.Abort(msg)
127 raise error.Abort(msg)
128 _flagprocessors[flag] = processor
128 _flagprocessors[flag] = processor
129
129
130 def getoffset(q):
130 def getoffset(q):
131 return int(q >> 16)
131 return int(q >> 16)
132
132
133 def gettype(q):
133 def gettype(q):
134 return int(q & 0xFFFF)
134 return int(q & 0xFFFF)
135
135
136 def offset_type(offset, type):
136 def offset_type(offset, type):
137 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
137 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
138 raise ValueError('unknown revlog index flags')
138 raise ValueError('unknown revlog index flags')
139 return int(int(offset) << 16 | type)
139 return int(int(offset) << 16 | type)
140
140
141 _nullhash = hashlib.sha1(nullid)
141 _nullhash = hashlib.sha1(nullid)
142
142
143 def hash(text, p1, p2):
143 def hash(text, p1, p2):
144 """generate a hash from the given text and its parent hashes
144 """generate a hash from the given text and its parent hashes
145
145
146 This hash combines both the current file contents and its history
146 This hash combines both the current file contents and its history
147 in a manner that makes it easy to distinguish nodes with the same
147 in a manner that makes it easy to distinguish nodes with the same
148 content in the revision graph.
148 content in the revision graph.
149 """
149 """
150 # As of now, if one of the parent node is null, p2 is null
150 # As of now, if one of the parent node is null, p2 is null
151 if p2 == nullid:
151 if p2 == nullid:
152 # deep copy of a hash is faster than creating one
152 # deep copy of a hash is faster than creating one
153 s = _nullhash.copy()
153 s = _nullhash.copy()
154 s.update(p1)
154 s.update(p1)
155 else:
155 else:
156 # none of the parent nodes are nullid
156 # none of the parent nodes are nullid
157 if p1 < p2:
157 if p1 < p2:
158 a = p1
158 a = p1
159 b = p2
159 b = p2
160 else:
160 else:
161 a = p2
161 a = p2
162 b = p1
162 b = p1
163 s = hashlib.sha1(a)
163 s = hashlib.sha1(a)
164 s.update(b)
164 s.update(b)
165 s.update(text)
165 s.update(text)
166 return s.digest()
166 return s.digest()
167
167
168 def _trimchunk(revlog, revs, startidx, endidx=None):
168 def _trimchunk(revlog, revs, startidx, endidx=None):
169 """returns revs[startidx:endidx] without empty trailing revs
169 """returns revs[startidx:endidx] without empty trailing revs
170 """
170 """
171 length = revlog.length
171 length = revlog.length
172
172
173 if endidx is None:
173 if endidx is None:
174 endidx = len(revs)
174 endidx = len(revs)
175
175
176 # Trim empty revs at the end, but never the very first revision of a chain
176 # Trim empty revs at the end, but never the very first revision of a chain
177 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
177 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
178 endidx -= 1
178 endidx -= 1
179
179
180 return revs[startidx:endidx]
180 return revs[startidx:endidx]
181
181
182 def _slicechunk(revlog, revs):
182 def _slicechunk(revlog, revs):
183 """slice revs to reduce the amount of unrelated data to be read from disk.
183 """slice revs to reduce the amount of unrelated data to be read from disk.
184
184
185 ``revs`` is sliced into groups that should be read in one time.
185 ``revs`` is sliced into groups that should be read in one time.
186 Assume that revs are sorted.
186 Assume that revs are sorted.
187 """
187 """
188 start = revlog.start
188 start = revlog.start
189 length = revlog.length
189 length = revlog.length
190
190
191 if len(revs) <= 1:
191 if len(revs) <= 1:
192 yield revs
192 yield revs
193 return
193 return
194
194
195 startbyte = start(revs[0])
195 startbyte = start(revs[0])
196 endbyte = start(revs[-1]) + length(revs[-1])
196 endbyte = start(revs[-1]) + length(revs[-1])
197 readdata = deltachainspan = endbyte - startbyte
197 readdata = deltachainspan = endbyte - startbyte
198
198
199 chainpayload = sum(length(r) for r in revs)
199 chainpayload = sum(length(r) for r in revs)
200
200
201 if deltachainspan:
201 if deltachainspan:
202 density = chainpayload / float(deltachainspan)
202 density = chainpayload / float(deltachainspan)
203 else:
203 else:
204 density = 1.0
204 density = 1.0
205
205
206 # Store the gaps in a heap to have them sorted by decreasing size
206 # Store the gaps in a heap to have them sorted by decreasing size
207 gapsheap = []
207 gapsheap = []
208 heapq.heapify(gapsheap)
208 heapq.heapify(gapsheap)
209 prevend = None
209 prevend = None
210 for i, rev in enumerate(revs):
210 for i, rev in enumerate(revs):
211 revstart = start(rev)
211 revstart = start(rev)
212 revlen = length(rev)
212 revlen = length(rev)
213
213
214 # Skip empty revisions to form larger holes
214 # Skip empty revisions to form larger holes
215 if revlen == 0:
215 if revlen == 0:
216 continue
216 continue
217
217
218 if prevend is not None:
218 if prevend is not None:
219 gapsize = revstart - prevend
219 gapsize = revstart - prevend
220 # only consider holes that are large enough
220 # only consider holes that are large enough
221 if gapsize > revlog._srmingapsize:
221 if gapsize > revlog._srmingapsize:
222 heapq.heappush(gapsheap, (-gapsize, i))
222 heapq.heappush(gapsheap, (-gapsize, i))
223
223
224 prevend = revstart + revlen
224 prevend = revstart + revlen
225
225
226 # Collect the indices of the largest holes until the density is acceptable
226 # Collect the indices of the largest holes until the density is acceptable
227 indicesheap = []
227 indicesheap = []
228 heapq.heapify(indicesheap)
228 heapq.heapify(indicesheap)
229 while gapsheap and density < revlog._srdensitythreshold:
229 while gapsheap and density < revlog._srdensitythreshold:
230 oppgapsize, gapidx = heapq.heappop(gapsheap)
230 oppgapsize, gapidx = heapq.heappop(gapsheap)
231
231
232 heapq.heappush(indicesheap, gapidx)
232 heapq.heappush(indicesheap, gapidx)
233
233
234 # the gap sizes are stored as negatives to be sorted decreasingly
234 # the gap sizes are stored as negatives to be sorted decreasingly
235 # by the heap
235 # by the heap
236 readdata -= (-oppgapsize)
236 readdata -= (-oppgapsize)
237 if readdata > 0:
237 if readdata > 0:
238 density = chainpayload / float(readdata)
238 density = chainpayload / float(readdata)
239 else:
239 else:
240 density = 1.0
240 density = 1.0
241
241
242 # Cut the revs at collected indices
242 # Cut the revs at collected indices
243 previdx = 0
243 previdx = 0
244 while indicesheap:
244 while indicesheap:
245 idx = heapq.heappop(indicesheap)
245 idx = heapq.heappop(indicesheap)
246
246
247 chunk = _trimchunk(revlog, revs, previdx, idx)
247 chunk = _trimchunk(revlog, revs, previdx, idx)
248 if chunk:
248 if chunk:
249 yield chunk
249 yield chunk
250
250
251 previdx = idx
251 previdx = idx
252
252
253 chunk = _trimchunk(revlog, revs, previdx)
253 chunk = _trimchunk(revlog, revs, previdx)
254 if chunk:
254 if chunk:
255 yield chunk
255 yield chunk
256
256
257 @attr.s(slots=True, frozen=True)
257 @attr.s(slots=True, frozen=True)
258 class _deltainfo(object):
258 class _deltainfo(object):
259 distance = attr.ib()
259 distance = attr.ib()
260 deltalen = attr.ib()
260 deltalen = attr.ib()
261 data = attr.ib()
261 data = attr.ib()
262 base = attr.ib()
262 base = attr.ib()
263 chainbase = attr.ib()
263 chainbase = attr.ib()
264 chainlen = attr.ib()
264 chainlen = attr.ib()
265 compresseddeltalen = attr.ib()
265 compresseddeltalen = attr.ib()
266
266
267 class _deltacomputer(object):
267 class _deltacomputer(object):
268 def __init__(self, revlog):
268 def __init__(self, revlog):
269 self.revlog = revlog
269 self.revlog = revlog
270
270
271 def _getcandidaterevs(self, p1, p2, cachedelta):
271 def _getcandidaterevs(self, p1, p2, cachedelta):
272 """
272 """
273 Provides revisions that present an interest to be diffed against,
273 Provides revisions that present an interest to be diffed against,
274 grouped by level of easiness.
274 grouped by level of easiness.
275 """
275 """
276 revlog = self.revlog
276 revlog = self.revlog
277 curr = len(revlog)
277 curr = len(revlog)
278 prev = curr - 1
278 prev = curr - 1
279 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
279 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
280
280
281 # should we try to build a delta?
281 # should we try to build a delta?
282 if prev != nullrev and revlog.storedeltachains:
282 if prev != nullrev and revlog.storedeltachains:
283 tested = set()
283 tested = set()
284 # This condition is true most of the time when processing
284 # This condition is true most of the time when processing
285 # changegroup data into a generaldelta repo. The only time it
285 # changegroup data into a generaldelta repo. The only time it
286 # isn't true is if this is the first revision in a delta chain
286 # isn't true is if this is the first revision in a delta chain
287 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
287 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
288 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
288 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
289 # Assume what we received from the server is a good choice
289 # Assume what we received from the server is a good choice
290 # build delta will reuse the cache
290 # build delta will reuse the cache
291 yield (cachedelta[0],)
291 yield (cachedelta[0],)
292 tested.add(cachedelta[0])
292 tested.add(cachedelta[0])
293
293
294 if revlog._generaldelta:
294 if revlog._generaldelta:
295 # exclude already lazy tested base if any
295 # exclude already lazy tested base if any
296 parents = [p for p in (p1r, p2r)
296 parents = [p for p in (p1r, p2r)
297 if p != nullrev and p not in tested]
297 if p != nullrev and p not in tested]
298 if parents and not revlog._aggressivemergedeltas:
298 if parents and not revlog._aggressivemergedeltas:
299 # Pick whichever parent is closer to us (to minimize the
299 # Pick whichever parent is closer to us (to minimize the
300 # chance of having to build a fulltext).
300 # chance of having to build a fulltext).
301 parents = [max(parents)]
301 parents = [max(parents)]
302 tested.update(parents)
302 tested.update(parents)
303 yield parents
303 yield parents
304
304
305 if prev not in tested:
305 if prev not in tested:
306 # other approach failed try against prev to hopefully save us a
306 # other approach failed try against prev to hopefully save us a
307 # fulltext.
307 # fulltext.
308 yield (prev,)
308 yield (prev,)
309
309
310 def buildtext(self, revinfo, fh):
310 def buildtext(self, revinfo, fh):
311 """Builds a fulltext version of a revision
311 """Builds a fulltext version of a revision
312
312
313 revinfo: _revisioninfo instance that contains all needed info
313 revinfo: _revisioninfo instance that contains all needed info
314 fh: file handle to either the .i or the .d revlog file,
314 fh: file handle to either the .i or the .d revlog file,
315 depending on whether it is inlined or not
315 depending on whether it is inlined or not
316 """
316 """
317 btext = revinfo.btext
317 btext = revinfo.btext
318 if btext[0] is not None:
318 if btext[0] is not None:
319 return btext[0]
319 return btext[0]
320
320
321 revlog = self.revlog
321 revlog = self.revlog
322 cachedelta = revinfo.cachedelta
322 cachedelta = revinfo.cachedelta
323 flags = revinfo.flags
323 flags = revinfo.flags
324 node = revinfo.node
324 node = revinfo.node
325
325
326 baserev = cachedelta[0]
326 baserev = cachedelta[0]
327 delta = cachedelta[1]
327 delta = cachedelta[1]
328 # special case deltas which replace entire base; no need to decode
328 # special case deltas which replace entire base; no need to decode
329 # base revision. this neatly avoids censored bases, which throw when
329 # base revision. this neatly avoids censored bases, which throw when
330 # they're decoded.
330 # they're decoded.
331 hlen = struct.calcsize(">lll")
331 hlen = struct.calcsize(">lll")
332 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
332 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
333 len(delta) - hlen):
333 len(delta) - hlen):
334 btext[0] = delta[hlen:]
334 btext[0] = delta[hlen:]
335 else:
335 else:
336 basetext = revlog.revision(baserev, _df=fh, raw=True)
336 basetext = revlog.revision(baserev, _df=fh, raw=True)
337 btext[0] = mdiff.patch(basetext, delta)
337 btext[0] = mdiff.patch(basetext, delta)
338
338
339 try:
339 try:
340 res = revlog._processflags(btext[0], flags, 'read', raw=True)
340 res = revlog._processflags(btext[0], flags, 'read', raw=True)
341 btext[0], validatehash = res
341 btext[0], validatehash = res
342 if validatehash:
342 if validatehash:
343 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
343 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
344 if flags & REVIDX_ISCENSORED:
344 if flags & REVIDX_ISCENSORED:
345 raise RevlogError(_('node %s is not censored') % node)
345 raise RevlogError(_('node %s is not censored') % node)
346 except CensoredNodeError:
346 except CensoredNodeError:
347 # must pass the censored index flag to add censored revisions
347 # must pass the censored index flag to add censored revisions
348 if not flags & REVIDX_ISCENSORED:
348 if not flags & REVIDX_ISCENSORED:
349 raise
349 raise
350 return btext[0]
350 return btext[0]
351
351
352 def _builddeltadiff(self, base, revinfo, fh):
352 def _builddeltadiff(self, base, revinfo, fh):
353 revlog = self.revlog
353 revlog = self.revlog
354 t = self.buildtext(revinfo, fh)
354 t = self.buildtext(revinfo, fh)
355 if revlog.iscensored(base):
355 if revlog.iscensored(base):
356 # deltas based on a censored revision must replace the
356 # deltas based on a censored revision must replace the
357 # full content in one patch, so delta works everywhere
357 # full content in one patch, so delta works everywhere
358 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
358 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
359 delta = header + t
359 delta = header + t
360 else:
360 else:
361 ptext = revlog.revision(base, _df=fh, raw=True)
361 ptext = revlog.revision(base, _df=fh, raw=True)
362 delta = mdiff.textdiff(ptext, t)
362 delta = mdiff.textdiff(ptext, t)
363
363
364 return delta
364 return delta
365
365
366 def _builddeltainfo(self, revinfo, base, fh):
366 def _builddeltainfo(self, revinfo, base, fh):
367 # can we use the cached delta?
367 # can we use the cached delta?
368 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
368 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
369 delta = revinfo.cachedelta[1]
369 delta = revinfo.cachedelta[1]
370 else:
370 else:
371 delta = self._builddeltadiff(base, revinfo, fh)
371 delta = self._builddeltadiff(base, revinfo, fh)
372 revlog = self.revlog
372 revlog = self.revlog
373 header, data = revlog.compress(delta)
373 header, data = revlog.compress(delta)
374 deltalen = len(header) + len(data)
374 deltalen = len(header) + len(data)
375 chainbase = revlog.chainbase(base)
375 chainbase = revlog.chainbase(base)
376 offset = revlog.end(len(revlog) - 1)
376 offset = revlog.end(len(revlog) - 1)
377 dist = deltalen + offset - revlog.start(chainbase)
377 dist = deltalen + offset - revlog.start(chainbase)
378 if revlog._generaldelta:
378 if revlog._generaldelta:
379 deltabase = base
379 deltabase = base
380 else:
380 else:
381 deltabase = chainbase
381 deltabase = chainbase
382 chainlen, compresseddeltalen = revlog._chaininfo(base)
382 chainlen, compresseddeltalen = revlog._chaininfo(base)
383 chainlen += 1
383 chainlen += 1
384 compresseddeltalen += deltalen
384 compresseddeltalen += deltalen
385 return _deltainfo(dist, deltalen, (header, data), deltabase,
385 return _deltainfo(dist, deltalen, (header, data), deltabase,
386 chainbase, chainlen, compresseddeltalen)
386 chainbase, chainlen, compresseddeltalen)
387
387
388 def finddeltainfo(self, revinfo, fh):
388 def finddeltainfo(self, revinfo, fh):
389 """Find an acceptable delta against a candidate revision
389 """Find an acceptable delta against a candidate revision
390
390
391 revinfo: information about the revision (instance of _revisioninfo)
391 revinfo: information about the revision (instance of _revisioninfo)
392 fh: file handle to either the .i or the .d revlog file,
392 fh: file handle to either the .i or the .d revlog file,
393 depending on whether it is inlined or not
393 depending on whether it is inlined or not
394
394
395 Returns the first acceptable candidate revision, as ordered by
395 Returns the first acceptable candidate revision, as ordered by
396 _getcandidaterevs
396 _getcandidaterevs
397 """
397 """
398 cachedelta = revinfo.cachedelta
398 cachedelta = revinfo.cachedelta
399 p1 = revinfo.p1
399 p1 = revinfo.p1
400 p2 = revinfo.p2
400 p2 = revinfo.p2
401 revlog = self.revlog
401 revlog = self.revlog
402
402
403 deltainfo = None
403 deltainfo = None
404 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
404 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
405 nominateddeltas = []
405 nominateddeltas = []
406 for candidaterev in candidaterevs:
406 for candidaterev in candidaterevs:
407 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
407 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
408 if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
408 if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
409 nominateddeltas.append(candidatedelta)
409 nominateddeltas.append(candidatedelta)
410 if nominateddeltas:
410 if nominateddeltas:
411 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
411 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
412 break
412 break
413
413
414 return deltainfo
414 return deltainfo
415
415
416 @attr.s(slots=True, frozen=True)
416 @attr.s(slots=True, frozen=True)
417 class _revisioninfo(object):
417 class _revisioninfo(object):
418 """Information about a revision that allows building its fulltext
418 """Information about a revision that allows building its fulltext
419 node: expected hash of the revision
419 node: expected hash of the revision
420 p1, p2: parent revs of the revision
420 p1, p2: parent revs of the revision
421 btext: built text cache consisting of a one-element list
421 btext: built text cache consisting of a one-element list
422 cachedelta: (baserev, uncompressed_delta) or None
422 cachedelta: (baserev, uncompressed_delta) or None
423 flags: flags associated to the revision storage
423 flags: flags associated to the revision storage
424
424
425 One of btext[0] or cachedelta must be set.
425 One of btext[0] or cachedelta must be set.
426 """
426 """
427 node = attr.ib()
427 node = attr.ib()
428 p1 = attr.ib()
428 p1 = attr.ib()
429 p2 = attr.ib()
429 p2 = attr.ib()
430 btext = attr.ib()
430 btext = attr.ib()
431 textlen = attr.ib()
431 textlen = attr.ib()
432 cachedelta = attr.ib()
432 cachedelta = attr.ib()
433 flags = attr.ib()
433 flags = attr.ib()
434
434
435 # index v0:
435 # index v0:
436 # 4 bytes: offset
436 # 4 bytes: offset
437 # 4 bytes: compressed length
437 # 4 bytes: compressed length
438 # 4 bytes: base rev
438 # 4 bytes: base rev
439 # 4 bytes: link rev
439 # 4 bytes: link rev
440 # 20 bytes: parent 1 nodeid
440 # 20 bytes: parent 1 nodeid
441 # 20 bytes: parent 2 nodeid
441 # 20 bytes: parent 2 nodeid
442 # 20 bytes: nodeid
442 # 20 bytes: nodeid
443 indexformatv0 = struct.Struct(">4l20s20s20s")
443 indexformatv0 = struct.Struct(">4l20s20s20s")
444 indexformatv0_pack = indexformatv0.pack
444 indexformatv0_pack = indexformatv0.pack
445 indexformatv0_unpack = indexformatv0.unpack
445 indexformatv0_unpack = indexformatv0.unpack
446
446
447 class revlogoldio(object):
447 class revlogoldio(object):
448 def __init__(self):
448 def __init__(self):
449 self.size = indexformatv0.size
449 self.size = indexformatv0.size
450
450
451 def parseindex(self, data, inline):
451 def parseindex(self, data, inline):
452 s = self.size
452 s = self.size
453 index = []
453 index = []
454 nodemap = {nullid: nullrev}
454 nodemap = {nullid: nullrev}
455 n = off = 0
455 n = off = 0
456 l = len(data)
456 l = len(data)
457 while off + s <= l:
457 while off + s <= l:
458 cur = data[off:off + s]
458 cur = data[off:off + s]
459 off += s
459 off += s
460 e = indexformatv0_unpack(cur)
460 e = indexformatv0_unpack(cur)
461 # transform to revlogv1 format
461 # transform to revlogv1 format
462 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
462 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
463 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
463 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
464 index.append(e2)
464 index.append(e2)
465 nodemap[e[6]] = n
465 nodemap[e[6]] = n
466 n += 1
466 n += 1
467
467
468 # add the magic null revision at -1
468 # add the magic null revision at -1
469 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
469 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
470
470
471 return index, nodemap, None
471 return index, nodemap, None
472
472
473 def packentry(self, entry, node, version, rev):
473 def packentry(self, entry, node, version, rev):
474 if gettype(entry[0]):
474 if gettype(entry[0]):
475 raise RevlogError(_('index entry flags need revlog version 1'))
475 raise RevlogError(_('index entry flags need revlog version 1'))
476 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
476 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
477 node(entry[5]), node(entry[6]), entry[7])
477 node(entry[5]), node(entry[6]), entry[7])
478 return indexformatv0_pack(*e2)
478 return indexformatv0_pack(*e2)
479
479
480 # index ng:
480 # index ng:
481 # 6 bytes: offset
481 # 6 bytes: offset
482 # 2 bytes: flags
482 # 2 bytes: flags
483 # 4 bytes: compressed length
483 # 4 bytes: compressed length
484 # 4 bytes: uncompressed length
484 # 4 bytes: uncompressed length
485 # 4 bytes: base rev
485 # 4 bytes: base rev
486 # 4 bytes: link rev
486 # 4 bytes: link rev
487 # 4 bytes: parent 1 rev
487 # 4 bytes: parent 1 rev
488 # 4 bytes: parent 2 rev
488 # 4 bytes: parent 2 rev
489 # 32 bytes: nodeid
489 # 32 bytes: nodeid
490 indexformatng = struct.Struct(">Qiiiiii20s12x")
490 indexformatng = struct.Struct(">Qiiiiii20s12x")
491 indexformatng_pack = indexformatng.pack
491 indexformatng_pack = indexformatng.pack
492 versionformat = struct.Struct(">I")
492 versionformat = struct.Struct(">I")
493 versionformat_pack = versionformat.pack
493 versionformat_pack = versionformat.pack
494 versionformat_unpack = versionformat.unpack
494 versionformat_unpack = versionformat.unpack
495
495
496 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
496 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
497 # signed integer)
497 # signed integer)
498 _maxentrysize = 0x7fffffff
498 _maxentrysize = 0x7fffffff
499
499
500 class revlogio(object):
500 class revlogio(object):
501 def __init__(self):
501 def __init__(self):
502 self.size = indexformatng.size
502 self.size = indexformatng.size
503
503
504 def parseindex(self, data, inline):
504 def parseindex(self, data, inline):
505 # call the C implementation to parse the index data
505 # call the C implementation to parse the index data
506 index, cache = parsers.parse_index2(data, inline)
506 index, cache = parsers.parse_index2(data, inline)
507 return index, getattr(index, 'nodemap', None), cache
507 return index, getattr(index, 'nodemap', None), cache
508
508
509 def packentry(self, entry, node, version, rev):
509 def packentry(self, entry, node, version, rev):
510 p = indexformatng_pack(*entry)
510 p = indexformatng_pack(*entry)
511 if rev == 0:
511 if rev == 0:
512 p = versionformat_pack(version) + p[4:]
512 p = versionformat_pack(version) + p[4:]
513 return p
513 return p
514
514
515 class revlog(object):
515 class revlog(object):
516 """
516 """
517 the underlying revision storage object
517 the underlying revision storage object
518
518
519 A revlog consists of two parts, an index and the revision data.
519 A revlog consists of two parts, an index and the revision data.
520
520
521 The index is a file with a fixed record size containing
521 The index is a file with a fixed record size containing
522 information on each revision, including its nodeid (hash), the
522 information on each revision, including its nodeid (hash), the
523 nodeids of its parents, the position and offset of its data within
523 nodeids of its parents, the position and offset of its data within
524 the data file, and the revision it's based on. Finally, each entry
524 the data file, and the revision it's based on. Finally, each entry
525 contains a linkrev entry that can serve as a pointer to external
525 contains a linkrev entry that can serve as a pointer to external
526 data.
526 data.
527
527
528 The revision data itself is a linear collection of data chunks.
528 The revision data itself is a linear collection of data chunks.
529 Each chunk represents a revision and is usually represented as a
529 Each chunk represents a revision and is usually represented as a
530 delta against the previous chunk. To bound lookup time, runs of
530 delta against the previous chunk. To bound lookup time, runs of
531 deltas are limited to about 2 times the length of the original
531 deltas are limited to about 2 times the length of the original
532 version data. This makes retrieval of a version proportional to
532 version data. This makes retrieval of a version proportional to
533 its size, or O(1) relative to the number of revisions.
533 its size, or O(1) relative to the number of revisions.
534
534
535 Both pieces of the revlog are written to in an append-only
535 Both pieces of the revlog are written to in an append-only
536 fashion, which means we never need to rewrite a file to insert or
536 fashion, which means we never need to rewrite a file to insert or
537 remove data, and can use some simple techniques to avoid the need
537 remove data, and can use some simple techniques to avoid the need
538 for locking while reading.
538 for locking while reading.
539
539
540 If checkambig, indexfile is opened with checkambig=True at
540 If checkambig, indexfile is opened with checkambig=True at
541 writing, to avoid file stat ambiguity.
541 writing, to avoid file stat ambiguity.
542
542
543 If mmaplargeindex is True, and an mmapindexthreshold is set, the
543 If mmaplargeindex is True, and an mmapindexthreshold is set, the
544 index will be mmapped rather than read if it is larger than the
544 index will be mmapped rather than read if it is larger than the
545 configured threshold.
545 configured threshold.
546 """
546 """
547 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
547 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
548 mmaplargeindex=False):
548 mmaplargeindex=False):
549 """
549 """
550 create a revlog object
550 create a revlog object
551
551
552 opener is a function that abstracts the file opening operation
552 opener is a function that abstracts the file opening operation
553 and can be used to implement COW semantics or the like.
553 and can be used to implement COW semantics or the like.
554 """
554 """
555 self.indexfile = indexfile
555 self.indexfile = indexfile
556 self.datafile = datafile or (indexfile[:-2] + ".d")
556 self.datafile = datafile or (indexfile[:-2] + ".d")
557 self.opener = opener
557 self.opener = opener
558 # When True, indexfile is opened with checkambig=True at writing, to
558 # When True, indexfile is opened with checkambig=True at writing, to
559 # avoid file stat ambiguity.
559 # avoid file stat ambiguity.
560 self._checkambig = checkambig
560 self._checkambig = checkambig
561 # 3-tuple of (node, rev, text) for a raw revision.
561 # 3-tuple of (node, rev, text) for a raw revision.
562 self._cache = None
562 self._cache = None
563 # Maps rev to chain base rev.
563 # Maps rev to chain base rev.
564 self._chainbasecache = util.lrucachedict(100)
564 self._chainbasecache = util.lrucachedict(100)
565 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
565 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
566 self._chunkcache = (0, '')
566 self._chunkcache = (0, '')
567 # How much data to read and cache into the raw revlog data cache.
567 # How much data to read and cache into the raw revlog data cache.
568 self._chunkcachesize = 65536
568 self._chunkcachesize = 65536
569 self._maxchainlen = None
569 self._maxchainlen = None
570 self._aggressivemergedeltas = False
570 self._aggressivemergedeltas = False
571 self.index = []
571 self.index = []
572 # Mapping of partial identifiers to full nodes.
572 # Mapping of partial identifiers to full nodes.
573 self._pcache = {}
573 self._pcache = {}
574 # Mapping of revision integer to full node.
574 # Mapping of revision integer to full node.
575 self._nodecache = {nullid: nullrev}
575 self._nodecache = {nullid: nullrev}
576 self._nodepos = None
576 self._nodepos = None
577 self._compengine = 'zlib'
577 self._compengine = 'zlib'
578 self._maxdeltachainspan = -1
578 self._maxdeltachainspan = -1
579 self._withsparseread = False
579 self._withsparseread = False
580 self._srdensitythreshold = 0.25
580 self._srdensitythreshold = 0.25
581 self._srmingapsize = 262144
581 self._srmingapsize = 262144
582
582
583 mmapindexthreshold = None
583 mmapindexthreshold = None
584 v = REVLOG_DEFAULT_VERSION
584 v = REVLOG_DEFAULT_VERSION
585 opts = getattr(opener, 'options', None)
585 opts = getattr(opener, 'options', None)
586 if opts is not None:
586 if opts is not None:
587 if 'revlogv2' in opts:
587 if 'revlogv2' in opts:
588 # version 2 revlogs always use generaldelta.
588 # version 2 revlogs always use generaldelta.
589 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
589 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
590 elif 'revlogv1' in opts:
590 elif 'revlogv1' in opts:
591 if 'generaldelta' in opts:
591 if 'generaldelta' in opts:
592 v |= FLAG_GENERALDELTA
592 v |= FLAG_GENERALDELTA
593 else:
593 else:
594 v = 0
594 v = 0
595 if 'chunkcachesize' in opts:
595 if 'chunkcachesize' in opts:
596 self._chunkcachesize = opts['chunkcachesize']
596 self._chunkcachesize = opts['chunkcachesize']
597 if 'maxchainlen' in opts:
597 if 'maxchainlen' in opts:
598 self._maxchainlen = opts['maxchainlen']
598 self._maxchainlen = opts['maxchainlen']
599 if 'aggressivemergedeltas' in opts:
599 if 'aggressivemergedeltas' in opts:
600 self._aggressivemergedeltas = opts['aggressivemergedeltas']
600 self._aggressivemergedeltas = opts['aggressivemergedeltas']
601 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
601 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
602 if 'compengine' in opts:
602 if 'compengine' in opts:
603 self._compengine = opts['compengine']
603 self._compengine = opts['compengine']
604 if 'maxdeltachainspan' in opts:
604 if 'maxdeltachainspan' in opts:
605 self._maxdeltachainspan = opts['maxdeltachainspan']
605 self._maxdeltachainspan = opts['maxdeltachainspan']
606 if mmaplargeindex and 'mmapindexthreshold' in opts:
606 if mmaplargeindex and 'mmapindexthreshold' in opts:
607 mmapindexthreshold = opts['mmapindexthreshold']
607 mmapindexthreshold = opts['mmapindexthreshold']
608 self._withsparseread = bool(opts.get('with-sparse-read', False))
608 self._withsparseread = bool(opts.get('with-sparse-read', False))
609 if 'sparse-read-density-threshold' in opts:
609 if 'sparse-read-density-threshold' in opts:
610 self._srdensitythreshold = opts['sparse-read-density-threshold']
610 self._srdensitythreshold = opts['sparse-read-density-threshold']
611 if 'sparse-read-min-gap-size' in opts:
611 if 'sparse-read-min-gap-size' in opts:
612 self._srmingapsize = opts['sparse-read-min-gap-size']
612 self._srmingapsize = opts['sparse-read-min-gap-size']
613
613
614 if self._chunkcachesize <= 0:
614 if self._chunkcachesize <= 0:
615 raise RevlogError(_('revlog chunk cache size %r is not greater '
615 raise RevlogError(_('revlog chunk cache size %r is not greater '
616 'than 0') % self._chunkcachesize)
616 'than 0') % self._chunkcachesize)
617 elif self._chunkcachesize & (self._chunkcachesize - 1):
617 elif self._chunkcachesize & (self._chunkcachesize - 1):
618 raise RevlogError(_('revlog chunk cache size %r is not a power '
618 raise RevlogError(_('revlog chunk cache size %r is not a power '
619 'of 2') % self._chunkcachesize)
619 'of 2') % self._chunkcachesize)
620
620
621 indexdata = ''
621 indexdata = ''
622 self._initempty = True
622 self._initempty = True
623 try:
623 try:
624 f = self.opener(self.indexfile)
624 f = self.opener(self.indexfile)
625 if (mmapindexthreshold is not None and
625 if (mmapindexthreshold is not None and
626 self.opener.fstat(f).st_size >= mmapindexthreshold):
626 self.opener.fstat(f).st_size >= mmapindexthreshold):
627 indexdata = util.buffer(util.mmapread(f))
627 indexdata = util.buffer(util.mmapread(f))
628 else:
628 else:
629 indexdata = f.read()
629 indexdata = f.read()
630 f.close()
630 f.close()
631 if len(indexdata) > 0:
631 if len(indexdata) > 0:
632 v = versionformat_unpack(indexdata[:4])[0]
632 v = versionformat_unpack(indexdata[:4])[0]
633 self._initempty = False
633 self._initempty = False
634 except IOError as inst:
634 except IOError as inst:
635 if inst.errno != errno.ENOENT:
635 if inst.errno != errno.ENOENT:
636 raise
636 raise
637
637
638 self.version = v
638 self.version = v
639 self._inline = v & FLAG_INLINE_DATA
639 self._inline = v & FLAG_INLINE_DATA
640 self._generaldelta = v & FLAG_GENERALDELTA
640 self._generaldelta = v & FLAG_GENERALDELTA
641 flags = v & ~0xFFFF
641 flags = v & ~0xFFFF
642 fmt = v & 0xFFFF
642 fmt = v & 0xFFFF
643 if fmt == REVLOGV0:
643 if fmt == REVLOGV0:
644 if flags:
644 if flags:
645 raise RevlogError(_('unknown flags (%#04x) in version %d '
645 raise RevlogError(_('unknown flags (%#04x) in version %d '
646 'revlog %s') %
646 'revlog %s') %
647 (flags >> 16, fmt, self.indexfile))
647 (flags >> 16, fmt, self.indexfile))
648 elif fmt == REVLOGV1:
648 elif fmt == REVLOGV1:
649 if flags & ~REVLOGV1_FLAGS:
649 if flags & ~REVLOGV1_FLAGS:
650 raise RevlogError(_('unknown flags (%#04x) in version %d '
650 raise RevlogError(_('unknown flags (%#04x) in version %d '
651 'revlog %s') %
651 'revlog %s') %
652 (flags >> 16, fmt, self.indexfile))
652 (flags >> 16, fmt, self.indexfile))
653 elif fmt == REVLOGV2:
653 elif fmt == REVLOGV2:
654 if flags & ~REVLOGV2_FLAGS:
654 if flags & ~REVLOGV2_FLAGS:
655 raise RevlogError(_('unknown flags (%#04x) in version %d '
655 raise RevlogError(_('unknown flags (%#04x) in version %d '
656 'revlog %s') %
656 'revlog %s') %
657 (flags >> 16, fmt, self.indexfile))
657 (flags >> 16, fmt, self.indexfile))
658 else:
658 else:
659 raise RevlogError(_('unknown version (%d) in revlog %s') %
659 raise RevlogError(_('unknown version (%d) in revlog %s') %
660 (fmt, self.indexfile))
660 (fmt, self.indexfile))
661
661
662 self.storedeltachains = True
662 self.storedeltachains = True
663
663
664 self._io = revlogio()
664 self._io = revlogio()
665 if self.version == REVLOGV0:
665 if self.version == REVLOGV0:
666 self._io = revlogoldio()
666 self._io = revlogoldio()
667 try:
667 try:
668 d = self._io.parseindex(indexdata, self._inline)
668 d = self._io.parseindex(indexdata, self._inline)
669 except (ValueError, IndexError):
669 except (ValueError, IndexError):
670 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
670 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
671 self.index, nodemap, self._chunkcache = d
671 self.index, nodemap, self._chunkcache = d
672 if nodemap is not None:
672 if nodemap is not None:
673 self.nodemap = self._nodecache = nodemap
673 self.nodemap = self._nodecache = nodemap
674 if not self._chunkcache:
674 if not self._chunkcache:
675 self._chunkclear()
675 self._chunkclear()
676 # revnum -> (chain-length, sum-delta-length)
676 # revnum -> (chain-length, sum-delta-length)
677 self._chaininfocache = {}
677 self._chaininfocache = {}
678 # revlog header -> revlog compressor
678 # revlog header -> revlog compressor
679 self._decompressors = {}
679 self._decompressors = {}
680
680
681 @util.propertycache
681 @util.propertycache
682 def _compressor(self):
682 def _compressor(self):
683 return util.compengines[self._compengine].revlogcompressor()
683 return util.compengines[self._compengine].revlogcompressor()
684
684
685 def tip(self):
685 def tip(self):
686 return self.node(len(self.index) - 2)
686 return self.node(len(self.index) - 2)
687 def __contains__(self, rev):
687 def __contains__(self, rev):
688 return 0 <= rev < len(self)
688 return 0 <= rev < len(self)
689 def __len__(self):
689 def __len__(self):
690 return len(self.index) - 1
690 return len(self.index) - 1
691 def __iter__(self):
691 def __iter__(self):
692 return iter(xrange(len(self)))
692 return iter(xrange(len(self)))
693 def revs(self, start=0, stop=None):
693 def revs(self, start=0, stop=None):
694 """iterate over all rev in this revlog (from start to stop)"""
694 """iterate over all rev in this revlog (from start to stop)"""
695 step = 1
695 step = 1
696 if stop is not None:
696 if stop is not None:
697 if start > stop:
697 if start > stop:
698 step = -1
698 step = -1
699 stop += step
699 stop += step
700 else:
700 else:
701 stop = len(self)
701 stop = len(self)
702 return xrange(start, stop, step)
702 return xrange(start, stop, step)
703
703
704 @util.propertycache
704 @util.propertycache
705 def nodemap(self):
705 def nodemap(self):
706 self.rev(self.node(0))
706 self.rev(self.node(0))
707 return self._nodecache
707 return self._nodecache
708
708
709 def hasnode(self, node):
709 def hasnode(self, node):
710 try:
710 try:
711 self.rev(node)
711 self.rev(node)
712 return True
712 return True
713 except KeyError:
713 except KeyError:
714 return False
714 return False
715
715
716 def clearcaches(self):
716 def clearcaches(self):
717 self._cache = None
717 self._cache = None
718 self._chainbasecache.clear()
718 self._chainbasecache.clear()
719 self._chunkcache = (0, '')
719 self._chunkcache = (0, '')
720 self._pcache = {}
720 self._pcache = {}
721
721
722 try:
722 try:
723 self._nodecache.clearcaches()
723 self._nodecache.clearcaches()
724 except AttributeError:
724 except AttributeError:
725 self._nodecache = {nullid: nullrev}
725 self._nodecache = {nullid: nullrev}
726 self._nodepos = None
726 self._nodepos = None
727
727
728 def rev(self, node):
728 def rev(self, node):
729 try:
729 try:
730 return self._nodecache[node]
730 return self._nodecache[node]
731 except TypeError:
731 except TypeError:
732 raise
732 raise
733 except RevlogError:
733 except RevlogError:
734 # parsers.c radix tree lookup failed
734 # parsers.c radix tree lookup failed
735 if node == wdirid:
735 if node == wdirid:
736 raise error.WdirUnsupported
736 raise error.WdirUnsupported
737 raise LookupError(node, self.indexfile, _('no node'))
737 raise LookupError(node, self.indexfile, _('no node'))
738 except KeyError:
738 except KeyError:
739 # pure python cache lookup failed
739 # pure python cache lookup failed
740 n = self._nodecache
740 n = self._nodecache
741 i = self.index
741 i = self.index
742 p = self._nodepos
742 p = self._nodepos
743 if p is None:
743 if p is None:
744 p = len(i) - 2
744 p = len(i) - 2
745 for r in xrange(p, -1, -1):
745 for r in xrange(p, -1, -1):
746 v = i[r][7]
746 v = i[r][7]
747 n[v] = r
747 n[v] = r
748 if v == node:
748 if v == node:
749 self._nodepos = r - 1
749 self._nodepos = r - 1
750 return r
750 return r
751 if node == wdirid:
751 if node == wdirid:
752 raise error.WdirUnsupported
752 raise error.WdirUnsupported
753 raise LookupError(node, self.indexfile, _('no node'))
753 raise LookupError(node, self.indexfile, _('no node'))
754
754
755 # Accessors for index entries.
755 # Accessors for index entries.
756
756
757 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
757 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
758 # are flags.
758 # are flags.
759 def start(self, rev):
759 def start(self, rev):
760 return int(self.index[rev][0] >> 16)
760 return int(self.index[rev][0] >> 16)
761
761
762 def flags(self, rev):
762 def flags(self, rev):
763 return self.index[rev][0] & 0xFFFF
763 return self.index[rev][0] & 0xFFFF
764
764
765 def length(self, rev):
765 def length(self, rev):
766 return self.index[rev][1]
766 return self.index[rev][1]
767
767
768 def rawsize(self, rev):
768 def rawsize(self, rev):
769 """return the length of the uncompressed text for a given revision"""
769 """return the length of the uncompressed text for a given revision"""
770 l = self.index[rev][2]
770 l = self.index[rev][2]
771 if l >= 0:
771 if l >= 0:
772 return l
772 return l
773
773
774 t = self.revision(rev, raw=True)
774 t = self.revision(rev, raw=True)
775 return len(t)
775 return len(t)
776
776
777 def size(self, rev):
777 def size(self, rev):
778 """length of non-raw text (processed by a "read" flag processor)"""
778 """length of non-raw text (processed by a "read" flag processor)"""
779 # fast path: if no "read" flag processor could change the content,
779 # fast path: if no "read" flag processor could change the content,
780 # size is rawsize. note: ELLIPSIS is known to not change the content.
780 # size is rawsize. note: ELLIPSIS is known to not change the content.
781 flags = self.flags(rev)
781 flags = self.flags(rev)
782 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
782 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
783 return self.rawsize(rev)
783 return self.rawsize(rev)
784
784
785 return len(self.revision(rev, raw=False))
785 return len(self.revision(rev, raw=False))
786
786
787 def chainbase(self, rev):
787 def chainbase(self, rev):
788 base = self._chainbasecache.get(rev)
788 base = self._chainbasecache.get(rev)
789 if base is not None:
789 if base is not None:
790 return base
790 return base
791
791
792 index = self.index
792 index = self.index
793 base = index[rev][3]
793 base = index[rev][3]
794 while base != rev:
794 while base != rev:
795 rev = base
795 rev = base
796 base = index[rev][3]
796 base = index[rev][3]
797
797
798 self._chainbasecache[rev] = base
798 self._chainbasecache[rev] = base
799 return base
799 return base
800
800
801 def linkrev(self, rev):
801 def linkrev(self, rev):
802 return self.index[rev][4]
802 return self.index[rev][4]
803
803
804 def parentrevs(self, rev):
804 def parentrevs(self, rev):
805 try:
805 try:
806 entry = self.index[rev]
806 entry = self.index[rev]
807 except IndexError:
807 except IndexError:
808 if rev == wdirrev:
808 if rev == wdirrev:
809 raise error.WdirUnsupported
809 raise error.WdirUnsupported
810 raise
810 raise
811
811
812 return entry[5], entry[6]
812 return entry[5], entry[6]
813
813
814 def node(self, rev):
814 def node(self, rev):
815 try:
815 try:
816 return self.index[rev][7]
816 return self.index[rev][7]
817 except IndexError:
817 except IndexError:
818 if rev == wdirrev:
818 if rev == wdirrev:
819 raise error.WdirUnsupported
819 raise error.WdirUnsupported
820 raise
820 raise
821
821
822 # Derived from index values.
822 # Derived from index values.
823
823
824 def end(self, rev):
824 def end(self, rev):
825 return self.start(rev) + self.length(rev)
825 return self.start(rev) + self.length(rev)
826
826
827 def parents(self, node):
827 def parents(self, node):
828 i = self.index
828 i = self.index
829 d = i[self.rev(node)]
829 d = i[self.rev(node)]
830 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
830 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
831
831
832 def chainlen(self, rev):
832 def chainlen(self, rev):
833 return self._chaininfo(rev)[0]
833 return self._chaininfo(rev)[0]
834
834
835 def _chaininfo(self, rev):
835 def _chaininfo(self, rev):
836 chaininfocache = self._chaininfocache
836 chaininfocache = self._chaininfocache
837 if rev in chaininfocache:
837 if rev in chaininfocache:
838 return chaininfocache[rev]
838 return chaininfocache[rev]
839 index = self.index
839 index = self.index
840 generaldelta = self._generaldelta
840 generaldelta = self._generaldelta
841 iterrev = rev
841 iterrev = rev
842 e = index[iterrev]
842 e = index[iterrev]
843 clen = 0
843 clen = 0
844 compresseddeltalen = 0
844 compresseddeltalen = 0
845 while iterrev != e[3]:
845 while iterrev != e[3]:
846 clen += 1
846 clen += 1
847 compresseddeltalen += e[1]
847 compresseddeltalen += e[1]
848 if generaldelta:
848 if generaldelta:
849 iterrev = e[3]
849 iterrev = e[3]
850 else:
850 else:
851 iterrev -= 1
851 iterrev -= 1
852 if iterrev in chaininfocache:
852 if iterrev in chaininfocache:
853 t = chaininfocache[iterrev]
853 t = chaininfocache[iterrev]
854 clen += t[0]
854 clen += t[0]
855 compresseddeltalen += t[1]
855 compresseddeltalen += t[1]
856 break
856 break
857 e = index[iterrev]
857 e = index[iterrev]
858 else:
858 else:
859 # Add text length of base since decompressing that also takes
859 # Add text length of base since decompressing that also takes
860 # work. For cache hits the length is already included.
860 # work. For cache hits the length is already included.
861 compresseddeltalen += e[1]
861 compresseddeltalen += e[1]
862 r = (clen, compresseddeltalen)
862 r = (clen, compresseddeltalen)
863 chaininfocache[rev] = r
863 chaininfocache[rev] = r
864 return r
864 return r
865
865
866 def _deltachain(self, rev, stoprev=None):
866 def _deltachain(self, rev, stoprev=None):
867 """Obtain the delta chain for a revision.
867 """Obtain the delta chain for a revision.
868
868
869 ``stoprev`` specifies a revision to stop at. If not specified, we
869 ``stoprev`` specifies a revision to stop at. If not specified, we
870 stop at the base of the chain.
870 stop at the base of the chain.
871
871
872 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
872 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
873 revs in ascending order and ``stopped`` is a bool indicating whether
873 revs in ascending order and ``stopped`` is a bool indicating whether
874 ``stoprev`` was hit.
874 ``stoprev`` was hit.
875 """
875 """
876 # Try C implementation.
876 # Try C implementation.
877 try:
877 try:
878 return self.index.deltachain(rev, stoprev, self._generaldelta)
878 return self.index.deltachain(rev, stoprev, self._generaldelta)
879 except AttributeError:
879 except AttributeError:
880 pass
880 pass
881
881
882 chain = []
882 chain = []
883
883
884 # Alias to prevent attribute lookup in tight loop.
884 # Alias to prevent attribute lookup in tight loop.
885 index = self.index
885 index = self.index
886 generaldelta = self._generaldelta
886 generaldelta = self._generaldelta
887
887
888 iterrev = rev
888 iterrev = rev
889 e = index[iterrev]
889 e = index[iterrev]
890 while iterrev != e[3] and iterrev != stoprev:
890 while iterrev != e[3] and iterrev != stoprev:
891 chain.append(iterrev)
891 chain.append(iterrev)
892 if generaldelta:
892 if generaldelta:
893 iterrev = e[3]
893 iterrev = e[3]
894 else:
894 else:
895 iterrev -= 1
895 iterrev -= 1
896 e = index[iterrev]
896 e = index[iterrev]
897
897
898 if iterrev == stoprev:
898 if iterrev == stoprev:
899 stopped = True
899 stopped = True
900 else:
900 else:
901 chain.append(iterrev)
901 chain.append(iterrev)
902 stopped = False
902 stopped = False
903
903
904 chain.reverse()
904 chain.reverse()
905 return chain, stopped
905 return chain, stopped
906
906
907 def ancestors(self, revs, stoprev=0, inclusive=False):
907 def ancestors(self, revs, stoprev=0, inclusive=False):
908 """Generate the ancestors of 'revs' in reverse topological order.
908 """Generate the ancestors of 'revs' in reverse topological order.
909 Does not generate revs lower than stoprev.
909 Does not generate revs lower than stoprev.
910
910
911 See the documentation for ancestor.lazyancestors for more details."""
911 See the documentation for ancestor.lazyancestors for more details."""
912
912
913 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
913 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
914 inclusive=inclusive)
914 inclusive=inclusive)
915
915
916 def descendants(self, revs):
916 def descendants(self, revs):
917 """Generate the descendants of 'revs' in revision order.
917 """Generate the descendants of 'revs' in revision order.
918
918
919 Yield a sequence of revision numbers starting with a child of
919 Yield a sequence of revision numbers starting with a child of
920 some rev in revs, i.e., each revision is *not* considered a
920 some rev in revs, i.e., each revision is *not* considered a
921 descendant of itself. Results are ordered by revision number (a
921 descendant of itself. Results are ordered by revision number (a
922 topological sort)."""
922 topological sort)."""
923 first = min(revs)
923 first = min(revs)
924 if first == nullrev:
924 if first == nullrev:
925 for i in self:
925 for i in self:
926 yield i
926 yield i
927 return
927 return
928
928
929 seen = set(revs)
929 seen = set(revs)
930 for i in self.revs(start=first + 1):
930 for i in self.revs(start=first + 1):
931 for x in self.parentrevs(i):
931 for x in self.parentrevs(i):
932 if x != nullrev and x in seen:
932 if x != nullrev and x in seen:
933 seen.add(i)
933 seen.add(i)
934 yield i
934 yield i
935 break
935 break
936
936
937 def findcommonmissing(self, common=None, heads=None):
937 def findcommonmissing(self, common=None, heads=None):
938 """Return a tuple of the ancestors of common and the ancestors of heads
938 """Return a tuple of the ancestors of common and the ancestors of heads
939 that are not ancestors of common. In revset terminology, we return the
939 that are not ancestors of common. In revset terminology, we return the
940 tuple:
940 tuple:
941
941
942 ::common, (::heads) - (::common)
942 ::common, (::heads) - (::common)
943
943
944 The list is sorted by revision number, meaning it is
944 The list is sorted by revision number, meaning it is
945 topologically sorted.
945 topologically sorted.
946
946
947 'heads' and 'common' are both lists of node IDs. If heads is
947 'heads' and 'common' are both lists of node IDs. If heads is
948 not supplied, uses all of the revlog's heads. If common is not
948 not supplied, uses all of the revlog's heads. If common is not
949 supplied, uses nullid."""
949 supplied, uses nullid."""
950 if common is None:
950 if common is None:
951 common = [nullid]
951 common = [nullid]
952 if heads is None:
952 if heads is None:
953 heads = self.heads()
953 heads = self.heads()
954
954
955 common = [self.rev(n) for n in common]
955 common = [self.rev(n) for n in common]
956 heads = [self.rev(n) for n in heads]
956 heads = [self.rev(n) for n in heads]
957
957
958 # we want the ancestors, but inclusive
958 # we want the ancestors, but inclusive
959 class lazyset(object):
959 class lazyset(object):
960 def __init__(self, lazyvalues):
960 def __init__(self, lazyvalues):
961 self.addedvalues = set()
961 self.addedvalues = set()
962 self.lazyvalues = lazyvalues
962 self.lazyvalues = lazyvalues
963
963
964 def __contains__(self, value):
964 def __contains__(self, value):
965 return value in self.addedvalues or value in self.lazyvalues
965 return value in self.addedvalues or value in self.lazyvalues
966
966
967 def __iter__(self):
967 def __iter__(self):
968 added = self.addedvalues
968 added = self.addedvalues
969 for r in added:
969 for r in added:
970 yield r
970 yield r
971 for r in self.lazyvalues:
971 for r in self.lazyvalues:
972 if not r in added:
972 if not r in added:
973 yield r
973 yield r
974
974
975 def add(self, value):
975 def add(self, value):
976 self.addedvalues.add(value)
976 self.addedvalues.add(value)
977
977
978 def update(self, values):
978 def update(self, values):
979 self.addedvalues.update(values)
979 self.addedvalues.update(values)
980
980
981 has = lazyset(self.ancestors(common))
981 has = lazyset(self.ancestors(common))
982 has.add(nullrev)
982 has.add(nullrev)
983 has.update(common)
983 has.update(common)
984
984
985 # take all ancestors from heads that aren't in has
985 # take all ancestors from heads that aren't in has
986 missing = set()
986 missing = set()
987 visit = collections.deque(r for r in heads if r not in has)
987 visit = collections.deque(r for r in heads if r not in has)
988 while visit:
988 while visit:
989 r = visit.popleft()
989 r = visit.popleft()
990 if r in missing:
990 if r in missing:
991 continue
991 continue
992 else:
992 else:
993 missing.add(r)
993 missing.add(r)
994 for p in self.parentrevs(r):
994 for p in self.parentrevs(r):
995 if p not in has:
995 if p not in has:
996 visit.append(p)
996 visit.append(p)
997 missing = list(missing)
997 missing = list(missing)
998 missing.sort()
998 missing.sort()
999 return has, [self.node(miss) for miss in missing]
999 return has, [self.node(miss) for miss in missing]
1000
1000
1001 def incrementalmissingrevs(self, common=None):
1001 def incrementalmissingrevs(self, common=None):
1002 """Return an object that can be used to incrementally compute the
1002 """Return an object that can be used to incrementally compute the
1003 revision numbers of the ancestors of arbitrary sets that are not
1003 revision numbers of the ancestors of arbitrary sets that are not
1004 ancestors of common. This is an ancestor.incrementalmissingancestors
1004 ancestors of common. This is an ancestor.incrementalmissingancestors
1005 object.
1005 object.
1006
1006
1007 'common' is a list of revision numbers. If common is not supplied, uses
1007 'common' is a list of revision numbers. If common is not supplied, uses
1008 nullrev.
1008 nullrev.
1009 """
1009 """
1010 if common is None:
1010 if common is None:
1011 common = [nullrev]
1011 common = [nullrev]
1012
1012
1013 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1013 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1014
1014
1015 def findmissingrevs(self, common=None, heads=None):
1015 def findmissingrevs(self, common=None, heads=None):
1016 """Return the revision numbers of the ancestors of heads that
1016 """Return the revision numbers of the ancestors of heads that
1017 are not ancestors of common.
1017 are not ancestors of common.
1018
1018
1019 More specifically, return a list of revision numbers corresponding to
1019 More specifically, return a list of revision numbers corresponding to
1020 nodes N such that every N satisfies the following constraints:
1020 nodes N such that every N satisfies the following constraints:
1021
1021
1022 1. N is an ancestor of some node in 'heads'
1022 1. N is an ancestor of some node in 'heads'
1023 2. N is not an ancestor of any node in 'common'
1023 2. N is not an ancestor of any node in 'common'
1024
1024
1025 The list is sorted by revision number, meaning it is
1025 The list is sorted by revision number, meaning it is
1026 topologically sorted.
1026 topologically sorted.
1027
1027
1028 'heads' and 'common' are both lists of revision numbers. If heads is
1028 'heads' and 'common' are both lists of revision numbers. If heads is
1029 not supplied, uses all of the revlog's heads. If common is not
1029 not supplied, uses all of the revlog's heads. If common is not
1030 supplied, uses nullid."""
1030 supplied, uses nullid."""
1031 if common is None:
1031 if common is None:
1032 common = [nullrev]
1032 common = [nullrev]
1033 if heads is None:
1033 if heads is None:
1034 heads = self.headrevs()
1034 heads = self.headrevs()
1035
1035
1036 inc = self.incrementalmissingrevs(common=common)
1036 inc = self.incrementalmissingrevs(common=common)
1037 return inc.missingancestors(heads)
1037 return inc.missingancestors(heads)
1038
1038
1039 def findmissing(self, common=None, heads=None):
1039 def findmissing(self, common=None, heads=None):
1040 """Return the ancestors of heads that are not ancestors of common.
1040 """Return the ancestors of heads that are not ancestors of common.
1041
1041
1042 More specifically, return a list of nodes N such that every N
1042 More specifically, return a list of nodes N such that every N
1043 satisfies the following constraints:
1043 satisfies the following constraints:
1044
1044
1045 1. N is an ancestor of some node in 'heads'
1045 1. N is an ancestor of some node in 'heads'
1046 2. N is not an ancestor of any node in 'common'
1046 2. N is not an ancestor of any node in 'common'
1047
1047
1048 The list is sorted by revision number, meaning it is
1048 The list is sorted by revision number, meaning it is
1049 topologically sorted.
1049 topologically sorted.
1050
1050
1051 'heads' and 'common' are both lists of node IDs. If heads is
1051 'heads' and 'common' are both lists of node IDs. If heads is
1052 not supplied, uses all of the revlog's heads. If common is not
1052 not supplied, uses all of the revlog's heads. If common is not
1053 supplied, uses nullid."""
1053 supplied, uses nullid."""
1054 if common is None:
1054 if common is None:
1055 common = [nullid]
1055 common = [nullid]
1056 if heads is None:
1056 if heads is None:
1057 heads = self.heads()
1057 heads = self.heads()
1058
1058
1059 common = [self.rev(n) for n in common]
1059 common = [self.rev(n) for n in common]
1060 heads = [self.rev(n) for n in heads]
1060 heads = [self.rev(n) for n in heads]
1061
1061
1062 inc = self.incrementalmissingrevs(common=common)
1062 inc = self.incrementalmissingrevs(common=common)
1063 return [self.node(r) for r in inc.missingancestors(heads)]
1063 return [self.node(r) for r in inc.missingancestors(heads)]
1064
1064
1065 def nodesbetween(self, roots=None, heads=None):
1065 def nodesbetween(self, roots=None, heads=None):
1066 """Return a topological path from 'roots' to 'heads'.
1066 """Return a topological path from 'roots' to 'heads'.
1067
1067
1068 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1068 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1069 topologically sorted list of all nodes N that satisfy both of
1069 topologically sorted list of all nodes N that satisfy both of
1070 these constraints:
1070 these constraints:
1071
1071
1072 1. N is a descendant of some node in 'roots'
1072 1. N is a descendant of some node in 'roots'
1073 2. N is an ancestor of some node in 'heads'
1073 2. N is an ancestor of some node in 'heads'
1074
1074
1075 Every node is considered to be both a descendant and an ancestor
1075 Every node is considered to be both a descendant and an ancestor
1076 of itself, so every reachable node in 'roots' and 'heads' will be
1076 of itself, so every reachable node in 'roots' and 'heads' will be
1077 included in 'nodes'.
1077 included in 'nodes'.
1078
1078
1079 'outroots' is the list of reachable nodes in 'roots', i.e., the
1079 'outroots' is the list of reachable nodes in 'roots', i.e., the
1080 subset of 'roots' that is returned in 'nodes'. Likewise,
1080 subset of 'roots' that is returned in 'nodes'. Likewise,
1081 'outheads' is the subset of 'heads' that is also in 'nodes'.
1081 'outheads' is the subset of 'heads' that is also in 'nodes'.
1082
1082
1083 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1083 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1084 unspecified, uses nullid as the only root. If 'heads' is
1084 unspecified, uses nullid as the only root. If 'heads' is
1085 unspecified, uses list of all of the revlog's heads."""
1085 unspecified, uses list of all of the revlog's heads."""
1086 nonodes = ([], [], [])
1086 nonodes = ([], [], [])
1087 if roots is not None:
1087 if roots is not None:
1088 roots = list(roots)
1088 roots = list(roots)
1089 if not roots:
1089 if not roots:
1090 return nonodes
1090 return nonodes
1091 lowestrev = min([self.rev(n) for n in roots])
1091 lowestrev = min([self.rev(n) for n in roots])
1092 else:
1092 else:
1093 roots = [nullid] # Everybody's a descendant of nullid
1093 roots = [nullid] # Everybody's a descendant of nullid
1094 lowestrev = nullrev
1094 lowestrev = nullrev
1095 if (lowestrev == nullrev) and (heads is None):
1095 if (lowestrev == nullrev) and (heads is None):
1096 # We want _all_ the nodes!
1096 # We want _all_ the nodes!
1097 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1097 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1098 if heads is None:
1098 if heads is None:
1099 # All nodes are ancestors, so the latest ancestor is the last
1099 # All nodes are ancestors, so the latest ancestor is the last
1100 # node.
1100 # node.
1101 highestrev = len(self) - 1
1101 highestrev = len(self) - 1
1102 # Set ancestors to None to signal that every node is an ancestor.
1102 # Set ancestors to None to signal that every node is an ancestor.
1103 ancestors = None
1103 ancestors = None
1104 # Set heads to an empty dictionary for later discovery of heads
1104 # Set heads to an empty dictionary for later discovery of heads
1105 heads = {}
1105 heads = {}
1106 else:
1106 else:
1107 heads = list(heads)
1107 heads = list(heads)
1108 if not heads:
1108 if not heads:
1109 return nonodes
1109 return nonodes
1110 ancestors = set()
1110 ancestors = set()
1111 # Turn heads into a dictionary so we can remove 'fake' heads.
1111 # Turn heads into a dictionary so we can remove 'fake' heads.
1112 # Also, later we will be using it to filter out the heads we can't
1112 # Also, later we will be using it to filter out the heads we can't
1113 # find from roots.
1113 # find from roots.
1114 heads = dict.fromkeys(heads, False)
1114 heads = dict.fromkeys(heads, False)
1115 # Start at the top and keep marking parents until we're done.
1115 # Start at the top and keep marking parents until we're done.
1116 nodestotag = set(heads)
1116 nodestotag = set(heads)
1117 # Remember where the top was so we can use it as a limit later.
1117 # Remember where the top was so we can use it as a limit later.
1118 highestrev = max([self.rev(n) for n in nodestotag])
1118 highestrev = max([self.rev(n) for n in nodestotag])
1119 while nodestotag:
1119 while nodestotag:
1120 # grab a node to tag
1120 # grab a node to tag
1121 n = nodestotag.pop()
1121 n = nodestotag.pop()
1122 # Never tag nullid
1122 # Never tag nullid
1123 if n == nullid:
1123 if n == nullid:
1124 continue
1124 continue
1125 # A node's revision number represents its place in a
1125 # A node's revision number represents its place in a
1126 # topologically sorted list of nodes.
1126 # topologically sorted list of nodes.
1127 r = self.rev(n)
1127 r = self.rev(n)
1128 if r >= lowestrev:
1128 if r >= lowestrev:
1129 if n not in ancestors:
1129 if n not in ancestors:
1130 # If we are possibly a descendant of one of the roots
1130 # If we are possibly a descendant of one of the roots
1131 # and we haven't already been marked as an ancestor
1131 # and we haven't already been marked as an ancestor
1132 ancestors.add(n) # Mark as ancestor
1132 ancestors.add(n) # Mark as ancestor
1133 # Add non-nullid parents to list of nodes to tag.
1133 # Add non-nullid parents to list of nodes to tag.
1134 nodestotag.update([p for p in self.parents(n) if
1134 nodestotag.update([p for p in self.parents(n) if
1135 p != nullid])
1135 p != nullid])
1136 elif n in heads: # We've seen it before, is it a fake head?
1136 elif n in heads: # We've seen it before, is it a fake head?
1137 # So it is, real heads should not be the ancestors of
1137 # So it is, real heads should not be the ancestors of
1138 # any other heads.
1138 # any other heads.
1139 heads.pop(n)
1139 heads.pop(n)
1140 if not ancestors:
1140 if not ancestors:
1141 return nonodes
1141 return nonodes
1142 # Now that we have our set of ancestors, we want to remove any
1142 # Now that we have our set of ancestors, we want to remove any
1143 # roots that are not ancestors.
1143 # roots that are not ancestors.
1144
1144
1145 # If one of the roots was nullid, everything is included anyway.
1145 # If one of the roots was nullid, everything is included anyway.
1146 if lowestrev > nullrev:
1146 if lowestrev > nullrev:
1147 # But, since we weren't, let's recompute the lowest rev to not
1147 # But, since we weren't, let's recompute the lowest rev to not
1148 # include roots that aren't ancestors.
1148 # include roots that aren't ancestors.
1149
1149
1150 # Filter out roots that aren't ancestors of heads
1150 # Filter out roots that aren't ancestors of heads
1151 roots = [root for root in roots if root in ancestors]
1151 roots = [root for root in roots if root in ancestors]
1152 # Recompute the lowest revision
1152 # Recompute the lowest revision
1153 if roots:
1153 if roots:
1154 lowestrev = min([self.rev(root) for root in roots])
1154 lowestrev = min([self.rev(root) for root in roots])
1155 else:
1155 else:
1156 # No more roots? Return empty list
1156 # No more roots? Return empty list
1157 return nonodes
1157 return nonodes
1158 else:
1158 else:
1159 # We are descending from nullid, and don't need to care about
1159 # We are descending from nullid, and don't need to care about
1160 # any other roots.
1160 # any other roots.
1161 lowestrev = nullrev
1161 lowestrev = nullrev
1162 roots = [nullid]
1162 roots = [nullid]
1163 # Transform our roots list into a set.
1163 # Transform our roots list into a set.
1164 descendants = set(roots)
1164 descendants = set(roots)
1165 # Also, keep the original roots so we can filter out roots that aren't
1165 # Also, keep the original roots so we can filter out roots that aren't
1166 # 'real' roots (i.e. are descended from other roots).
1166 # 'real' roots (i.e. are descended from other roots).
1167 roots = descendants.copy()
1167 roots = descendants.copy()
1168 # Our topologically sorted list of output nodes.
1168 # Our topologically sorted list of output nodes.
1169 orderedout = []
1169 orderedout = []
1170 # Don't start at nullid since we don't want nullid in our output list,
1170 # Don't start at nullid since we don't want nullid in our output list,
1171 # and if nullid shows up in descendants, empty parents will look like
1171 # and if nullid shows up in descendants, empty parents will look like
1172 # they're descendants.
1172 # they're descendants.
1173 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1173 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1174 n = self.node(r)
1174 n = self.node(r)
1175 isdescendant = False
1175 isdescendant = False
1176 if lowestrev == nullrev: # Everybody is a descendant of nullid
1176 if lowestrev == nullrev: # Everybody is a descendant of nullid
1177 isdescendant = True
1177 isdescendant = True
1178 elif n in descendants:
1178 elif n in descendants:
1179 # n is already a descendant
1179 # n is already a descendant
1180 isdescendant = True
1180 isdescendant = True
1181 # This check only needs to be done here because all the roots
1181 # This check only needs to be done here because all the roots
1182 # will start being marked is descendants before the loop.
1182 # will start being marked is descendants before the loop.
1183 if n in roots:
1183 if n in roots:
1184 # If n was a root, check if it's a 'real' root.
1184 # If n was a root, check if it's a 'real' root.
1185 p = tuple(self.parents(n))
1185 p = tuple(self.parents(n))
1186 # If any of its parents are descendants, it's not a root.
1186 # If any of its parents are descendants, it's not a root.
1187 if (p[0] in descendants) or (p[1] in descendants):
1187 if (p[0] in descendants) or (p[1] in descendants):
1188 roots.remove(n)
1188 roots.remove(n)
1189 else:
1189 else:
1190 p = tuple(self.parents(n))
1190 p = tuple(self.parents(n))
1191 # A node is a descendant if either of its parents are
1191 # A node is a descendant if either of its parents are
1192 # descendants. (We seeded the dependents list with the roots
1192 # descendants. (We seeded the dependents list with the roots
1193 # up there, remember?)
1193 # up there, remember?)
1194 if (p[0] in descendants) or (p[1] in descendants):
1194 if (p[0] in descendants) or (p[1] in descendants):
1195 descendants.add(n)
1195 descendants.add(n)
1196 isdescendant = True
1196 isdescendant = True
1197 if isdescendant and ((ancestors is None) or (n in ancestors)):
1197 if isdescendant and ((ancestors is None) or (n in ancestors)):
1198 # Only include nodes that are both descendants and ancestors.
1198 # Only include nodes that are both descendants and ancestors.
1199 orderedout.append(n)
1199 orderedout.append(n)
1200 if (ancestors is not None) and (n in heads):
1200 if (ancestors is not None) and (n in heads):
1201 # We're trying to figure out which heads are reachable
1201 # We're trying to figure out which heads are reachable
1202 # from roots.
1202 # from roots.
1203 # Mark this head as having been reached
1203 # Mark this head as having been reached
1204 heads[n] = True
1204 heads[n] = True
1205 elif ancestors is None:
1205 elif ancestors is None:
1206 # Otherwise, we're trying to discover the heads.
1206 # Otherwise, we're trying to discover the heads.
1207 # Assume this is a head because if it isn't, the next step
1207 # Assume this is a head because if it isn't, the next step
1208 # will eventually remove it.
1208 # will eventually remove it.
1209 heads[n] = True
1209 heads[n] = True
1210 # But, obviously its parents aren't.
1210 # But, obviously its parents aren't.
1211 for p in self.parents(n):
1211 for p in self.parents(n):
1212 heads.pop(p, None)
1212 heads.pop(p, None)
1213 heads = [head for head, flag in heads.iteritems() if flag]
1213 heads = [head for head, flag in heads.iteritems() if flag]
1214 roots = list(roots)
1214 roots = list(roots)
1215 assert orderedout
1215 assert orderedout
1216 assert roots
1216 assert roots
1217 assert heads
1217 assert heads
1218 return (orderedout, roots, heads)
1218 return (orderedout, roots, heads)
1219
1219
1220 def headrevs(self):
1220 def headrevs(self):
1221 try:
1221 try:
1222 return self.index.headrevs()
1222 return self.index.headrevs()
1223 except AttributeError:
1223 except AttributeError:
1224 return self._headrevs()
1224 return self._headrevs()
1225
1225
1226 def computephases(self, roots):
1226 def computephases(self, roots):
1227 return self.index.computephasesmapsets(roots)
1227 return self.index.computephasesmapsets(roots)
1228
1228
1229 def _headrevs(self):
1229 def _headrevs(self):
1230 count = len(self)
1230 count = len(self)
1231 if not count:
1231 if not count:
1232 return [nullrev]
1232 return [nullrev]
1233 # we won't iter over filtered rev so nobody is a head at start
1233 # we won't iter over filtered rev so nobody is a head at start
1234 ishead = [0] * (count + 1)
1234 ishead = [0] * (count + 1)
1235 index = self.index
1235 index = self.index
1236 for r in self:
1236 for r in self:
1237 ishead[r] = 1 # I may be an head
1237 ishead[r] = 1 # I may be an head
1238 e = index[r]
1238 e = index[r]
1239 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1239 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1240 return [r for r, val in enumerate(ishead) if val]
1240 return [r for r, val in enumerate(ishead) if val]
1241
1241
1242 def heads(self, start=None, stop=None):
1242 def heads(self, start=None, stop=None):
1243 """return the list of all nodes that have no children
1243 """return the list of all nodes that have no children
1244
1244
1245 if start is specified, only heads that are descendants of
1245 if start is specified, only heads that are descendants of
1246 start will be returned
1246 start will be returned
1247 if stop is specified, it will consider all the revs from stop
1247 if stop is specified, it will consider all the revs from stop
1248 as if they had no children
1248 as if they had no children
1249 """
1249 """
1250 if start is None and stop is None:
1250 if start is None and stop is None:
1251 if not len(self):
1251 if not len(self):
1252 return [nullid]
1252 return [nullid]
1253 return [self.node(r) for r in self.headrevs()]
1253 return [self.node(r) for r in self.headrevs()]
1254
1254
1255 if start is None:
1255 if start is None:
1256 start = nullid
1256 start = nullid
1257 if stop is None:
1257 if stop is None:
1258 stop = []
1258 stop = []
1259 stoprevs = set([self.rev(n) for n in stop])
1259 stoprevs = set([self.rev(n) for n in stop])
1260 startrev = self.rev(start)
1260 startrev = self.rev(start)
1261 reachable = {startrev}
1261 reachable = {startrev}
1262 heads = {startrev}
1262 heads = {startrev}
1263
1263
1264 parentrevs = self.parentrevs
1264 parentrevs = self.parentrevs
1265 for r in self.revs(start=startrev + 1):
1265 for r in self.revs(start=startrev + 1):
1266 for p in parentrevs(r):
1266 for p in parentrevs(r):
1267 if p in reachable:
1267 if p in reachable:
1268 if r not in stoprevs:
1268 if r not in stoprevs:
1269 reachable.add(r)
1269 reachable.add(r)
1270 heads.add(r)
1270 heads.add(r)
1271 if p in heads and p not in stoprevs:
1271 if p in heads and p not in stoprevs:
1272 heads.remove(p)
1272 heads.remove(p)
1273
1273
1274 return [self.node(r) for r in heads]
1274 return [self.node(r) for r in heads]
1275
1275
1276 def children(self, node):
1276 def children(self, node):
1277 """find the children of a given node"""
1277 """find the children of a given node"""
1278 c = []
1278 c = []
1279 p = self.rev(node)
1279 p = self.rev(node)
1280 for r in self.revs(start=p + 1):
1280 for r in self.revs(start=p + 1):
1281 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1281 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1282 if prevs:
1282 if prevs:
1283 for pr in prevs:
1283 for pr in prevs:
1284 if pr == p:
1284 if pr == p:
1285 c.append(self.node(r))
1285 c.append(self.node(r))
1286 elif p == nullrev:
1286 elif p == nullrev:
1287 c.append(self.node(r))
1287 c.append(self.node(r))
1288 return c
1288 return c
1289
1289
1290 def descendant(self, start, end):
1290 def descendant(self, start, end):
1291 if start == nullrev:
1291 if start == nullrev:
1292 return True
1292 return True
1293 for i in self.descendants([start]):
1293 for i in self.descendants([start]):
1294 if i == end:
1294 if i == end:
1295 return True
1295 return True
1296 elif i > end:
1296 elif i > end:
1297 break
1297 break
1298 return False
1298 return False
1299
1299
1300 def commonancestorsheads(self, a, b):
1300 def commonancestorsheads(self, a, b):
1301 """calculate all the heads of the common ancestors of nodes a and b"""
1301 """calculate all the heads of the common ancestors of nodes a and b"""
1302 a, b = self.rev(a), self.rev(b)
1302 a, b = self.rev(a), self.rev(b)
1303 try:
1303 try:
1304 ancs = self.index.commonancestorsheads(a, b)
1304 ancs = self.index.commonancestorsheads(a, b)
1305 except (AttributeError, OverflowError): # C implementation failed
1305 except (AttributeError, OverflowError): # C implementation failed
1306 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1306 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1307 return pycompat.maplist(self.node, ancs)
1307 return pycompat.maplist(self.node, ancs)
1308
1308
1309 def isancestor(self, a, b):
1309 def isancestor(self, a, b):
1310 """return True if node a is an ancestor of node b
1310 """return True if node a is an ancestor of node b
1311
1311
1312 The implementation of this is trivial but the use of
1312 The implementation of this is trivial but the use of
1313 commonancestorsheads is not."""
1313 commonancestorsheads is not."""
1314 return a in self.commonancestorsheads(a, b)
1314 return a in self.commonancestorsheads(a, b)
1315
1315
1316 def ancestor(self, a, b):
1316 def ancestor(self, a, b):
1317 """calculate the "best" common ancestor of nodes a and b"""
1317 """calculate the "best" common ancestor of nodes a and b"""
1318
1318
1319 a, b = self.rev(a), self.rev(b)
1319 a, b = self.rev(a), self.rev(b)
1320 try:
1320 try:
1321 ancs = self.index.ancestors(a, b)
1321 ancs = self.index.ancestors(a, b)
1322 except (AttributeError, OverflowError):
1322 except (AttributeError, OverflowError):
1323 ancs = ancestor.ancestors(self.parentrevs, a, b)
1323 ancs = ancestor.ancestors(self.parentrevs, a, b)
1324 if ancs:
1324 if ancs:
1325 # choose a consistent winner when there's a tie
1325 # choose a consistent winner when there's a tie
1326 return min(map(self.node, ancs))
1326 return min(map(self.node, ancs))
1327 return nullid
1327 return nullid
1328
1328
1329 def _match(self, id):
1329 def _match(self, id):
1330 if isinstance(id, int):
1330 if isinstance(id, int):
1331 # rev
1331 # rev
1332 return self.node(id)
1332 return self.node(id)
1333 if len(id) == 20:
1333 if len(id) == 20:
1334 # possibly a binary node
1334 # possibly a binary node
1335 # odds of a binary node being all hex in ASCII are 1 in 10**25
1335 # odds of a binary node being all hex in ASCII are 1 in 10**25
1336 try:
1336 try:
1337 node = id
1337 node = id
1338 self.rev(node) # quick search the index
1338 self.rev(node) # quick search the index
1339 return node
1339 return node
1340 except LookupError:
1340 except LookupError:
1341 pass # may be partial hex id
1341 pass # may be partial hex id
1342 try:
1342 try:
1343 # str(rev)
1343 # str(rev)
1344 rev = int(id)
1344 rev = int(id)
1345 if str(rev) != id:
1345 if str(rev) != id:
1346 raise ValueError
1346 raise ValueError
1347 if rev < 0:
1347 if rev < 0:
1348 rev = len(self) + rev
1348 rev = len(self) + rev
1349 if rev < 0 or rev >= len(self):
1349 if rev < 0 or rev >= len(self):
1350 raise ValueError
1350 raise ValueError
1351 return self.node(rev)
1351 return self.node(rev)
1352 except (ValueError, OverflowError):
1352 except (ValueError, OverflowError):
1353 pass
1353 pass
1354 if len(id) == 40:
1354 if len(id) == 40:
1355 try:
1355 try:
1356 # a full hex nodeid?
1356 # a full hex nodeid?
1357 node = bin(id)
1357 node = bin(id)
1358 self.rev(node)
1358 self.rev(node)
1359 return node
1359 return node
1360 except (TypeError, LookupError):
1360 except (TypeError, LookupError):
1361 pass
1361 pass
1362
1362
1363 def _partialmatch(self, id):
1363 def _partialmatch(self, id):
1364 maybewdir = wdirhex.startswith(id)
1364 maybewdir = wdirhex.startswith(id)
1365 try:
1365 try:
1366 partial = self.index.partialmatch(id)
1366 partial = self.index.partialmatch(id)
1367 if partial and self.hasnode(partial):
1367 if partial and self.hasnode(partial):
1368 if maybewdir:
1368 if maybewdir:
1369 # single 'ff...' match in radix tree, ambiguous with wdir
1369 # single 'ff...' match in radix tree, ambiguous with wdir
1370 raise RevlogError
1370 raise RevlogError
1371 return partial
1371 return partial
1372 if maybewdir:
1372 if maybewdir:
1373 # no 'ff...' match in radix tree, wdir identified
1373 # no 'ff...' match in radix tree, wdir identified
1374 raise error.WdirUnsupported
1374 raise error.WdirUnsupported
1375 return None
1375 return None
1376 except RevlogError:
1376 except RevlogError:
1377 # parsers.c radix tree lookup gave multiple matches
1377 # parsers.c radix tree lookup gave multiple matches
1378 # fast path: for unfiltered changelog, radix tree is accurate
1378 # fast path: for unfiltered changelog, radix tree is accurate
1379 if not getattr(self, 'filteredrevs', None):
1379 if not getattr(self, 'filteredrevs', None):
1380 raise LookupError(id, self.indexfile,
1380 raise LookupError(id, self.indexfile,
1381 _('ambiguous identifier'))
1381 _('ambiguous identifier'))
1382 # fall through to slow path that filters hidden revisions
1382 # fall through to slow path that filters hidden revisions
1383 except (AttributeError, ValueError):
1383 except (AttributeError, ValueError):
1384 # we are pure python, or key was too short to search radix tree
1384 # we are pure python, or key was too short to search radix tree
1385 pass
1385 pass
1386
1386
1387 if id in self._pcache:
1387 if id in self._pcache:
1388 return self._pcache[id]
1388 return self._pcache[id]
1389
1389
1390 if len(id) < 40:
1390 if len(id) < 40:
1391 try:
1391 try:
1392 # hex(node)[:...]
1392 # hex(node)[:...]
1393 l = len(id) // 2 # grab an even number of digits
1393 l = len(id) // 2 # grab an even number of digits
1394 prefix = bin(id[:l * 2])
1394 prefix = bin(id[:l * 2])
1395 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1395 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1396 nl = [n for n in nl if hex(n).startswith(id) and
1396 nl = [n for n in nl if hex(n).startswith(id) and
1397 self.hasnode(n)]
1397 self.hasnode(n)]
1398 if len(nl) > 0:
1398 if len(nl) > 0:
1399 if len(nl) == 1 and not maybewdir:
1399 if len(nl) == 1 and not maybewdir:
1400 self._pcache[id] = nl[0]
1400 self._pcache[id] = nl[0]
1401 return nl[0]
1401 return nl[0]
1402 raise LookupError(id, self.indexfile,
1402 raise LookupError(id, self.indexfile,
1403 _('ambiguous identifier'))
1403 _('ambiguous identifier'))
1404 if maybewdir:
1404 if maybewdir:
1405 raise error.WdirUnsupported
1405 raise error.WdirUnsupported
1406 return None
1406 return None
1407 except (TypeError, binascii.Error):
1407 except (TypeError, binascii.Error):
1408 pass
1408 pass
1409
1409
1410 def lookup(self, id):
1410 def lookup(self, id):
1411 """locate a node based on:
1411 """locate a node based on:
1412 - revision number or str(revision number)
1412 - revision number or str(revision number)
1413 - nodeid or subset of hex nodeid
1413 - nodeid or subset of hex nodeid
1414 """
1414 """
1415 n = self._match(id)
1415 n = self._match(id)
1416 if n is not None:
1416 if n is not None:
1417 return n
1417 return n
1418 n = self._partialmatch(id)
1418 n = self._partialmatch(id)
1419 if n:
1419 if n:
1420 return n
1420 return n
1421
1421
1422 raise LookupError(id, self.indexfile, _('no match found'))
1422 raise LookupError(id, self.indexfile, _('no match found'))
1423
1423
1424 def shortest(self, hexnode, minlength=1):
1424 def shortest(self, hexnode, minlength=1):
1425 """Find the shortest unambiguous prefix that matches hexnode."""
1425 """Find the shortest unambiguous prefix that matches hexnode."""
1426 def isvalid(test):
1426 def isvalid(test):
1427 try:
1427 try:
1428 if self._partialmatch(test) is None:
1428 if self._partialmatch(test) is None:
1429 return False
1429 return False
1430
1430
1431 try:
1431 try:
1432 i = int(test)
1432 i = int(test)
1433 # if we are a pure int, then starting with zero will not be
1433 # if we are a pure int, then starting with zero will not be
1434 # confused as a rev; or, obviously, if the int is larger
1434 # confused as a rev; or, obviously, if the int is larger
1435 # than the value of the tip rev
1435 # than the value of the tip rev
1436 if test[0] == '0' or i > len(self):
1436 if test[0] == '0' or i > len(self):
1437 return True
1437 return True
1438 return False
1438 return False
1439 except ValueError:
1439 except ValueError:
1440 return True
1440 return True
1441 except error.RevlogError:
1441 except error.RevlogError:
1442 return False
1442 return False
1443 except error.WdirUnsupported:
1443 except error.WdirUnsupported:
1444 # single 'ff...' match
1444 # single 'ff...' match
1445 return True
1445 return True
1446
1446
1447 shortest = hexnode
1447 shortest = hexnode
1448 startlength = max(6, minlength)
1448 startlength = max(6, minlength)
1449 length = startlength
1449 length = startlength
1450 while True:
1450 while True:
1451 test = hexnode[:length]
1451 test = hexnode[:length]
1452 if isvalid(test):
1452 if isvalid(test):
1453 shortest = test
1453 shortest = test
1454 if length == minlength or length > startlength:
1454 if length == minlength or length > startlength:
1455 return shortest
1455 return shortest
1456 length -= 1
1456 length -= 1
1457 else:
1457 else:
1458 length += 1
1458 length += 1
1459 if len(shortest) <= length:
1459 if len(shortest) <= length:
1460 return shortest
1460 return shortest
1461
1461
1462 def cmp(self, node, text):
1462 def cmp(self, node, text):
1463 """compare text with a given file revision
1463 """compare text with a given file revision
1464
1464
1465 returns True if text is different than what is stored.
1465 returns True if text is different than what is stored.
1466 """
1466 """
1467 p1, p2 = self.parents(node)
1467 p1, p2 = self.parents(node)
1468 return hash(text, p1, p2) != node
1468 return hash(text, p1, p2) != node
1469
1469
1470 def _cachesegment(self, offset, data):
1470 def _cachesegment(self, offset, data):
1471 """Add a segment to the revlog cache.
1471 """Add a segment to the revlog cache.
1472
1472
1473 Accepts an absolute offset and the data that is at that location.
1473 Accepts an absolute offset and the data that is at that location.
1474 """
1474 """
1475 o, d = self._chunkcache
1475 o, d = self._chunkcache
1476 # try to add to existing cache
1476 # try to add to existing cache
1477 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1477 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1478 self._chunkcache = o, d + data
1478 self._chunkcache = o, d + data
1479 else:
1479 else:
1480 self._chunkcache = offset, data
1480 self._chunkcache = offset, data
1481
1481
1482 def _readsegment(self, offset, length, df=None):
1482 def _readsegment(self, offset, length, df=None):
1483 """Load a segment of raw data from the revlog.
1483 """Load a segment of raw data from the revlog.
1484
1484
1485 Accepts an absolute offset, length to read, and an optional existing
1485 Accepts an absolute offset, length to read, and an optional existing
1486 file handle to read from.
1486 file handle to read from.
1487
1487
1488 If an existing file handle is passed, it will be seeked and the
1488 If an existing file handle is passed, it will be seeked and the
1489 original seek position will NOT be restored.
1489 original seek position will NOT be restored.
1490
1490
1491 Returns a str or buffer of raw byte data.
1491 Returns a str or buffer of raw byte data.
1492 """
1492 """
1493 if df is not None:
1493 if df is not None:
1494 closehandle = False
1494 closehandle = False
1495 else:
1495 else:
1496 if self._inline:
1496 if self._inline:
1497 df = self.opener(self.indexfile)
1497 df = self.opener(self.indexfile)
1498 else:
1498 else:
1499 df = self.opener(self.datafile)
1499 df = self.opener(self.datafile)
1500 closehandle = True
1500 closehandle = True
1501
1501
1502 # Cache data both forward and backward around the requested
1502 # Cache data both forward and backward around the requested
1503 # data, in a fixed size window. This helps speed up operations
1503 # data, in a fixed size window. This helps speed up operations
1504 # involving reading the revlog backwards.
1504 # involving reading the revlog backwards.
1505 cachesize = self._chunkcachesize
1505 cachesize = self._chunkcachesize
1506 realoffset = offset & ~(cachesize - 1)
1506 realoffset = offset & ~(cachesize - 1)
1507 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1507 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1508 - realoffset)
1508 - realoffset)
1509 df.seek(realoffset)
1509 df.seek(realoffset)
1510 d = df.read(reallength)
1510 d = df.read(reallength)
1511 if closehandle:
1511 if closehandle:
1512 df.close()
1512 df.close()
1513 self._cachesegment(realoffset, d)
1513 self._cachesegment(realoffset, d)
1514 if offset != realoffset or reallength != length:
1514 if offset != realoffset or reallength != length:
1515 return util.buffer(d, offset - realoffset, length)
1515 return util.buffer(d, offset - realoffset, length)
1516 return d
1516 return d
1517
1517
1518 def _getsegment(self, offset, length, df=None):
1518 def _getsegment(self, offset, length, df=None):
1519 """Obtain a segment of raw data from the revlog.
1519 """Obtain a segment of raw data from the revlog.
1520
1520
1521 Accepts an absolute offset, length of bytes to obtain, and an
1521 Accepts an absolute offset, length of bytes to obtain, and an
1522 optional file handle to the already-opened revlog. If the file
1522 optional file handle to the already-opened revlog. If the file
1523 handle is used, it's original seek position will not be preserved.
1523 handle is used, it's original seek position will not be preserved.
1524
1524
1525 Requests for data may be returned from a cache.
1525 Requests for data may be returned from a cache.
1526
1526
1527 Returns a str or a buffer instance of raw byte data.
1527 Returns a str or a buffer instance of raw byte data.
1528 """
1528 """
1529 o, d = self._chunkcache
1529 o, d = self._chunkcache
1530 l = len(d)
1530 l = len(d)
1531
1531
1532 # is it in the cache?
1532 # is it in the cache?
1533 cachestart = offset - o
1533 cachestart = offset - o
1534 cacheend = cachestart + length
1534 cacheend = cachestart + length
1535 if cachestart >= 0 and cacheend <= l:
1535 if cachestart >= 0 and cacheend <= l:
1536 if cachestart == 0 and cacheend == l:
1536 if cachestart == 0 and cacheend == l:
1537 return d # avoid a copy
1537 return d # avoid a copy
1538 return util.buffer(d, cachestart, cacheend - cachestart)
1538 return util.buffer(d, cachestart, cacheend - cachestart)
1539
1539
1540 return self._readsegment(offset, length, df=df)
1540 return self._readsegment(offset, length, df=df)
1541
1541
1542 def _getsegmentforrevs(self, startrev, endrev, df=None):
1542 def _getsegmentforrevs(self, startrev, endrev, df=None):
1543 """Obtain a segment of raw data corresponding to a range of revisions.
1543 """Obtain a segment of raw data corresponding to a range of revisions.
1544
1544
1545 Accepts the start and end revisions and an optional already-open
1545 Accepts the start and end revisions and an optional already-open
1546 file handle to be used for reading. If the file handle is read, its
1546 file handle to be used for reading. If the file handle is read, its
1547 seek position will not be preserved.
1547 seek position will not be preserved.
1548
1548
1549 Requests for data may be satisfied by a cache.
1549 Requests for data may be satisfied by a cache.
1550
1550
1551 Returns a 2-tuple of (offset, data) for the requested range of
1551 Returns a 2-tuple of (offset, data) for the requested range of
1552 revisions. Offset is the integer offset from the beginning of the
1552 revisions. Offset is the integer offset from the beginning of the
1553 revlog and data is a str or buffer of the raw byte data.
1553 revlog and data is a str or buffer of the raw byte data.
1554
1554
1555 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1555 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1556 to determine where each revision's data begins and ends.
1556 to determine where each revision's data begins and ends.
1557 """
1557 """
1558 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1558 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1559 # (functions are expensive).
1559 # (functions are expensive).
1560 index = self.index
1560 index = self.index
1561 istart = index[startrev]
1561 istart = index[startrev]
1562 start = int(istart[0] >> 16)
1562 start = int(istart[0] >> 16)
1563 if startrev == endrev:
1563 if startrev == endrev:
1564 end = start + istart[1]
1564 end = start + istart[1]
1565 else:
1565 else:
1566 iend = index[endrev]
1566 iend = index[endrev]
1567 end = int(iend[0] >> 16) + iend[1]
1567 end = int(iend[0] >> 16) + iend[1]
1568
1568
1569 if self._inline:
1569 if self._inline:
1570 start += (startrev + 1) * self._io.size
1570 start += (startrev + 1) * self._io.size
1571 end += (endrev + 1) * self._io.size
1571 end += (endrev + 1) * self._io.size
1572 length = end - start
1572 length = end - start
1573
1573
1574 return start, self._getsegment(start, length, df=df)
1574 return start, self._getsegment(start, length, df=df)
1575
1575
1576 def _chunk(self, rev, df=None):
1576 def _chunk(self, rev, df=None):
1577 """Obtain a single decompressed chunk for a revision.
1577 """Obtain a single decompressed chunk for a revision.
1578
1578
1579 Accepts an integer revision and an optional already-open file handle
1579 Accepts an integer revision and an optional already-open file handle
1580 to be used for reading. If used, the seek position of the file will not
1580 to be used for reading. If used, the seek position of the file will not
1581 be preserved.
1581 be preserved.
1582
1582
1583 Returns a str holding uncompressed data for the requested revision.
1583 Returns a str holding uncompressed data for the requested revision.
1584 """
1584 """
1585 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1585 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1586
1586
1587 def _chunks(self, revs, df=None):
1587 def _chunks(self, revs, df=None):
1588 """Obtain decompressed chunks for the specified revisions.
1588 """Obtain decompressed chunks for the specified revisions.
1589
1589
1590 Accepts an iterable of numeric revisions that are assumed to be in
1590 Accepts an iterable of numeric revisions that are assumed to be in
1591 ascending order. Also accepts an optional already-open file handle
1591 ascending order. Also accepts an optional already-open file handle
1592 to be used for reading. If used, the seek position of the file will
1592 to be used for reading. If used, the seek position of the file will
1593 not be preserved.
1593 not be preserved.
1594
1594
1595 This function is similar to calling ``self._chunk()`` multiple times,
1595 This function is similar to calling ``self._chunk()`` multiple times,
1596 but is faster.
1596 but is faster.
1597
1597
1598 Returns a list with decompressed data for each requested revision.
1598 Returns a list with decompressed data for each requested revision.
1599 """
1599 """
1600 if not revs:
1600 if not revs:
1601 return []
1601 return []
1602 start = self.start
1602 start = self.start
1603 length = self.length
1603 length = self.length
1604 inline = self._inline
1604 inline = self._inline
1605 iosize = self._io.size
1605 iosize = self._io.size
1606 buffer = util.buffer
1606 buffer = util.buffer
1607
1607
1608 l = []
1608 l = []
1609 ladd = l.append
1609 ladd = l.append
1610
1610
1611 if not self._withsparseread:
1611 if not self._withsparseread:
1612 slicedchunks = (revs,)
1612 slicedchunks = (revs,)
1613 else:
1613 else:
1614 slicedchunks = _slicechunk(self, revs)
1614 slicedchunks = _slicechunk(self, revs)
1615
1615
1616 for revschunk in slicedchunks:
1616 for revschunk in slicedchunks:
1617 firstrev = revschunk[0]
1617 firstrev = revschunk[0]
1618 # Skip trailing revisions with empty diff
1618 # Skip trailing revisions with empty diff
1619 for lastrev in revschunk[::-1]:
1619 for lastrev in revschunk[::-1]:
1620 if length(lastrev) != 0:
1620 if length(lastrev) != 0:
1621 break
1621 break
1622
1622
1623 try:
1623 try:
1624 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1624 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1625 except OverflowError:
1625 except OverflowError:
1626 # issue4215 - we can't cache a run of chunks greater than
1626 # issue4215 - we can't cache a run of chunks greater than
1627 # 2G on Windows
1627 # 2G on Windows
1628 return [self._chunk(rev, df=df) for rev in revschunk]
1628 return [self._chunk(rev, df=df) for rev in revschunk]
1629
1629
1630 decomp = self.decompress
1630 decomp = self.decompress
1631 for rev in revschunk:
1631 for rev in revschunk:
1632 chunkstart = start(rev)
1632 chunkstart = start(rev)
1633 if inline:
1633 if inline:
1634 chunkstart += (rev + 1) * iosize
1634 chunkstart += (rev + 1) * iosize
1635 chunklength = length(rev)
1635 chunklength = length(rev)
1636 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1636 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1637
1637
1638 return l
1638 return l
1639
1639
1640 def _chunkclear(self):
1640 def _chunkclear(self):
1641 """Clear the raw chunk cache."""
1641 """Clear the raw chunk cache."""
1642 self._chunkcache = (0, '')
1642 self._chunkcache = (0, '')
1643
1643
1644 def deltaparent(self, rev):
1644 def deltaparent(self, rev):
1645 """return deltaparent of the given revision"""
1645 """return deltaparent of the given revision"""
1646 base = self.index[rev][3]
1646 base = self.index[rev][3]
1647 if base == rev:
1647 if base == rev:
1648 return nullrev
1648 return nullrev
1649 elif self._generaldelta:
1649 elif self._generaldelta:
1650 return base
1650 return base
1651 else:
1651 else:
1652 return rev - 1
1652 return rev - 1
1653
1653
1654 def revdiff(self, rev1, rev2):
1654 def revdiff(self, rev1, rev2):
1655 """return or calculate a delta between two revisions
1655 """return or calculate a delta between two revisions
1656
1656
1657 The delta calculated is in binary form and is intended to be written to
1657 The delta calculated is in binary form and is intended to be written to
1658 revlog data directly. So this function needs raw revision data.
1658 revlog data directly. So this function needs raw revision data.
1659 """
1659 """
1660 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1660 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1661 return bytes(self._chunk(rev2))
1661 return bytes(self._chunk(rev2))
1662
1662
1663 return mdiff.textdiff(self.revision(rev1, raw=True),
1663 return mdiff.textdiff(self.revision(rev1, raw=True),
1664 self.revision(rev2, raw=True))
1664 self.revision(rev2, raw=True))
1665
1665
1666 def revision(self, nodeorrev, _df=None, raw=False):
1666 def revision(self, nodeorrev, _df=None, raw=False):
1667 """return an uncompressed revision of a given node or revision
1667 """return an uncompressed revision of a given node or revision
1668 number.
1668 number.
1669
1669
1670 _df - an existing file handle to read from. (internal-only)
1670 _df - an existing file handle to read from. (internal-only)
1671 raw - an optional argument specifying if the revision data is to be
1671 raw - an optional argument specifying if the revision data is to be
1672 treated as raw data when applying flag transforms. 'raw' should be set
1672 treated as raw data when applying flag transforms. 'raw' should be set
1673 to True when generating changegroups or in debug commands.
1673 to True when generating changegroups or in debug commands.
1674 """
1674 """
1675 if isinstance(nodeorrev, int):
1675 if isinstance(nodeorrev, int):
1676 rev = nodeorrev
1676 rev = nodeorrev
1677 node = self.node(rev)
1677 node = self.node(rev)
1678 else:
1678 else:
1679 node = nodeorrev
1679 node = nodeorrev
1680 rev = None
1680 rev = None
1681
1681
1682 cachedrev = None
1682 cachedrev = None
1683 flags = None
1683 flags = None
1684 rawtext = None
1684 rawtext = None
1685 if node == nullid:
1685 if node == nullid:
1686 return ""
1686 return ""
1687 if self._cache:
1687 if self._cache:
1688 if self._cache[0] == node:
1688 if self._cache[0] == node:
1689 # _cache only stores rawtext
1689 # _cache only stores rawtext
1690 if raw:
1690 if raw:
1691 return self._cache[2]
1691 return self._cache[2]
1692 # duplicated, but good for perf
1692 # duplicated, but good for perf
1693 if rev is None:
1693 if rev is None:
1694 rev = self.rev(node)
1694 rev = self.rev(node)
1695 if flags is None:
1695 if flags is None:
1696 flags = self.flags(rev)
1696 flags = self.flags(rev)
1697 # no extra flags set, no flag processor runs, text = rawtext
1697 # no extra flags set, no flag processor runs, text = rawtext
1698 if flags == REVIDX_DEFAULT_FLAGS:
1698 if flags == REVIDX_DEFAULT_FLAGS:
1699 return self._cache[2]
1699 return self._cache[2]
1700 # rawtext is reusable. need to run flag processor
1700 # rawtext is reusable. need to run flag processor
1701 rawtext = self._cache[2]
1701 rawtext = self._cache[2]
1702
1702
1703 cachedrev = self._cache[1]
1703 cachedrev = self._cache[1]
1704
1704
1705 # look up what we need to read
1705 # look up what we need to read
1706 if rawtext is None:
1706 if rawtext is None:
1707 if rev is None:
1707 if rev is None:
1708 rev = self.rev(node)
1708 rev = self.rev(node)
1709
1709
1710 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1710 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1711 if stopped:
1711 if stopped:
1712 rawtext = self._cache[2]
1712 rawtext = self._cache[2]
1713
1713
1714 # drop cache to save memory
1714 # drop cache to save memory
1715 self._cache = None
1715 self._cache = None
1716
1716
1717 bins = self._chunks(chain, df=_df)
1717 bins = self._chunks(chain, df=_df)
1718 if rawtext is None:
1718 if rawtext is None:
1719 rawtext = bytes(bins[0])
1719 rawtext = bytes(bins[0])
1720 bins = bins[1:]
1720 bins = bins[1:]
1721
1721
1722 rawtext = mdiff.patches(rawtext, bins)
1722 rawtext = mdiff.patches(rawtext, bins)
1723 self._cache = (node, rev, rawtext)
1723 self._cache = (node, rev, rawtext)
1724
1724
1725 if flags is None:
1725 if flags is None:
1726 if rev is None:
1726 if rev is None:
1727 rev = self.rev(node)
1727 rev = self.rev(node)
1728 flags = self.flags(rev)
1728 flags = self.flags(rev)
1729
1729
1730 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1730 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1731 if validatehash:
1731 if validatehash:
1732 self.checkhash(text, node, rev=rev)
1732 self.checkhash(text, node, rev=rev)
1733
1733
1734 return text
1734 return text
1735
1735
1736 def hash(self, text, p1, p2):
1736 def hash(self, text, p1, p2):
1737 """Compute a node hash.
1737 """Compute a node hash.
1738
1738
1739 Available as a function so that subclasses can replace the hash
1739 Available as a function so that subclasses can replace the hash
1740 as needed.
1740 as needed.
1741 """
1741 """
1742 return hash(text, p1, p2)
1742 return hash(text, p1, p2)
1743
1743
1744 def _processflags(self, text, flags, operation, raw=False):
1744 def _processflags(self, text, flags, operation, raw=False):
1745 """Inspect revision data flags and applies transforms defined by
1745 """Inspect revision data flags and applies transforms defined by
1746 registered flag processors.
1746 registered flag processors.
1747
1747
1748 ``text`` - the revision data to process
1748 ``text`` - the revision data to process
1749 ``flags`` - the revision flags
1749 ``flags`` - the revision flags
1750 ``operation`` - the operation being performed (read or write)
1750 ``operation`` - the operation being performed (read or write)
1751 ``raw`` - an optional argument describing if the raw transform should be
1751 ``raw`` - an optional argument describing if the raw transform should be
1752 applied.
1752 applied.
1753
1753
1754 This method processes the flags in the order (or reverse order if
1754 This method processes the flags in the order (or reverse order if
1755 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1755 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1756 flag processors registered for present flags. The order of flags defined
1756 flag processors registered for present flags. The order of flags defined
1757 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1757 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1758
1758
1759 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1759 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1760 processed text and ``validatehash`` is a bool indicating whether the
1760 processed text and ``validatehash`` is a bool indicating whether the
1761 returned text should be checked for hash integrity.
1761 returned text should be checked for hash integrity.
1762
1762
1763 Note: If the ``raw`` argument is set, it has precedence over the
1763 Note: If the ``raw`` argument is set, it has precedence over the
1764 operation and will only update the value of ``validatehash``.
1764 operation and will only update the value of ``validatehash``.
1765 """
1765 """
1766 # fast path: no flag processors will run
1766 # fast path: no flag processors will run
1767 if flags == 0:
1767 if flags == 0:
1768 return text, True
1768 return text, True
1769 if not operation in ('read', 'write'):
1769 if not operation in ('read', 'write'):
1770 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1770 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1771 # Check all flags are known.
1771 # Check all flags are known.
1772 if flags & ~REVIDX_KNOWN_FLAGS:
1772 if flags & ~REVIDX_KNOWN_FLAGS:
1773 raise RevlogError(_("incompatible revision flag '%#x'") %
1773 raise RevlogError(_("incompatible revision flag '%#x'") %
1774 (flags & ~REVIDX_KNOWN_FLAGS))
1774 (flags & ~REVIDX_KNOWN_FLAGS))
1775 validatehash = True
1775 validatehash = True
1776 # Depending on the operation (read or write), the order might be
1776 # Depending on the operation (read or write), the order might be
1777 # reversed due to non-commutative transforms.
1777 # reversed due to non-commutative transforms.
1778 orderedflags = REVIDX_FLAGS_ORDER
1778 orderedflags = REVIDX_FLAGS_ORDER
1779 if operation == 'write':
1779 if operation == 'write':
1780 orderedflags = reversed(orderedflags)
1780 orderedflags = reversed(orderedflags)
1781
1781
1782 for flag in orderedflags:
1782 for flag in orderedflags:
1783 # If a flagprocessor has been registered for a known flag, apply the
1783 # If a flagprocessor has been registered for a known flag, apply the
1784 # related operation transform and update result tuple.
1784 # related operation transform and update result tuple.
1785 if flag & flags:
1785 if flag & flags:
1786 vhash = True
1786 vhash = True
1787
1787
1788 if flag not in _flagprocessors:
1788 if flag not in _flagprocessors:
1789 message = _("missing processor for flag '%#x'") % (flag)
1789 message = _("missing processor for flag '%#x'") % (flag)
1790 raise RevlogError(message)
1790 raise RevlogError(message)
1791
1791
1792 processor = _flagprocessors[flag]
1792 processor = _flagprocessors[flag]
1793 if processor is not None:
1793 if processor is not None:
1794 readtransform, writetransform, rawtransform = processor
1794 readtransform, writetransform, rawtransform = processor
1795
1795
1796 if raw:
1796 if raw:
1797 vhash = rawtransform(self, text)
1797 vhash = rawtransform(self, text)
1798 elif operation == 'read':
1798 elif operation == 'read':
1799 text, vhash = readtransform(self, text)
1799 text, vhash = readtransform(self, text)
1800 else: # write operation
1800 else: # write operation
1801 text, vhash = writetransform(self, text)
1801 text, vhash = writetransform(self, text)
1802 validatehash = validatehash and vhash
1802 validatehash = validatehash and vhash
1803
1803
1804 return text, validatehash
1804 return text, validatehash
1805
1805
1806 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1806 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1807 """Check node hash integrity.
1807 """Check node hash integrity.
1808
1808
1809 Available as a function so that subclasses can extend hash mismatch
1809 Available as a function so that subclasses can extend hash mismatch
1810 behaviors as needed.
1810 behaviors as needed.
1811 """
1811 """
1812 if p1 is None and p2 is None:
1812 if p1 is None and p2 is None:
1813 p1, p2 = self.parents(node)
1813 p1, p2 = self.parents(node)
1814 if node != self.hash(text, p1, p2):
1814 if node != self.hash(text, p1, p2):
1815 revornode = rev
1815 revornode = rev
1816 if revornode is None:
1816 if revornode is None:
1817 revornode = templatefilters.short(hex(node))
1817 revornode = templatefilters.short(hex(node))
1818 raise RevlogError(_("integrity check failed on %s:%s")
1818 raise RevlogError(_("integrity check failed on %s:%s")
1819 % (self.indexfile, pycompat.bytestr(revornode)))
1819 % (self.indexfile, pycompat.bytestr(revornode)))
1820
1820
1821 def checkinlinesize(self, tr, fp=None):
1821 def checkinlinesize(self, tr, fp=None):
1822 """Check if the revlog is too big for inline and convert if so.
1822 """Check if the revlog is too big for inline and convert if so.
1823
1823
1824 This should be called after revisions are added to the revlog. If the
1824 This should be called after revisions are added to the revlog. If the
1825 revlog has grown too large to be an inline revlog, it will convert it
1825 revlog has grown too large to be an inline revlog, it will convert it
1826 to use multiple index and data files.
1826 to use multiple index and data files.
1827 """
1827 """
1828 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1828 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1829 return
1829 return
1830
1830
1831 trinfo = tr.find(self.indexfile)
1831 trinfo = tr.find(self.indexfile)
1832 if trinfo is None:
1832 if trinfo is None:
1833 raise RevlogError(_("%s not found in the transaction")
1833 raise RevlogError(_("%s not found in the transaction")
1834 % self.indexfile)
1834 % self.indexfile)
1835
1835
1836 trindex = trinfo[2]
1836 trindex = trinfo[2]
1837 if trindex is not None:
1837 if trindex is not None:
1838 dataoff = self.start(trindex)
1838 dataoff = self.start(trindex)
1839 else:
1839 else:
1840 # revlog was stripped at start of transaction, use all leftover data
1840 # revlog was stripped at start of transaction, use all leftover data
1841 trindex = len(self) - 1
1841 trindex = len(self) - 1
1842 dataoff = self.end(-2)
1842 dataoff = self.end(-2)
1843
1843
1844 tr.add(self.datafile, dataoff)
1844 tr.add(self.datafile, dataoff)
1845
1845
1846 if fp:
1846 if fp:
1847 fp.flush()
1847 fp.flush()
1848 fp.close()
1848 fp.close()
1849
1849
1850 df = self.opener(self.datafile, 'w')
1850 df = self.opener(self.datafile, 'w')
1851 try:
1851 try:
1852 for r in self:
1852 for r in self:
1853 df.write(self._getsegmentforrevs(r, r)[1])
1853 df.write(self._getsegmentforrevs(r, r)[1])
1854 finally:
1854 finally:
1855 df.close()
1855 df.close()
1856
1856
1857 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1857 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1858 checkambig=self._checkambig)
1858 checkambig=self._checkambig)
1859 self.version &= ~FLAG_INLINE_DATA
1859 self.version &= ~FLAG_INLINE_DATA
1860 self._inline = False
1860 self._inline = False
1861 for i in self:
1861 for i in self:
1862 e = self._io.packentry(self.index[i], self.node, self.version, i)
1862 e = self._io.packentry(self.index[i], self.node, self.version, i)
1863 fp.write(e)
1863 fp.write(e)
1864
1864
1865 # if we don't call close, the temp file will never replace the
1865 # if we don't call close, the temp file will never replace the
1866 # real index
1866 # real index
1867 fp.close()
1867 fp.close()
1868
1868
1869 tr.replace(self.indexfile, trindex * self._io.size)
1869 tr.replace(self.indexfile, trindex * self._io.size)
1870 self._chunkclear()
1870 self._chunkclear()
1871
1871
1872 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1872 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1873 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1873 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1874 """add a revision to the log
1874 """add a revision to the log
1875
1875
1876 text - the revision data to add
1876 text - the revision data to add
1877 transaction - the transaction object used for rollback
1877 transaction - the transaction object used for rollback
1878 link - the linkrev data to add
1878 link - the linkrev data to add
1879 p1, p2 - the parent nodeids of the revision
1879 p1, p2 - the parent nodeids of the revision
1880 cachedelta - an optional precomputed delta
1880 cachedelta - an optional precomputed delta
1881 node - nodeid of revision; typically node is not specified, and it is
1881 node - nodeid of revision; typically node is not specified, and it is
1882 computed by default as hash(text, p1, p2), however subclasses might
1882 computed by default as hash(text, p1, p2), however subclasses might
1883 use different hashing method (and override checkhash() in such case)
1883 use different hashing method (and override checkhash() in such case)
1884 flags - the known flags to set on the revision
1884 flags - the known flags to set on the revision
1885 deltacomputer - an optional _deltacomputer instance shared between
1885 deltacomputer - an optional _deltacomputer instance shared between
1886 multiple calls
1886 multiple calls
1887 """
1887 """
1888 if link == nullrev:
1888 if link == nullrev:
1889 raise RevlogError(_("attempted to add linkrev -1 to %s")
1889 raise RevlogError(_("attempted to add linkrev -1 to %s")
1890 % self.indexfile)
1890 % self.indexfile)
1891
1891
1892 if flags:
1892 if flags:
1893 node = node or self.hash(text, p1, p2)
1893 node = node or self.hash(text, p1, p2)
1894
1894
1895 rawtext, validatehash = self._processflags(text, flags, 'write')
1895 rawtext, validatehash = self._processflags(text, flags, 'write')
1896
1896
1897 # If the flag processor modifies the revision data, ignore any provided
1897 # If the flag processor modifies the revision data, ignore any provided
1898 # cachedelta.
1898 # cachedelta.
1899 if rawtext != text:
1899 if rawtext != text:
1900 cachedelta = None
1900 cachedelta = None
1901
1901
1902 if len(rawtext) > _maxentrysize:
1902 if len(rawtext) > _maxentrysize:
1903 raise RevlogError(
1903 raise RevlogError(
1904 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1904 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1905 % (self.indexfile, len(rawtext)))
1905 % (self.indexfile, len(rawtext)))
1906
1906
1907 node = node or self.hash(rawtext, p1, p2)
1907 node = node or self.hash(rawtext, p1, p2)
1908 if node in self.nodemap:
1908 if node in self.nodemap:
1909 return node
1909 return node
1910
1910
1911 if validatehash:
1911 if validatehash:
1912 self.checkhash(rawtext, node, p1=p1, p2=p2)
1912 self.checkhash(rawtext, node, p1=p1, p2=p2)
1913
1913
1914 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1914 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1915 flags, cachedelta=cachedelta,
1915 flags, cachedelta=cachedelta,
1916 deltacomputer=deltacomputer)
1916 deltacomputer=deltacomputer)
1917
1917
1918 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1918 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1919 cachedelta=None, deltacomputer=None):
1919 cachedelta=None, deltacomputer=None):
1920 """add a raw revision with known flags, node and parents
1920 """add a raw revision with known flags, node and parents
1921 useful when reusing a revision not stored in this revlog (ex: received
1921 useful when reusing a revision not stored in this revlog (ex: received
1922 over wire, or read from an external bundle).
1922 over wire, or read from an external bundle).
1923 """
1923 """
1924 dfh = None
1924 dfh = None
1925 if not self._inline:
1925 if not self._inline:
1926 dfh = self.opener(self.datafile, "a+")
1926 dfh = self.opener(self.datafile, "a+")
1927 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1927 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1928 try:
1928 try:
1929 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1929 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1930 flags, cachedelta, ifh, dfh,
1930 flags, cachedelta, ifh, dfh,
1931 deltacomputer=deltacomputer)
1931 deltacomputer=deltacomputer)
1932 finally:
1932 finally:
1933 if dfh:
1933 if dfh:
1934 dfh.close()
1934 dfh.close()
1935 ifh.close()
1935 ifh.close()
1936
1936
1937 def compress(self, data):
1937 def compress(self, data):
1938 """Generate a possibly-compressed representation of data."""
1938 """Generate a possibly-compressed representation of data."""
1939 if not data:
1939 if not data:
1940 return '', data
1940 return '', data
1941
1941
1942 compressed = self._compressor.compress(data)
1942 compressed = self._compressor.compress(data)
1943
1943
1944 if compressed:
1944 if compressed:
1945 # The revlog compressor added the header in the returned data.
1945 # The revlog compressor added the header in the returned data.
1946 return '', compressed
1946 return '', compressed
1947
1947
1948 if data[0:1] == '\0':
1948 if data[0:1] == '\0':
1949 return '', data
1949 return '', data
1950 return 'u', data
1950 return 'u', data
1951
1951
1952 def decompress(self, data):
1952 def decompress(self, data):
1953 """Decompress a revlog chunk.
1953 """Decompress a revlog chunk.
1954
1954
1955 The chunk is expected to begin with a header identifying the
1955 The chunk is expected to begin with a header identifying the
1956 format type so it can be routed to an appropriate decompressor.
1956 format type so it can be routed to an appropriate decompressor.
1957 """
1957 """
1958 if not data:
1958 if not data:
1959 return data
1959 return data
1960
1960
1961 # Revlogs are read much more frequently than they are written and many
1961 # Revlogs are read much more frequently than they are written and many
1962 # chunks only take microseconds to decompress, so performance is
1962 # chunks only take microseconds to decompress, so performance is
1963 # important here.
1963 # important here.
1964 #
1964 #
1965 # We can make a few assumptions about revlogs:
1965 # We can make a few assumptions about revlogs:
1966 #
1966 #
1967 # 1) the majority of chunks will be compressed (as opposed to inline
1967 # 1) the majority of chunks will be compressed (as opposed to inline
1968 # raw data).
1968 # raw data).
1969 # 2) decompressing *any* data will likely by at least 10x slower than
1969 # 2) decompressing *any* data will likely by at least 10x slower than
1970 # returning raw inline data.
1970 # returning raw inline data.
1971 # 3) we want to prioritize common and officially supported compression
1971 # 3) we want to prioritize common and officially supported compression
1972 # engines
1972 # engines
1973 #
1973 #
1974 # It follows that we want to optimize for "decompress compressed data
1974 # It follows that we want to optimize for "decompress compressed data
1975 # when encoded with common and officially supported compression engines"
1975 # when encoded with common and officially supported compression engines"
1976 # case over "raw data" and "data encoded by less common or non-official
1976 # case over "raw data" and "data encoded by less common or non-official
1977 # compression engines." That is why we have the inline lookup first
1977 # compression engines." That is why we have the inline lookup first
1978 # followed by the compengines lookup.
1978 # followed by the compengines lookup.
1979 #
1979 #
1980 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1980 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1981 # compressed chunks. And this matters for changelog and manifest reads.
1981 # compressed chunks. And this matters for changelog and manifest reads.
1982 t = data[0:1]
1982 t = data[0:1]
1983
1983
1984 if t == 'x':
1984 if t == 'x':
1985 try:
1985 try:
1986 return _zlibdecompress(data)
1986 return _zlibdecompress(data)
1987 except zlib.error as e:
1987 except zlib.error as e:
1988 raise RevlogError(_('revlog decompress error: %s') % str(e))
1988 raise RevlogError(_('revlog decompress error: %s') % str(e))
1989 # '\0' is more common than 'u' so it goes first.
1989 # '\0' is more common than 'u' so it goes first.
1990 elif t == '\0':
1990 elif t == '\0':
1991 return data
1991 return data
1992 elif t == 'u':
1992 elif t == 'u':
1993 return util.buffer(data, 1)
1993 return util.buffer(data, 1)
1994
1994
1995 try:
1995 try:
1996 compressor = self._decompressors[t]
1996 compressor = self._decompressors[t]
1997 except KeyError:
1997 except KeyError:
1998 try:
1998 try:
1999 engine = util.compengines.forrevlogheader(t)
1999 engine = util.compengines.forrevlogheader(t)
2000 compressor = engine.revlogcompressor()
2000 compressor = engine.revlogcompressor()
2001 self._decompressors[t] = compressor
2001 self._decompressors[t] = compressor
2002 except KeyError:
2002 except KeyError:
2003 raise RevlogError(_('unknown compression type %r') % t)
2003 raise RevlogError(_('unknown compression type %r') % t)
2004
2004
2005 return compressor.decompress(data)
2005 return compressor.decompress(data)
2006
2006
2007 def _isgooddeltainfo(self, d, textlen):
2007 def _isgooddeltainfo(self, d, textlen):
2008 """Returns True if the given delta is good. Good means that it is within
2008 """Returns True if the given delta is good. Good means that it is within
2009 the disk span, disk size, and chain length bounds that we know to be
2009 the disk span, disk size, and chain length bounds that we know to be
2010 performant."""
2010 performant."""
2011 if d is None:
2011 if d is None:
2012 return False
2012 return False
2013
2013
2014 # - 'd.distance' is the distance from the base revision -- bounding it
2014 # - 'd.distance' is the distance from the base revision -- bounding it
2015 # limits the amount of I/O we need to do.
2015 # limits the amount of I/O we need to do.
2016 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2016 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2017 # need to apply -- bounding it limits the amount of CPU we consume.
2017 # need to apply -- bounding it limits the amount of CPU we consume.
2018
2018
2019 defaultmax = textlen * 4
2019 defaultmax = textlen * 4
2020 maxdist = self._maxdeltachainspan
2020 maxdist = self._maxdeltachainspan
2021 if not maxdist:
2021 if not maxdist:
2022 maxdist = d.distance # ensure the conditional pass
2022 maxdist = d.distance # ensure the conditional pass
2023 maxdist = max(maxdist, defaultmax)
2023 maxdist = max(maxdist, defaultmax)
2024 if (d.distance > maxdist or d.deltalen > textlen or
2024 if (d.distance > maxdist or d.deltalen > textlen or
2025 d.compresseddeltalen > textlen * 2 or
2025 d.compresseddeltalen > textlen * 2 or
2026 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2026 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2027 return False
2027 return False
2028
2028
2029 return True
2029 return True
2030
2030
2031 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2031 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2032 cachedelta, ifh, dfh, alwayscache=False,
2032 cachedelta, ifh, dfh, alwayscache=False,
2033 deltacomputer=None):
2033 deltacomputer=None):
2034 """internal function to add revisions to the log
2034 """internal function to add revisions to the log
2035
2035
2036 see addrevision for argument descriptions.
2036 see addrevision for argument descriptions.
2037
2037
2038 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2038 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2039
2039
2040 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2040 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2041 be used.
2041 be used.
2042
2042
2043 invariants:
2043 invariants:
2044 - rawtext is optional (can be None); if not set, cachedelta must be set.
2044 - rawtext is optional (can be None); if not set, cachedelta must be set.
2045 if both are set, they must correspond to each other.
2045 if both are set, they must correspond to each other.
2046 """
2046 """
2047 if node == nullid:
2047 if node == nullid:
2048 raise RevlogError(_("%s: attempt to add null revision") %
2048 raise RevlogError(_("%s: attempt to add null revision") %
2049 (self.indexfile))
2049 (self.indexfile))
2050 if node == wdirid:
2050 if node == wdirid:
2051 raise RevlogError(_("%s: attempt to add wdir revision") %
2051 raise RevlogError(_("%s: attempt to add wdir revision") %
2052 (self.indexfile))
2052 (self.indexfile))
2053
2053
2054 if self._inline:
2054 if self._inline:
2055 fh = ifh
2055 fh = ifh
2056 else:
2056 else:
2057 fh = dfh
2057 fh = dfh
2058
2058
2059 btext = [rawtext]
2059 btext = [rawtext]
2060
2060
2061 curr = len(self)
2061 curr = len(self)
2062 prev = curr - 1
2062 prev = curr - 1
2063 offset = self.end(prev)
2063 offset = self.end(prev)
2064 p1r, p2r = self.rev(p1), self.rev(p2)
2064 p1r, p2r = self.rev(p1), self.rev(p2)
2065
2065
2066 # full versions are inserted when the needed deltas
2066 # full versions are inserted when the needed deltas
2067 # become comparable to the uncompressed text
2067 # become comparable to the uncompressed text
2068 if rawtext is None:
2068 if rawtext is None:
2069 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
2069 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
2070 cachedelta[1])
2070 cachedelta[1])
2071 else:
2071 else:
2072 textlen = len(rawtext)
2072 textlen = len(rawtext)
2073
2073
2074 if deltacomputer is None:
2074 if deltacomputer is None:
2075 deltacomputer = _deltacomputer(self)
2075 deltacomputer = _deltacomputer(self)
2076
2076
2077 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2077 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2078 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2078 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2079
2079
2080 if deltainfo is not None:
2080 if deltainfo is not None:
2081 base = deltainfo.base
2081 base = deltainfo.base
2082 chainbase = deltainfo.chainbase
2082 chainbase = deltainfo.chainbase
2083 data = deltainfo.data
2083 data = deltainfo.data
2084 l = deltainfo.deltalen
2084 l = deltainfo.deltalen
2085 else:
2085 else:
2086 rawtext = deltacomputer.buildtext(revinfo, fh)
2086 rawtext = deltacomputer.buildtext(revinfo, fh)
2087 data = self.compress(rawtext)
2087 data = self.compress(rawtext)
2088 l = len(data[1]) + len(data[0])
2088 l = len(data[1]) + len(data[0])
2089 base = chainbase = curr
2089 base = chainbase = curr
2090
2090
2091 e = (offset_type(offset, flags), l, textlen,
2091 e = (offset_type(offset, flags), l, textlen,
2092 base, link, p1r, p2r, node)
2092 base, link, p1r, p2r, node)
2093 self.index.insert(-1, e)
2093 self.index.insert(-1, e)
2094 self.nodemap[node] = curr
2094 self.nodemap[node] = curr
2095
2095
2096 entry = self._io.packentry(e, self.node, self.version, curr)
2096 entry = self._io.packentry(e, self.node, self.version, curr)
2097 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2097 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2098
2098
2099 if alwayscache and rawtext is None:
2099 if alwayscache and rawtext is None:
2100 rawtext = deltacomputer._buildtext(revinfo, fh)
2100 rawtext = deltacomputer._buildtext(revinfo, fh)
2101
2101
2102 if type(rawtext) == str: # only accept immutable objects
2102 if type(rawtext) == bytes: # only accept immutable objects
2103 self._cache = (node, curr, rawtext)
2103 self._cache = (node, curr, rawtext)
2104 self._chainbasecache[curr] = chainbase
2104 self._chainbasecache[curr] = chainbase
2105 return node
2105 return node
2106
2106
2107 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2107 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2108 # Files opened in a+ mode have inconsistent behavior on various
2108 # Files opened in a+ mode have inconsistent behavior on various
2109 # platforms. Windows requires that a file positioning call be made
2109 # platforms. Windows requires that a file positioning call be made
2110 # when the file handle transitions between reads and writes. See
2110 # when the file handle transitions between reads and writes. See
2111 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2111 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2112 # platforms, Python or the platform itself can be buggy. Some versions
2112 # platforms, Python or the platform itself can be buggy. Some versions
2113 # of Solaris have been observed to not append at the end of the file
2113 # of Solaris have been observed to not append at the end of the file
2114 # if the file was seeked to before the end. See issue4943 for more.
2114 # if the file was seeked to before the end. See issue4943 for more.
2115 #
2115 #
2116 # We work around this issue by inserting a seek() before writing.
2116 # We work around this issue by inserting a seek() before writing.
2117 # Note: This is likely not necessary on Python 3.
2117 # Note: This is likely not necessary on Python 3.
2118 ifh.seek(0, os.SEEK_END)
2118 ifh.seek(0, os.SEEK_END)
2119 if dfh:
2119 if dfh:
2120 dfh.seek(0, os.SEEK_END)
2120 dfh.seek(0, os.SEEK_END)
2121
2121
2122 curr = len(self) - 1
2122 curr = len(self) - 1
2123 if not self._inline:
2123 if not self._inline:
2124 transaction.add(self.datafile, offset)
2124 transaction.add(self.datafile, offset)
2125 transaction.add(self.indexfile, curr * len(entry))
2125 transaction.add(self.indexfile, curr * len(entry))
2126 if data[0]:
2126 if data[0]:
2127 dfh.write(data[0])
2127 dfh.write(data[0])
2128 dfh.write(data[1])
2128 dfh.write(data[1])
2129 ifh.write(entry)
2129 ifh.write(entry)
2130 else:
2130 else:
2131 offset += curr * self._io.size
2131 offset += curr * self._io.size
2132 transaction.add(self.indexfile, offset, curr)
2132 transaction.add(self.indexfile, offset, curr)
2133 ifh.write(entry)
2133 ifh.write(entry)
2134 ifh.write(data[0])
2134 ifh.write(data[0])
2135 ifh.write(data[1])
2135 ifh.write(data[1])
2136 self.checkinlinesize(transaction, ifh)
2136 self.checkinlinesize(transaction, ifh)
2137
2137
2138 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2138 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2139 """
2139 """
2140 add a delta group
2140 add a delta group
2141
2141
2142 given a set of deltas, add them to the revision log. the
2142 given a set of deltas, add them to the revision log. the
2143 first delta is against its parent, which should be in our
2143 first delta is against its parent, which should be in our
2144 log, the rest are against the previous delta.
2144 log, the rest are against the previous delta.
2145
2145
2146 If ``addrevisioncb`` is defined, it will be called with arguments of
2146 If ``addrevisioncb`` is defined, it will be called with arguments of
2147 this revlog and the node that was added.
2147 this revlog and the node that was added.
2148 """
2148 """
2149
2149
2150 nodes = []
2150 nodes = []
2151
2151
2152 r = len(self)
2152 r = len(self)
2153 end = 0
2153 end = 0
2154 if r:
2154 if r:
2155 end = self.end(r - 1)
2155 end = self.end(r - 1)
2156 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
2156 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
2157 isize = r * self._io.size
2157 isize = r * self._io.size
2158 if self._inline:
2158 if self._inline:
2159 transaction.add(self.indexfile, end + isize, r)
2159 transaction.add(self.indexfile, end + isize, r)
2160 dfh = None
2160 dfh = None
2161 else:
2161 else:
2162 transaction.add(self.indexfile, isize, r)
2162 transaction.add(self.indexfile, isize, r)
2163 transaction.add(self.datafile, end)
2163 transaction.add(self.datafile, end)
2164 dfh = self.opener(self.datafile, "a+")
2164 dfh = self.opener(self.datafile, "a+")
2165 def flush():
2165 def flush():
2166 if dfh:
2166 if dfh:
2167 dfh.flush()
2167 dfh.flush()
2168 ifh.flush()
2168 ifh.flush()
2169 try:
2169 try:
2170 deltacomputer = _deltacomputer(self)
2170 deltacomputer = _deltacomputer(self)
2171 # loop through our set of deltas
2171 # loop through our set of deltas
2172 for data in deltas:
2172 for data in deltas:
2173 node, p1, p2, linknode, deltabase, delta, flags = data
2173 node, p1, p2, linknode, deltabase, delta, flags = data
2174 link = linkmapper(linknode)
2174 link = linkmapper(linknode)
2175 flags = flags or REVIDX_DEFAULT_FLAGS
2175 flags = flags or REVIDX_DEFAULT_FLAGS
2176
2176
2177 nodes.append(node)
2177 nodes.append(node)
2178
2178
2179 if node in self.nodemap:
2179 if node in self.nodemap:
2180 # this can happen if two branches make the same change
2180 # this can happen if two branches make the same change
2181 continue
2181 continue
2182
2182
2183 for p in (p1, p2):
2183 for p in (p1, p2):
2184 if p not in self.nodemap:
2184 if p not in self.nodemap:
2185 raise LookupError(p, self.indexfile,
2185 raise LookupError(p, self.indexfile,
2186 _('unknown parent'))
2186 _('unknown parent'))
2187
2187
2188 if deltabase not in self.nodemap:
2188 if deltabase not in self.nodemap:
2189 raise LookupError(deltabase, self.indexfile,
2189 raise LookupError(deltabase, self.indexfile,
2190 _('unknown delta base'))
2190 _('unknown delta base'))
2191
2191
2192 baserev = self.rev(deltabase)
2192 baserev = self.rev(deltabase)
2193
2193
2194 if baserev != nullrev and self.iscensored(baserev):
2194 if baserev != nullrev and self.iscensored(baserev):
2195 # if base is censored, delta must be full replacement in a
2195 # if base is censored, delta must be full replacement in a
2196 # single patch operation
2196 # single patch operation
2197 hlen = struct.calcsize(">lll")
2197 hlen = struct.calcsize(">lll")
2198 oldlen = self.rawsize(baserev)
2198 oldlen = self.rawsize(baserev)
2199 newlen = len(delta) - hlen
2199 newlen = len(delta) - hlen
2200 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2200 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2201 raise error.CensoredBaseError(self.indexfile,
2201 raise error.CensoredBaseError(self.indexfile,
2202 self.node(baserev))
2202 self.node(baserev))
2203
2203
2204 if not flags and self._peek_iscensored(baserev, delta, flush):
2204 if not flags and self._peek_iscensored(baserev, delta, flush):
2205 flags |= REVIDX_ISCENSORED
2205 flags |= REVIDX_ISCENSORED
2206
2206
2207 # We assume consumers of addrevisioncb will want to retrieve
2207 # We assume consumers of addrevisioncb will want to retrieve
2208 # the added revision, which will require a call to
2208 # the added revision, which will require a call to
2209 # revision(). revision() will fast path if there is a cache
2209 # revision(). revision() will fast path if there is a cache
2210 # hit. So, we tell _addrevision() to always cache in this case.
2210 # hit. So, we tell _addrevision() to always cache in this case.
2211 # We're only using addgroup() in the context of changegroup
2211 # We're only using addgroup() in the context of changegroup
2212 # generation so the revision data can always be handled as raw
2212 # generation so the revision data can always be handled as raw
2213 # by the flagprocessor.
2213 # by the flagprocessor.
2214 self._addrevision(node, None, transaction, link,
2214 self._addrevision(node, None, transaction, link,
2215 p1, p2, flags, (baserev, delta),
2215 p1, p2, flags, (baserev, delta),
2216 ifh, dfh,
2216 ifh, dfh,
2217 alwayscache=bool(addrevisioncb),
2217 alwayscache=bool(addrevisioncb),
2218 deltacomputer=deltacomputer)
2218 deltacomputer=deltacomputer)
2219
2219
2220 if addrevisioncb:
2220 if addrevisioncb:
2221 addrevisioncb(self, node)
2221 addrevisioncb(self, node)
2222
2222
2223 if not dfh and not self._inline:
2223 if not dfh and not self._inline:
2224 # addrevision switched from inline to conventional
2224 # addrevision switched from inline to conventional
2225 # reopen the index
2225 # reopen the index
2226 ifh.close()
2226 ifh.close()
2227 dfh = self.opener(self.datafile, "a+")
2227 dfh = self.opener(self.datafile, "a+")
2228 ifh = self.opener(self.indexfile, "a+",
2228 ifh = self.opener(self.indexfile, "a+",
2229 checkambig=self._checkambig)
2229 checkambig=self._checkambig)
2230 finally:
2230 finally:
2231 if dfh:
2231 if dfh:
2232 dfh.close()
2232 dfh.close()
2233 ifh.close()
2233 ifh.close()
2234
2234
2235 return nodes
2235 return nodes
2236
2236
2237 def iscensored(self, rev):
2237 def iscensored(self, rev):
2238 """Check if a file revision is censored."""
2238 """Check if a file revision is censored."""
2239 return False
2239 return False
2240
2240
2241 def _peek_iscensored(self, baserev, delta, flush):
2241 def _peek_iscensored(self, baserev, delta, flush):
2242 """Quickly check if a delta produces a censored revision."""
2242 """Quickly check if a delta produces a censored revision."""
2243 return False
2243 return False
2244
2244
2245 def getstrippoint(self, minlink):
2245 def getstrippoint(self, minlink):
2246 """find the minimum rev that must be stripped to strip the linkrev
2246 """find the minimum rev that must be stripped to strip the linkrev
2247
2247
2248 Returns a tuple containing the minimum rev and a set of all revs that
2248 Returns a tuple containing the minimum rev and a set of all revs that
2249 have linkrevs that will be broken by this strip.
2249 have linkrevs that will be broken by this strip.
2250 """
2250 """
2251 brokenrevs = set()
2251 brokenrevs = set()
2252 strippoint = len(self)
2252 strippoint = len(self)
2253
2253
2254 heads = {}
2254 heads = {}
2255 futurelargelinkrevs = set()
2255 futurelargelinkrevs = set()
2256 for head in self.headrevs():
2256 for head in self.headrevs():
2257 headlinkrev = self.linkrev(head)
2257 headlinkrev = self.linkrev(head)
2258 heads[head] = headlinkrev
2258 heads[head] = headlinkrev
2259 if headlinkrev >= minlink:
2259 if headlinkrev >= minlink:
2260 futurelargelinkrevs.add(headlinkrev)
2260 futurelargelinkrevs.add(headlinkrev)
2261
2261
2262 # This algorithm involves walking down the rev graph, starting at the
2262 # This algorithm involves walking down the rev graph, starting at the
2263 # heads. Since the revs are topologically sorted according to linkrev,
2263 # heads. Since the revs are topologically sorted according to linkrev,
2264 # once all head linkrevs are below the minlink, we know there are
2264 # once all head linkrevs are below the minlink, we know there are
2265 # no more revs that could have a linkrev greater than minlink.
2265 # no more revs that could have a linkrev greater than minlink.
2266 # So we can stop walking.
2266 # So we can stop walking.
2267 while futurelargelinkrevs:
2267 while futurelargelinkrevs:
2268 strippoint -= 1
2268 strippoint -= 1
2269 linkrev = heads.pop(strippoint)
2269 linkrev = heads.pop(strippoint)
2270
2270
2271 if linkrev < minlink:
2271 if linkrev < minlink:
2272 brokenrevs.add(strippoint)
2272 brokenrevs.add(strippoint)
2273 else:
2273 else:
2274 futurelargelinkrevs.remove(linkrev)
2274 futurelargelinkrevs.remove(linkrev)
2275
2275
2276 for p in self.parentrevs(strippoint):
2276 for p in self.parentrevs(strippoint):
2277 if p != nullrev:
2277 if p != nullrev:
2278 plinkrev = self.linkrev(p)
2278 plinkrev = self.linkrev(p)
2279 heads[p] = plinkrev
2279 heads[p] = plinkrev
2280 if plinkrev >= minlink:
2280 if plinkrev >= minlink:
2281 futurelargelinkrevs.add(plinkrev)
2281 futurelargelinkrevs.add(plinkrev)
2282
2282
2283 return strippoint, brokenrevs
2283 return strippoint, brokenrevs
2284
2284
2285 def strip(self, minlink, transaction):
2285 def strip(self, minlink, transaction):
2286 """truncate the revlog on the first revision with a linkrev >= minlink
2286 """truncate the revlog on the first revision with a linkrev >= minlink
2287
2287
2288 This function is called when we're stripping revision minlink and
2288 This function is called when we're stripping revision minlink and
2289 its descendants from the repository.
2289 its descendants from the repository.
2290
2290
2291 We have to remove all revisions with linkrev >= minlink, because
2291 We have to remove all revisions with linkrev >= minlink, because
2292 the equivalent changelog revisions will be renumbered after the
2292 the equivalent changelog revisions will be renumbered after the
2293 strip.
2293 strip.
2294
2294
2295 So we truncate the revlog on the first of these revisions, and
2295 So we truncate the revlog on the first of these revisions, and
2296 trust that the caller has saved the revisions that shouldn't be
2296 trust that the caller has saved the revisions that shouldn't be
2297 removed and that it'll re-add them after this truncation.
2297 removed and that it'll re-add them after this truncation.
2298 """
2298 """
2299 if len(self) == 0:
2299 if len(self) == 0:
2300 return
2300 return
2301
2301
2302 rev, _ = self.getstrippoint(minlink)
2302 rev, _ = self.getstrippoint(minlink)
2303 if rev == len(self):
2303 if rev == len(self):
2304 return
2304 return
2305
2305
2306 # first truncate the files on disk
2306 # first truncate the files on disk
2307 end = self.start(rev)
2307 end = self.start(rev)
2308 if not self._inline:
2308 if not self._inline:
2309 transaction.add(self.datafile, end)
2309 transaction.add(self.datafile, end)
2310 end = rev * self._io.size
2310 end = rev * self._io.size
2311 else:
2311 else:
2312 end += rev * self._io.size
2312 end += rev * self._io.size
2313
2313
2314 transaction.add(self.indexfile, end)
2314 transaction.add(self.indexfile, end)
2315
2315
2316 # then reset internal state in memory to forget those revisions
2316 # then reset internal state in memory to forget those revisions
2317 self._cache = None
2317 self._cache = None
2318 self._chaininfocache = {}
2318 self._chaininfocache = {}
2319 self._chunkclear()
2319 self._chunkclear()
2320 for x in xrange(rev, len(self)):
2320 for x in xrange(rev, len(self)):
2321 del self.nodemap[self.node(x)]
2321 del self.nodemap[self.node(x)]
2322
2322
2323 del self.index[rev:-1]
2323 del self.index[rev:-1]
2324
2324
2325 def checksize(self):
2325 def checksize(self):
2326 expected = 0
2326 expected = 0
2327 if len(self):
2327 if len(self):
2328 expected = max(0, self.end(len(self) - 1))
2328 expected = max(0, self.end(len(self) - 1))
2329
2329
2330 try:
2330 try:
2331 f = self.opener(self.datafile)
2331 f = self.opener(self.datafile)
2332 f.seek(0, 2)
2332 f.seek(0, 2)
2333 actual = f.tell()
2333 actual = f.tell()
2334 f.close()
2334 f.close()
2335 dd = actual - expected
2335 dd = actual - expected
2336 except IOError as inst:
2336 except IOError as inst:
2337 if inst.errno != errno.ENOENT:
2337 if inst.errno != errno.ENOENT:
2338 raise
2338 raise
2339 dd = 0
2339 dd = 0
2340
2340
2341 try:
2341 try:
2342 f = self.opener(self.indexfile)
2342 f = self.opener(self.indexfile)
2343 f.seek(0, 2)
2343 f.seek(0, 2)
2344 actual = f.tell()
2344 actual = f.tell()
2345 f.close()
2345 f.close()
2346 s = self._io.size
2346 s = self._io.size
2347 i = max(0, actual // s)
2347 i = max(0, actual // s)
2348 di = actual - (i * s)
2348 di = actual - (i * s)
2349 if self._inline:
2349 if self._inline:
2350 databytes = 0
2350 databytes = 0
2351 for r in self:
2351 for r in self:
2352 databytes += max(0, self.length(r))
2352 databytes += max(0, self.length(r))
2353 dd = 0
2353 dd = 0
2354 di = actual - len(self) * s - databytes
2354 di = actual - len(self) * s - databytes
2355 except IOError as inst:
2355 except IOError as inst:
2356 if inst.errno != errno.ENOENT:
2356 if inst.errno != errno.ENOENT:
2357 raise
2357 raise
2358 di = 0
2358 di = 0
2359
2359
2360 return (dd, di)
2360 return (dd, di)
2361
2361
2362 def files(self):
2362 def files(self):
2363 res = [self.indexfile]
2363 res = [self.indexfile]
2364 if not self._inline:
2364 if not self._inline:
2365 res.append(self.datafile)
2365 res.append(self.datafile)
2366 return res
2366 return res
2367
2367
2368 DELTAREUSEALWAYS = 'always'
2368 DELTAREUSEALWAYS = 'always'
2369 DELTAREUSESAMEREVS = 'samerevs'
2369 DELTAREUSESAMEREVS = 'samerevs'
2370 DELTAREUSENEVER = 'never'
2370 DELTAREUSENEVER = 'never'
2371
2371
2372 DELTAREUSEFULLADD = 'fulladd'
2372 DELTAREUSEFULLADD = 'fulladd'
2373
2373
2374 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2374 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2375
2375
2376 def clone(self, tr, destrevlog, addrevisioncb=None,
2376 def clone(self, tr, destrevlog, addrevisioncb=None,
2377 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2377 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2378 """Copy this revlog to another, possibly with format changes.
2378 """Copy this revlog to another, possibly with format changes.
2379
2379
2380 The destination revlog will contain the same revisions and nodes.
2380 The destination revlog will contain the same revisions and nodes.
2381 However, it may not be bit-for-bit identical due to e.g. delta encoding
2381 However, it may not be bit-for-bit identical due to e.g. delta encoding
2382 differences.
2382 differences.
2383
2383
2384 The ``deltareuse`` argument control how deltas from the existing revlog
2384 The ``deltareuse`` argument control how deltas from the existing revlog
2385 are preserved in the destination revlog. The argument can have the
2385 are preserved in the destination revlog. The argument can have the
2386 following values:
2386 following values:
2387
2387
2388 DELTAREUSEALWAYS
2388 DELTAREUSEALWAYS
2389 Deltas will always be reused (if possible), even if the destination
2389 Deltas will always be reused (if possible), even if the destination
2390 revlog would not select the same revisions for the delta. This is the
2390 revlog would not select the same revisions for the delta. This is the
2391 fastest mode of operation.
2391 fastest mode of operation.
2392 DELTAREUSESAMEREVS
2392 DELTAREUSESAMEREVS
2393 Deltas will be reused if the destination revlog would pick the same
2393 Deltas will be reused if the destination revlog would pick the same
2394 revisions for the delta. This mode strikes a balance between speed
2394 revisions for the delta. This mode strikes a balance between speed
2395 and optimization.
2395 and optimization.
2396 DELTAREUSENEVER
2396 DELTAREUSENEVER
2397 Deltas will never be reused. This is the slowest mode of execution.
2397 Deltas will never be reused. This is the slowest mode of execution.
2398 This mode can be used to recompute deltas (e.g. if the diff/delta
2398 This mode can be used to recompute deltas (e.g. if the diff/delta
2399 algorithm changes).
2399 algorithm changes).
2400
2400
2401 Delta computation can be slow, so the choice of delta reuse policy can
2401 Delta computation can be slow, so the choice of delta reuse policy can
2402 significantly affect run time.
2402 significantly affect run time.
2403
2403
2404 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2404 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2405 two extremes. Deltas will be reused if they are appropriate. But if the
2405 two extremes. Deltas will be reused if they are appropriate. But if the
2406 delta could choose a better revision, it will do so. This means if you
2406 delta could choose a better revision, it will do so. This means if you
2407 are converting a non-generaldelta revlog to a generaldelta revlog,
2407 are converting a non-generaldelta revlog to a generaldelta revlog,
2408 deltas will be recomputed if the delta's parent isn't a parent of the
2408 deltas will be recomputed if the delta's parent isn't a parent of the
2409 revision.
2409 revision.
2410
2410
2411 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2411 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2412 controls whether to compute deltas against both parents for merges.
2412 controls whether to compute deltas against both parents for merges.
2413 By default, the current default is used.
2413 By default, the current default is used.
2414 """
2414 """
2415 if deltareuse not in self.DELTAREUSEALL:
2415 if deltareuse not in self.DELTAREUSEALL:
2416 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2416 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2417
2417
2418 if len(destrevlog):
2418 if len(destrevlog):
2419 raise ValueError(_('destination revlog is not empty'))
2419 raise ValueError(_('destination revlog is not empty'))
2420
2420
2421 if getattr(self, 'filteredrevs', None):
2421 if getattr(self, 'filteredrevs', None):
2422 raise ValueError(_('source revlog has filtered revisions'))
2422 raise ValueError(_('source revlog has filtered revisions'))
2423 if getattr(destrevlog, 'filteredrevs', None):
2423 if getattr(destrevlog, 'filteredrevs', None):
2424 raise ValueError(_('destination revlog has filtered revisions'))
2424 raise ValueError(_('destination revlog has filtered revisions'))
2425
2425
2426 # lazydeltabase controls whether to reuse a cached delta, if possible.
2426 # lazydeltabase controls whether to reuse a cached delta, if possible.
2427 oldlazydeltabase = destrevlog._lazydeltabase
2427 oldlazydeltabase = destrevlog._lazydeltabase
2428 oldamd = destrevlog._aggressivemergedeltas
2428 oldamd = destrevlog._aggressivemergedeltas
2429
2429
2430 try:
2430 try:
2431 if deltareuse == self.DELTAREUSEALWAYS:
2431 if deltareuse == self.DELTAREUSEALWAYS:
2432 destrevlog._lazydeltabase = True
2432 destrevlog._lazydeltabase = True
2433 elif deltareuse == self.DELTAREUSESAMEREVS:
2433 elif deltareuse == self.DELTAREUSESAMEREVS:
2434 destrevlog._lazydeltabase = False
2434 destrevlog._lazydeltabase = False
2435
2435
2436 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2436 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2437
2437
2438 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2438 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2439 self.DELTAREUSESAMEREVS)
2439 self.DELTAREUSESAMEREVS)
2440
2440
2441 deltacomputer = _deltacomputer(destrevlog)
2441 deltacomputer = _deltacomputer(destrevlog)
2442 index = self.index
2442 index = self.index
2443 for rev in self:
2443 for rev in self:
2444 entry = index[rev]
2444 entry = index[rev]
2445
2445
2446 # Some classes override linkrev to take filtered revs into
2446 # Some classes override linkrev to take filtered revs into
2447 # account. Use raw entry from index.
2447 # account. Use raw entry from index.
2448 flags = entry[0] & 0xffff
2448 flags = entry[0] & 0xffff
2449 linkrev = entry[4]
2449 linkrev = entry[4]
2450 p1 = index[entry[5]][7]
2450 p1 = index[entry[5]][7]
2451 p2 = index[entry[6]][7]
2451 p2 = index[entry[6]][7]
2452 node = entry[7]
2452 node = entry[7]
2453
2453
2454 # (Possibly) reuse the delta from the revlog if allowed and
2454 # (Possibly) reuse the delta from the revlog if allowed and
2455 # the revlog chunk is a delta.
2455 # the revlog chunk is a delta.
2456 cachedelta = None
2456 cachedelta = None
2457 rawtext = None
2457 rawtext = None
2458 if populatecachedelta:
2458 if populatecachedelta:
2459 dp = self.deltaparent(rev)
2459 dp = self.deltaparent(rev)
2460 if dp != nullrev:
2460 if dp != nullrev:
2461 cachedelta = (dp, str(self._chunk(rev)))
2461 cachedelta = (dp, str(self._chunk(rev)))
2462
2462
2463 if not cachedelta:
2463 if not cachedelta:
2464 rawtext = self.revision(rev, raw=True)
2464 rawtext = self.revision(rev, raw=True)
2465
2465
2466
2466
2467 if deltareuse == self.DELTAREUSEFULLADD:
2467 if deltareuse == self.DELTAREUSEFULLADD:
2468 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2468 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2469 cachedelta=cachedelta,
2469 cachedelta=cachedelta,
2470 node=node, flags=flags,
2470 node=node, flags=flags,
2471 deltacomputer=deltacomputer)
2471 deltacomputer=deltacomputer)
2472 else:
2472 else:
2473 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2473 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2474 checkambig=False)
2474 checkambig=False)
2475 dfh = None
2475 dfh = None
2476 if not destrevlog._inline:
2476 if not destrevlog._inline:
2477 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2477 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2478 try:
2478 try:
2479 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2479 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2480 p2, flags, cachedelta, ifh, dfh,
2480 p2, flags, cachedelta, ifh, dfh,
2481 deltacomputer=deltacomputer)
2481 deltacomputer=deltacomputer)
2482 finally:
2482 finally:
2483 if dfh:
2483 if dfh:
2484 dfh.close()
2484 dfh.close()
2485 ifh.close()
2485 ifh.close()
2486
2486
2487 if addrevisioncb:
2487 if addrevisioncb:
2488 addrevisioncb(self, rev, node)
2488 addrevisioncb(self, rev, node)
2489 finally:
2489 finally:
2490 destrevlog._lazydeltabase = oldlazydeltabase
2490 destrevlog._lazydeltabase = oldlazydeltabase
2491 destrevlog._aggressivemergedeltas = oldamd
2491 destrevlog._aggressivemergedeltas = oldamd
General Comments 0
You need to be logged in to leave comments. Login now