revlog: use context manager for index file life time in __init__...
Boris Feld
r35987:1f2b8a64 default
@@ -1,2502 +1,2501 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import errno
import hashlib
import heapq
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate the hash. If
      "write" returns True, then the "rawtext" returned by "write" should be
      used to generate the hash. Usually, "write" and "read" return different
      booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor

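# A minimal sketch of registering a processor, assuming a caller owns the
# REVIDX_EXTSTORED flag; the three callables are illustrative stand-ins
# matching the (read, write, raw) signatures described above, not a real
# extension's processors.
def _demo_addflagprocessor():
    def _read(self, rawtext):
        return rawtext, True    # text shown to the user, hash-checkable
    def _write(self, text):
        return text, True       # rawtext to store on disk, hash-checkable
    def _raw(self, rawtext):
        return True             # rawtext usable for hash integrity checks
    addflagprocessor(REVIDX_EXTSTORED, (_read, _write, _raw))
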
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

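# Sketch of the 48/16-bit packing used in the first index field: the data
# offset lives in the high bits, the flag bits in the low 16, so getoffset
# and gettype invert offset_type.
def _demo_offset_type_roundtrip():
    packed = offset_type(1048576, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1048576
    assert gettype(packed) == REVIDX_ISCENSORED
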
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()

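# A small sketch of the parent-order invariance of hash(): parents are
# sorted before hashing, so swapping p1 and p2 yields the same node id.
# The parent values below are arbitrary 20-byte ids for illustration.
def _demo_hash_order_independence():
    p1, p2 = b'\x11' * 20, b'\x22' * 20
    assert hash(b'some text', p1, p2) == hash(b'some text', p2, p1)
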
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # Trim empty revs at the end, but never the very first revision of a chain
    while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
        endidx -= 1

    return revs[startidx:endidx]

def _slicechunk(revlog, revs):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read at one time.
    Assume that revs are sorted.
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    startbyte = start(revs[0])
    endbyte = start(revs[-1]) + length(revs[-1])
    readdata = deltachainspan = endbyte - startbyte

    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > revlog._srmingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < revlog._srdensitythreshold:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

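# Sketch of how _slicechunk splits a read, using a hypothetical stub standing
# in for a revlog: three 100-byte revisions with an 800-byte hole between the
# second and third are split into two reads once the gap exceeds
# _srmingapsize and the density falls below _srdensitythreshold.
class _fakerevlog(object):
    _srdensitythreshold = 0.5
    _srmingapsize = 10
    _starts = [0, 100, 1000]
    _lengths = [100, 100, 100]
    def start(self, rev):
        return self._starts[rev]
    def length(self, rev):
        return self._lengths[rev]

def _demo_slicechunk():
    assert list(_slicechunk(_fakerevlog(), [0, 1, 2])) == [[0, 1], [2]]
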
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()

class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides candidate revisions worth diffing against, grouped by level
        of easiness.
        """
        revlog = self.revlog
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if revlog._generaldelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not revlog._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                yield parents

            if prev not in tested:
                # other approaches failed, try against prev to hopefully save
                # us a fulltext.
                yield (prev,)

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            basetext = revlog.revision(baserev, _df=fh, raw=True)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            nominateddeltas = []
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo

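# Sketch of the ordering finddeltainfo consumes, assuming a generaldelta
# revlog with a cached delta from the server: the cached base is tried
# first, then the closest parent, then the previous revision, and the first
# group that yields an acceptable delta wins. (Illustrative helper, not
# revlog API; see _getcandidaterevs above for the real logic.)
def _demo_candidate_order(cachedbase, p1r, p2r, prev):
    yield (cachedbase,)         # what the server based its delta on
    yield (max(p1r, p2r),)      # the closer of the two parents
    yield (prev,)               # last resort before storing a fulltext
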
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

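# Sketch of the fixed-width v0 record: four 4-byte longs plus three 20-byte
# node ids makes each entry 76 bytes, so parsing is a plain struct
# round-trip. The field values below are arbitrary.
def _demo_v0_entry():
    entry = indexformatv0_pack(0, 12, 0, 0,
                               b'\x11' * 20, b'\x22' * 20, b'\x33' * 20)
    assert len(entry) == indexformatv0.size == 76
    assert indexformatv0_unpack(entry)[1] == 12   # compressed length
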
class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

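# Sketch of the v1 ("ng") entry layout and the header trick: each record is
# 64 bytes, and for the first entry packentry overwrites the initial 4 bytes
# with the revlog version and flags, so the header costs no extra space.
# Field values here are arbitrary.
def _demo_ng_entry():
    entry = indexformatng_pack(offset_type(0, 0), 10, 10, 0, 0,
                               nullrev, nullrev, b'\x55' * 20)
    assert indexformatng.size == 64
    versioned = versionformat_pack(REVLOG_DEFAULT_VERSION) + entry[4:]
    assert versionformat_unpack(versioned[:4])[0] == REVLOG_DEFAULT_VERSION
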
class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._srdensitythreshold = 0.25
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._withsparseread = bool(opts.get('with-sparse-read', False))
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
-            f = self._indexfp()
-            if (mmapindexthreshold is not None and
-                    self.opener.fstat(f).st_size >= mmapindexthreshold):
-                indexdata = util.buffer(util.mmapread(f))
-            else:
-                indexdata = f.read()
-            f.close()
+            with self._indexfp() as f:
+                if (mmapindexthreshold is not None and
+                        self.opener.fstat(f).st_size >= mmapindexthreshold):
+                    indexdata = util.buffer(util.mmapread(f))
+                else:
+                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

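    # The hunk above is the subject of this commit: previously __init__
    # opened the index file, read it, and called f.close() by hand, so an
    # exception raised in between (for example from self.opener.fstat or
    # util.mmapread) leaked the file descriptor. The with-statement closes
    # the file on every exit path while leaving the normal path unchanged.
    # The pattern, sketched on the plain stdlib (hypothetical helper, not
    # part of revlog.py):
    #
    #   def _read_index(path):
    #       with open(path, 'rb') as f:
    #           return f.read()   # f is closed even if read() raises
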
    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

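    # "opener" (used by _indexfp/_datafp above) is any callable with an
    # open()-like signature; the vfs objects Mercurial passes in also accept
    # keyword arguments such as checkambig and atomictemp. A minimal
    # stand-in for experiments might look like this (hypothetical, ignores
    # those extra flags):
    #
    #   def simpleopener(base):
    #       def opener(name, mode='r', **kwargs):
    #           return open(os.path.join(base, name), mode)
    #       return opener
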
    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

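    # Sketch of the pure-Python walk _deltachain falls back to: entry[3] is
    # the base rev, so with generaldelta the chain follows e[3] links until
    # a revision is its own base. On a toy index (only the fields the walk
    # reads are meaningful; values are hypothetical):
    #
    #   index = [(0, 0, 0, 0, 0, -1, -1, node0),   # rev 0: its own base
    #            (0, 0, 0, 0, 1, 0, -1, node1),    # rev 1: delta vs rev 0
    #            (0, 0, 0, 1, 2, 1, -1, node2)]    # rev 2: delta vs rev 1
    #
    #   _deltachain(2) walks 2 -> 1 -> 0 and returns ([0, 1, 2], False).
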
920 def ancestors(self, revs, stoprev=0, inclusive=False):
919 def ancestors(self, revs, stoprev=0, inclusive=False):
921 """Generate the ancestors of 'revs' in reverse topological order.
920 """Generate the ancestors of 'revs' in reverse topological order.
922 Does not generate revs lower than stoprev.
921 Does not generate revs lower than stoprev.
923
922
924 See the documentation for ancestor.lazyancestors for more details."""
923 See the documentation for ancestor.lazyancestors for more details."""
925
924
926 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
925 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
927 inclusive=inclusive)
926 inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break
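
    # Usage sketch (hypothetical instance ``rl``, assumed for illustration):
    # descendants() is a generator and excludes the starting revs themselves,
    # so the first value yielded is a child of some rev in the input:
    #
    #   for r in rl.descendants([0]):
    #       print(r)   # every rev except 0 that has 0 as an ancestor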

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
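
    # Usage sketch, assuming a hypothetical instance ``rl`` and node ids
    # ``a`` and ``b`` known to it (names are illustrative only):
    #
    #   has, missing = rl.findcommonmissing(common=[a], heads=[b])
    #   # ``has`` lazily answers membership tests for ::a (plus nullrev);
    #   # ``missing`` is the topologically sorted nodes of (::b) - (::a)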

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]
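
    # Note the symmetry, sketched with a hypothetical instance ``rl``:
    # findmissing() works on node ids while findmissingrevs() works on
    # revision numbers, but both delegate to incrementalmissingrevs():
    #
    #   nodes = rl.findmissing(common=[a], heads=[b])
    #   revs = rl.findmissingrevs(common=[rl.rev(a)], heads=[rl.rev(b)])
    #   # [rl.node(r) for r in revs] == nodes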

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
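
    # Usage sketch with a hypothetical instance ``rl`` and node ids ``a``
    # and ``b`` (illustrative names only):
    #
    #   nodes, outroots, outheads = rl.nodesbetween([a], [b])
    #   # nodes is the topologically sorted 'a::b' set, including a and b;
    #   # outroots/outheads are the members of the inputs that survived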

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]
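
    # The marking pass above amounts to: every rev starts as a candidate head
    # and is struck out as soon as it appears as a parent. A minimal sketch
    # of the same idea over plain (rev, p1, p2) tuples; the extra slot in
    # ``ishead`` is what lets parent index -1 (nullrev) be absorbed harmlessly:
    #
    #   entries = [(0, -1, -1), (1, 0, -1), (2, 0, -1)]
    #   ishead = [0] * (len(entries) + 1)
    #   for r, p1, p2 in entries:
    #       ishead[r] = 1
    #       ishead[p1] = ishead[p2] = 0
    #   heads = [r for r, v in enumerate(ishead) if v]   # -> [1, 2]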

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]
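
    # Usage sketch (hypothetical instance ``rl``):
    #
    #   rl.heads()            # all childless nodes
    #   rl.heads(start=n)     # heads among the descendants of node n
    #   rl.heads(stop=[m])    # node m is treated as if it had no children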

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except (TypeError, binascii.Error):
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, hexnode, minlength=1):
        """Find the shortest unambiguous prefix that matches hexnode."""
        def isvalid(test):
            try:
                if self._partialmatch(test) is None:
                    return False

                try:
                    i = int(test)
                    # if we are a pure int, then starting with zero will not be
                    # confused as a rev; or, obviously, if the int is larger
                    # than the value of the tip rev
                    if test[0] == '0' or i > len(self):
                        return True
                    return False
                except ValueError:
                    return True
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True

        shortest = hexnode
        startlength = max(6, minlength)
        length = startlength
        while True:
            test = hexnode[:length]
            if isvalid(test):
                shortest = test
                if length == minlength or length > startlength:
                    return shortest
                length -= 1
            else:
                length += 1
                if len(shortest) <= length:
                    return shortest
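
    # Search sketch: shortest() probes prefix lengths starting at
    # max(6, minlength), shrinking while the prefix stays unambiguous and
    # growing while it is ambiguous. With a hypothetical instance ``rl``:
    #
    #   rl.shortest(hexnode)               # shortest unambiguous prefix
    #   rl.shortest(hexnode, minlength=8)  # never shorter than 8 digits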

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self._indexfp()
            else:
                df = self._datafp()
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d
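
    # Worked example for the windowing arithmetic above, assuming a chunk
    # cache size of 65536 bytes: for a read at offset=70000, length=100,
    #
    #   realoffset = 70000 & ~65535                             # -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536   # -> 65536
    #
    # so a single aligned 64k window [65536, 131072) is read and cached, and
    # util.buffer() slices the requested 100 bytes back out of it.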

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)
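
    # Worked example for the inline adjustment above: in an inline revlog,
    # index entries (self._io.size bytes each, 64 for revlogv1) are
    # interleaved with the data, so the data of rev r starts at
    #
    #   self.start(r) + (r + 1) * self._io.size
    #
    # i.e. past the r + 1 index entries written up to and including r's own.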

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l
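
    # Usage sketch (hypothetical instance ``rl``): decompress a whole delta
    # chain in one pass instead of one _chunk() call per rev:
    #
    #   chain, stopped = rl._deltachain(rev)
    #   bins = rl._chunks(chain)   # one segment read, many decompressions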

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1
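
    # Semantics sketch, with a hypothetical instance ``rl``: nullrev means
    # "full snapshot, no delta parent"; otherwise the delta parent is
    #
    #   rl.index[rev][3]   # the stored base, when generaldelta is enabled
    #   rev - 1            # the previous revision, in the legacy layout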

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text
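
    # Reconstruction sketch: revision() stitches a text back together from
    # its delta chain. Roughly, with a hypothetical instance ``rl``:
    #
    #   chain, _ = rl._deltachain(rev)
    #   bins = rl._chunks(chain)
    #   text = mdiff.patches(bytes(bins[0]), bins[1:])   # base + patches
    #
    # with the cached (node, rev, rawtext) triple letting the chain walk stop
    # early at a previously reconstructed revision.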

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation ") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash
1818
1817
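    # A hedged sketch of how a processor plugs into the machinery above, via
    # the module-level _flagprocessors registry it consults (the transforms
    # and the use of REVIDX_EXTSTORED here are hypothetical, for illustration
    # only; mydecode/myencode are stand-in names):
    #
    #     def _readxform(rl, text):
    #         # undo the on-disk transform; True: result matches the node hash
    #         return mydecode(text), True
    #     def _writexform(rl, text):
    #         # apply the on-disk transform; False: skip hash validation
    #         return myencode(text), False
    #     def _rawxform(rl, text):
    #         # raw (on-disk) text does not hash to the node
    #         return False
    #     addflagprocessor(REVIDX_EXTSTORED,
    #                      (_readxform, _writexform, _rawxform))
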
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, pycompat.bytestr(revornode)))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, this will convert
        it to use separate index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self._datafp('w')
        try:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])
        finally:
            df.close()

        fp = self._indexfp('w')
        self.version &= ~FLAG_INLINE_DATA
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

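    # A minimal usage sketch (hypothetical caller; assumes a revlog ``rl``,
    # an open transaction ``tr``, a linkrev ``linkrev``, and parent nodes
    # ``p1``/``p2`` already known to this revlog):
    #
    #     node = rl.addrevision(b'file content', tr, linkrev, p1, p2)
    #     assert node in rl.nodemap
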
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

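    # Header-byte convention, sketched (a hypothetical round trip, not a
    # guaranteed invariant): compress() returns (header, data), where 'u'
    # marks data stored uncompressed and '' means the data itself (compressor
    # output, or a leading '\0') identifies its format; decompress() below
    # routes on that first byte:
    #
    #     h, d = rl.compress(b'some revision text')
    #     assert bytes(rl.decompress(h + d)) == b'some revision text'
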
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') % str(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

    def _isgooddeltainfo(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is
        within the disk span, disk size, and chain length bounds that we know
        to be performant."""
        if d is None:
            return False

        # - 'd.distance' is the distance from the base revision -- bounding it
        #   limits the amount of I/O we need to do.
        # - 'd.compresseddeltalen' is the sum of the total size of deltas we
        #   need to apply -- bounding it limits the amount of CPU we consume.

        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = d.distance # ensure the conditional passes
        maxdist = max(maxdist, defaultmax)
        if (d.distance > maxdist or d.deltalen > textlen or
            d.compresseddeltalen > textlen * 2 or
            (self._maxchainlen and d.chainlen > self._maxchainlen)):
            return False

        return True

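    # Worked example of the bounds above (hypothetical numbers): with
    # textlen = 1000 and _maxdeltachainspan unset, maxdist becomes
    # max(d.distance, 4 * 1000); a delta with distance 3000, deltalen 200 and
    # compresseddeltalen 1500 is good, while one with compresseddeltalen 2500
    # (> 2 * textlen) would be rejected.
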
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a default _deltacomputer
        will be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer._buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

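    # The a+ seek workaround, isolated (a sketch with hypothetical names, not
    # revlog API): any handle that interleaves reads and writes should
    # reposition explicitly before appending:
    #
    #     fh = open(path, 'a+b')
    #     fh.read(16)              # reading moves the file position...
    #     fh.seek(0, os.SEEK_END)  # ...so seek back to EOF before writing
    #     fh.write(entry)
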
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

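    # Sketch of feeding addgroup() (a hypothetical delta stream; each item is
    # a (node, p1, p2, linknode, deltabase, delta, flags) tuple as unpacked in
    # the loop above, and ``cl`` is a hypothetical changelog used to resolve
    # linknodes to linkrevs):
    #
    #     def linkmapper(linknode):
    #         return cl.rev(linknode)
    #     nodes = rl.addgroup(deltas, linkmapper, tr,
    #                         addrevisioncb=lambda rl, node: None)
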
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

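    # Worked example (hypothetical graph): revs 0..3 with linkrevs
    # [0, 2, 1, 3], where rev 2 branches off rev 0 and rev 3's parent is
    # rev 1. getstrippoint(2) walks back from the heads (revs 2 and 3) and
    # returns (1, {2}): truncating at rev 1 removes every linkrev >= 2, and
    # rev 2 (linkrev 1) is reported as collateral damage -- its linkrev was
    # fine, but it sits above the truncation point.
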
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self._datafp()
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

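    # Return-value sketch (my reading of the computation above): ``dd`` is
    # the count of unexpected bytes at the end of the data file and ``di``
    # the count at the end of the index file; (0, 0) means both files match
    # what the index entries predict. For example, a crash that appended
    # revision data without the matching index entry would show up as dd > 0.
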
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        If it is not set, the destination revlog's existing setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, str(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd
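
    # Usage sketch (hypothetical ``src``/``dst`` revlogs and an open
    # transaction ``tr``): copy a revlog while forcing delta recomputation,
    # e.g. when converting to generaldelta:
    #
    #     src.clone(tr, dst, deltareuse=revlog.DELTAREUSENEVER,
    #               aggressivemergedeltas=True)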