revlog: use context manager for data file lifetime in checksize...

Boris Feld
r35990:82afb1a5 default
@@ -1,2498 +1,2497 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import errno
import hashlib
import heapq
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

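# Illustrative sketch (not part of the original file): the version word
# stored on disk packs the format number into the low 16 bits and the
# feature flags into the high bits; revlog.__init__ below splits it the
# same way.
def _exampledecodeversion(v=REVLOG_DEFAULT_VERSION):
    fmt = v & 0xFFFF     # e.g. REVLOGV1
    flags = v & ~0xFFFF  # e.g. FLAG_INLINE_DATA
    return fmt, flags
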
# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
      - (read) f(self, rawtext) -> text, bool
      - (write) f(self, text) -> rawtext, bool
      - (raw) f(self, rawtext) -> bool
    "text" is presented to the user. "rawtext" is stored in revlog data, not
    directly visible to the user.
    The boolean returned by these transforms is used to determine whether
    the returned text can be used for hash integrity checking. For example,
    if "write" returns False, then "text" is used to generate the hash. If
    "write" returns True, that basically means "rawtext" returned by "write"
    should be used to generate the hash. Usually, "write" and "read" return
    different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor

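# Hypothetical sketch (not in the original file): registering a processor
# for REVIDX_EXTSTORED. The helpers _fetchexternal/_storeexternal are
# assumed placeholders, not real Mercurial APIs; the booleans follow the
# contract documented in addflagprocessor() above.
def _extstoredread(self, rawtext):
    # resolve the stored key back to the real content; the fulltext is what
    # the node hash covers, so it is usable for integrity checks
    return _fetchexternal(rawtext), True

def _extstoredwrite(self, text):
    # keep only a key in the revlog; the key itself is not hashable content
    return _storeexternal(text), False

def _extstoredraw(self, rawtext):
    # same boolean as "write": the raw key cannot be hash-checked
    return False

# addflagprocessor(REVIDX_EXTSTORED,
#                  (_extstoredread, _extstoredwrite, _extstoredraw))
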
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

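# Worked example (illustrative only): offset_type() packs a byte offset and
# a flag word into a single integer; getoffset()/gettype() invert it.
_packed = offset_type(4096, REVIDX_ISCENSORED)
assert getoffset(_packed) == 4096
assert gettype(_packed) == REVIDX_ISCENSORED
del _packed
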
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()

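# Minimal sketch (not part of the original file): a node id is
# sha1(min(p1, p2) + max(p1, p2) + text), so a root revision hashes its
# two null parents followed by its text.
def _examplerootnode(text='hello\n'):
    return hash(text, nullid, nullid)  # sha1(nullid + nullid + text)
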
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # Trim empty revs at the end, but never the very first revision of a chain
    while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
        endidx -= 1

    return revs[startidx:endidx]

def _slicechunk(revlog, revs):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one go.
    Assume that revs are sorted.
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    startbyte = start(revs[0])
    endbyte = start(revs[-1]) + length(revs[-1])
    readdata = deltachainspan = endbyte - startbyte

    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > revlog._srmingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < revlog._srdensitythreshold:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

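# Illustrative sketch (not part of the original file): slicing a chain whose
# revisions are separated by a large on-disk gap. The stub below provides
# only the attributes _slicechunk() actually uses.
def _exampleslicechunk():
    class _stubrevlog(object):
        _srmingapsize = 10
        _srdensitythreshold = 0.5
        # rev -> (start offset, compressed length)
        _entries = {0: (0, 10), 1: (10, 10), 2: (1000, 10)}
        def start(self, rev):
            return self._entries[rev][0]
        def length(self, rev):
            return self._entries[rev][1]
    # revs 0-1 are contiguous; rev 2 sits after a ~980-byte hole, so the
    # chain is split rather than reading the hole from disk
    return list(_slicechunk(_stubrevlog(), [0, 1, 2]))  # [[0, 1], [2]]
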
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()

class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides candidate revisions that are worth diffing against,
        grouped by how cheap they are to try.
        """
        revlog = self.revlog
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if revlog._generaldelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not revlog._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                yield parents

            if prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                yield (prev,)

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            basetext = revlog.revision(baserev, _df=fh, raw=True)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            nominateddeltas = []
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    textlen: length of the revision fulltext
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

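# Hypothetical usage sketch (not part of the original file): how a caller
# such as revlog._addrevision (outside this excerpt) might drive the two
# classes above; rl, dfh and the field values here are placeholders.
#
#   dc = _deltacomputer(rl)
#   revinfo = _revisioninfo(node, p1, p2, btext=[None], textlen=len(text),
#                           cachedelta=cachedelta, flags=flags)
#   deltainfo = dc.finddeltainfo(revinfo, dfh)
#   # deltainfo is None when no candidate beat storing a fulltext
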
# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

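# Illustrative sketch (not part of the original file): packing one v0 index
# entry by hand and checking its fixed record size.
def _examplev0entry():
    raw = indexformatv0_pack(0, 11, 0, 0, nullid, nullid, nullid)
    assert len(raw) == indexformatv0.size  # 4*4 + 3*20 = 76 bytes
    offset, clen, baserev, linkrev = indexformatv0_unpack(raw)[:4]
    return offset, clen, baserev, linkrev
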
# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

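# Illustrative sketch (not part of the original file): the first index entry
# doubles as the revlog header. packentry() overwrites the first 4 bytes of
# entry 0 with the version word, which is why revlog.__init__ below can read
# the version from indexdata[:4].
def _exampleversionstamp():
    entry = (offset_type(0, 0), 11, 11, 0, 0, nullrev, nullrev, nullid)
    raw = revlogio().packentry(entry, None, REVLOG_DEFAULT_VERSION, rev=0)
    return versionformat_unpack(raw[:4])[0]  # == REVLOG_DEFAULT_VERSION
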
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._srdensitythreshold = 0.25
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._withsparseread = bool(opts.get('with-sparse-read', False))
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

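    # Sketch of the pattern named in this changeset's message ("use context
    # manager for data file lifetime in checksize"): callers bound the data
    # file handle's lifetime with a with-block instead of closing it by
    # hand, e.g.
    #
    #     with self._datafp() as df:
    #         df.seek(0, 2)  # seek to end of the data file
    #         actualsize = df.tell()
    #
    # The checksize() method itself falls outside this excerpt.
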
    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

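    # Illustrative note (not in the original file): with generaldelta each
    # index entry records its own delta parent in index[rev][3], so the walk
    # above can jump backwards arbitrarily; without it the chain is always
    # rev, rev - 1, ..., base. For example, if index[5][3] == 2 and
    # index[2][3] == 2 (a fulltext base), _deltachain(5) returns
    # ([2, 5], False).
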
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

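    # Illustrative sketch (hypothetical history, not part of revlog.py):
    # the two traversals above are mirror images. Assuming a revlog ``rl``
    # with a linear history 0 -> 1 -> 2 -> 3, one might see:
    #
    #   list(rl.ancestors([2]))    # e.g. [1, 0], reverse topological order
    #   list(rl.descendants([1]))  # e.g. [2, 3], ascending revision order
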
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

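    # Illustrative sketch (hypothetical revision numbers, not part of
    # revlog.py): the incremental object lets discovery feed in heads in
    # batches, assuming a revlog ``rl``:
    #
    #   inc = rl.incrementalmissingrevs(common=[10])
    #   inc.missingancestors([15])   # revs in ::15 but not in ::10
    #   inc.missingancestors([20])   # same object, queried again
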
    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

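    # Illustrative sketch (hypothetical nodes, not part of revlog.py):
    # nodesbetween() answers "what lies between these points of the graph?".
    # Assuming a revlog ``rl`` and node IDs ``a`` and ``b`` where a is an
    # ancestor of b:
    #
    #   nodes, outroots, outheads = rl.nodesbetween([a], [b])
    #   # nodes is the topologically sorted span a::b (inclusive);
    #   # outroots == [a] and outheads == [b] when both are reachable.
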
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

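    # Illustrative sketch (hypothetical history, not part of revlog.py):
    # _headrevs() marks every rev as a candidate head, then clears the slots
    # of both parents (index entry positions 5 and 6); the extra array slot
    # absorbs writes for nullrev (-1). For a history 0 -> 1 -> 2 with a
    # second child 1 -> 3:
    #
    #   # rev 1 clears ishead[0]; revs 2 and 3 both clear ishead[1],
    #   # leaving ishead set only for revs 2 and 3 -> heads == [2, 3].
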
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except (TypeError, binascii.Error):
                pass

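    # Illustrative sketch (hypothetical prefixes, not part of revlog.py):
    # prefix lookup in a nutshell, assuming a revlog ``rl`` containing one
    # node whose hex form starts with 'abc123':
    #
    #   rl._partialmatch('abc123')  # unique prefix -> the full binary node
    #   rl._partialmatch('ff')      # prefix of the virtual working-directory
    #                               # node -> raises error.WdirUnsupported
    #   rl._partialmatch('a')       # shared by several nodes -> LookupError
    #                               # ('ambiguous identifier')
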
    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, hexnode, minlength=1):
        """Find the shortest unambiguous prefix that matches hexnode."""
        def isvalid(test):
            try:
                if self._partialmatch(test) is None:
                    return False

                try:
                    i = int(test)
                    # if we are a pure int, then starting with zero will not be
                    # confused as a rev; or, obviously, if the int is larger
                    # than the value of the tip rev
                    if test[0] == '0' or i > len(self):
                        return True
                    return False
                except ValueError:
                    return True
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True

        shortest = hexnode
        startlength = max(6, minlength)
        length = startlength
        while True:
            test = hexnode[:length]
            if isvalid(test):
                shortest = test
                if length == minlength or length > startlength:
                    return shortest
                length -= 1
            else:
                length += 1
                if len(shortest) <= length:
                    return shortest

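    # Illustrative sketch (hypothetical node, not part of revlog.py): the
    # search starts at max(6, minlength) and walks in the cheap direction.
    # Assuming a node '1f2e3d4c...' that is unique at four hex digits:
    #
    #   rl.shortest('1f2e3d4c...')
    #   # probes '1f2e3d' (valid), then '1f2e3', '1f2e', '1f2' (ambiguous),
    #   # steps back up and returns '1f2e'.
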
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self._indexfp()
            else:
                df = self._datafp()
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

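    # Illustrative sketch (hypothetical numbers, not part of revlog.py): the
    # window arithmetic above rounds the request to cache-size boundaries.
    # With a cachesize of 65536, offset 70000 and length 1000:
    #
    #   realoffset = 70000 & ~65535                        # 65536
    #   reallength = ((71000 + 65536) & ~65535) - 65536    # 65536
    #   # bytes [65536, 131072) are read and cached; a buffer over
    #   # [70000, 71000) is returned to the caller.
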
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

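    # Illustrative sketch (hypothetical numbers, not part of revlog.py): in
    # an inline revlog the data chunks are interleaved with the index
    # entries, so the physical offset of rev r's data is shifted by (r + 1)
    # index records. Assuming a 64-byte index entry size and rev 3 whose
    # logical data offset is 500:
    #
    #   physical = 500 + (3 + 1) * 64   # 756 bytes into the .i file
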
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

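    # Illustrative sketch (hypothetical chain, not part of revlog.py):
    # reconstructing a revision is "full text at the chain base, then apply
    # each delta in order". For a chain [0, 2, 5] the body above boils down
    # to:
    #
    #   bins = self._chunks([0, 2, 5])              # decompressed chunks
    #   rawtext = bytes(bins[0])                    # full text at rev 0
    #   rawtext = mdiff.patches(rawtext, bins[1:])  # apply deltas 2, then 5
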
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument indicating whether the raw transform
        should be applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags
        defined in REVIDX_FLAGS_ORDER must be stable to support
        non-commutative transforms.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply
            # the related operation transform and update the result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                validatehash = validatehash and vhash

        return text, validatehash

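    # A hedged sketch of how a flag processor plugs into _processflags().
    # Processors are registered as (readtransform, writetransform,
    # rawtransform) tuples; REVIDX_EXAMPLE and the transform bodies below
    # are hypothetical, for illustration only:
    #
    #   def readxform(rl, text):
    #       # undo the stored transform; the bool asks for a hash check
    #       return decode(text), True
    #   def writexform(rl, text):
    #       # apply the transform before storage
    #       return encode(text), True
    #   def rawxform(rl, text):
    #       # raw reads skip transforms; report whether raw text hashes clean
    #       return False
    #   addflagprocessor(REVIDX_EXAMPLE, (readxform, writexform, rawxform))
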
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, pycompat.bytestr(revornode)))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover
            # data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

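    # Illustrative sketch of the conversion above (not executable). An inline
    # revlog interleaves each data chunk with its index entry in the .i file:
    #
    #   .i file: [entry 0][data 0][entry 1][data 1]...
    #
    # After conversion, the same information is split across two files:
    #
    #   .i file: [entry 0][entry 1]...    (fixed-size index entries)
    #   .d file: [data 0][data 1]...      (located via offsets in the index)
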
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses
            might use a different hashing method (and override checkhash()
            in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any
        # provided cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

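    # A hedged usage sketch (every name except addrevision is an assumption):
    # callers add a revision inside an open transaction, tying it to a
    # changelog revision number:
    #
    #   with repo.transaction('example') as tr:
    #       node = rl.addrevision(text, tr, linkrev, p1node, p2node)
    #
    # The returned node is hash(text, p1, p2) unless a subclass overrides the
    # hashing scheme.
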
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

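    # Return-value sketch for compress(): the first tuple element is a header
    # prepended to the stored chunk, the second is the payload.
    #
    #   ('', compressed)  - compressor output, header already embedded
    #                       (e.g. zlib chunks start with 'x')
    #   ('', data)        - raw data starting with '\0', stored as-is
    #   ('u', data)       - uncompressed data, 'u' marks it for decompress()
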
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') % str(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

    def _isgooddeltainfo(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is
        within the disk span, disk size, and chain length bounds that we know
        to be performant."""
        if d is None:
            return False

        # - 'd.distance' is the distance from the base revision -- bounding it
        #   limits the amount of I/O we need to do.
        # - 'd.compresseddeltalen' is the sum of the total size of deltas we
        #   need to apply -- bounding it limits the amount of CPU we consume.

        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = d.distance # ensure the conditional below passes
        maxdist = max(maxdist, defaultmax)
        if (d.distance > maxdist or d.deltalen > textlen or
            d.compresseddeltalen > textlen * 2 or
            (self._maxchainlen and d.chainlen > self._maxchainlen)):
            return False

        return True

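    # Worked example of the bounds above (numbers are illustrative): with
    # textlen = 1000 and no _maxdeltachainspan configured, maxdist becomes
    # max(d.distance, 4000), so the distance check never fires; the delta is
    # then rejected if deltalen > 1000 (bigger than a full text), if the
    # chain's compressed deltas sum past 2000 bytes, or if the chain exceeds
    # _maxchainlen when one is set.
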
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer._buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

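    # Sketch of the index entry tuple built above, field by field:
    #
    #   e = (offset_type(offset, flags),  # data offset packed with flag bits
    #        l,                           # stored (compressed) chunk length
    #        textlen,                     # uncompressed revision length
    #        base,                        # revision the delta chain starts at
    #        link,                        # linkrev into the changelog
    #        p1r, p2r,                    # parent revision numbers
    #        node)                        # nodeid of this revision
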
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

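    # Sketch of one element of ``deltas`` as unpacked above (the form
    # produced during changegroup application):
    #
    #   (node,       # nodeid of the revision being added
    #    p1, p2,     # parent nodeids, which must already be in the revlog
    #    linknode,   # changelog node mapped to a linkrev via linkmapper()
    #    deltabase,  # nodeid the delta applies against
    #    delta,      # the binary delta itself
    #    flags)      # revision index flags, 0 for the default
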
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

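    # Worked example (illustrative): four revs where revs 2 and 3 are both
    # heads, with linkrevs [0, 1, 3, 2] and minlink=3. Rev 2 (linkrev 3) must
    # be stripped, so truncation happens at rev 2; rev 3 only has linkrev 2,
    # but it lies above the strippoint, so it is reported in brokenrevs: the
    # result is (2, {3}).
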
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

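    # Return-value sketch for checksize(): ``dd`` is the number of unexpected
    # trailing bytes in the data file (actual size minus the end offset of the
    # last revision), and ``di`` is the number of stray bytes in the index
    # file beyond whole index entries (plus interleaved data, for inline
    # revlogs). (0, 0) therefore means both files are the expected size.
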
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means
        if you are converting a non-generaldelta revlog to a generaldelta
        revlog, deltas will be recomputed if the delta's parent isn't a parent
        of the revision.

        In addition to the delta policy, the ``aggressivemergedeltas``
        argument controls whether to compute deltas against both parents for
        merges. If unset, the destination revlog's current default is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, str(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh,
                                                dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd
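
    # A hedged usage sketch for clone() (the constructor arguments and
    # transaction handling are assumptions): recompute every delta while
    # copying into a fresh revlog, e.g. after a delta algorithm change:
    #
    #   dest = revlog(destopener, rl.indexfile)
    #   with repo.transaction('clone') as tr:
    #       rl.clone(tr, dest, deltareuse=rl.DELTAREUSENEVER)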