py3: use bytes() instead of str()...
Pulkit Goyal
r36739:6bacb2f6 default
@@ -1,2501 +1,2501 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import heapq
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor

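# Illustrative sketch (not from the upstream file): registering a processor
# for REVIDX_EXTSTORED could look like the following, where 'fetchexternal'
# and 'storeexternal' are hypothetical helpers named only for this example:
#
#     def _extread(rl, rawtext):
#         return fetchexternal(rawtext), False   # shown text != stored text
#     def _extwrite(rl, text):
#         return storeexternal(text), False      # hash computed over "text"
#     def _extraw(rl, rawtext):
#         return False                           # rawtext unusable for hashes
#
#     addflagprocessor(REVIDX_EXTSTORED, (_extread, _extwrite, _extraw))
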
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

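# Illustrative sketch (not from the upstream file): the three helpers above
# pack a byte offset and a 16-bit flag field into one integer and take it
# apart again (values arbitrary):
#
#     packed = offset_type(1024, REVIDX_ISCENSORED)
#     assert getoffset(packed) == 1024                # upper bits: offset
#     assert gettype(packed) == REVIDX_ISCENSORED     # low 16 bits: flags
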
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()

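# Illustrative sketch (not from the upstream file): since the parents are
# fed to SHA-1 in sorted order, swapping them does not change the digest,
# and a single-parent revision reduces to sha1(nullid + p1 + text):
#
#     hash(b'data', p1, p2) == hash(b'data', p2, p1)
#     hash(b'data', p1, nullid) == hashlib.sha1(nullid + p1 + b'data').digest()
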
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # Trim empty revs at the end, but never the very first revision of a chain
    while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
        endidx -= 1

    return revs[startidx:endidx]

def _slicechunk(revlog, revs):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one go.
    Assume that revs are sorted.
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    startbyte = start(revs[0])
    endbyte = start(revs[-1]) + length(revs[-1])
    readdata = deltachainspan = endbyte - startbyte

    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > revlog._srmingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < revlog._srdensitythreshold:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

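# Illustrative sketch (not from the upstream file): given sorted revs with
# one large on-disk gap, say between revs 1 and 2, and a density below
# revlog._srdensitythreshold, the generator yields separate read groups
# instead of one long span covering the gap:
#
#     for group in _slicechunk(rl, [0, 1, 2, 3, 4]):
#         ...   # e.g. [0, 1] then [2, 3, 4] once the 1->2 gap exceeds
#               # rl._srmingapsize
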
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()

class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides candidate revisions that are worth diffing against,
        grouped by level of ease.
        """
        revlog = self.revlog
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if revlog._generaldelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not revlog._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                yield parents

            if prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                yield (prev,)

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            basetext = revlog.revision(baserev, _df=fh, raw=True)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            nominateddeltas = []
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo

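# Illustrative sketch (not from the upstream file): a caller would drive the
# computer roughly as follows, falling back to a full snapshot when no
# candidate is acceptable ('revinfo' and 'fh' as documented above):
#
#     dc = _deltacomputer(rl)
#     info = dc.finddeltainfo(revinfo, fh)
#     if info is None:
#         ...   # nothing passed _isgooddeltainfo; store a fulltext instead
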
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

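# Illustrative sketch (not from the upstream file): each v0 record is
# indexformatv0.size == 76 bytes and unpacks into the seven fields listed
# above:
#
#     (offset, clen, baserev, linkrev,
#      p1node, p2node, node) = indexformatv0_unpack(record)
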
class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

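# Illustrative sketch (not from the upstream file): each ng record is
# indexformatng.size == 64 bytes; the leading quad carries offset and flags
# as produced by offset_type():
#
#     (offset_flags, clen, ulen, baserev, linkrev,
#      p1rev, p2rev, node) = indexformatng.unpack(record)
#     offset, flags = getoffset(offset_flags), gettype(offset_flags)
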
496 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
496 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
497 # signed integer)
497 # signed integer)
498 _maxentrysize = 0x7fffffff
498 _maxentrysize = 0x7fffffff
499
499
500 class revlogio(object):
500 class revlogio(object):
501 def __init__(self):
501 def __init__(self):
502 self.size = indexformatng.size
502 self.size = indexformatng.size
503
503
504 def parseindex(self, data, inline):
504 def parseindex(self, data, inline):
505 # call the C implementation to parse the index data
505 # call the C implementation to parse the index data
506 index, cache = parsers.parse_index2(data, inline)
506 index, cache = parsers.parse_index2(data, inline)
507 return index, getattr(index, 'nodemap', None), cache
507 return index, getattr(index, 'nodemap', None), cache
508
508
509 def packentry(self, entry, node, version, rev):
509 def packentry(self, entry, node, version, rev):
510 p = indexformatng_pack(*entry)
510 p = indexformatng_pack(*entry)
511 if rev == 0:
511 if rev == 0:
512 p = versionformat_pack(version) + p[4:]
512 p = versionformat_pack(version) + p[4:]
513 return p
513 return p
514
514
515 class revlog(object):
515 class revlog(object):
516 """
516 """
517 the underlying revision storage object
517 the underlying revision storage object
518
518
519 A revlog consists of two parts, an index and the revision data.
519 A revlog consists of two parts, an index and the revision data.
520
520
521 The index is a file with a fixed record size containing
521 The index is a file with a fixed record size containing
522 information on each revision, including its nodeid (hash), the
522 information on each revision, including its nodeid (hash), the
523 nodeids of its parents, the position and offset of its data within
523 nodeids of its parents, the position and offset of its data within
524 the data file, and the revision it's based on. Finally, each entry
524 the data file, and the revision it's based on. Finally, each entry
525 contains a linkrev entry that can serve as a pointer to external
525 contains a linkrev entry that can serve as a pointer to external
526 data.
526 data.
527
527
528 The revision data itself is a linear collection of data chunks.
528 The revision data itself is a linear collection of data chunks.
529 Each chunk represents a revision and is usually represented as a
529 Each chunk represents a revision and is usually represented as a
530 delta against the previous chunk. To bound lookup time, runs of
530 delta against the previous chunk. To bound lookup time, runs of
531 deltas are limited to about 2 times the length of the original
531 deltas are limited to about 2 times the length of the original
532 version data. This makes retrieval of a version proportional to
532 version data. This makes retrieval of a version proportional to
533 its size, or O(1) relative to the number of revisions.
533 its size, or O(1) relative to the number of revisions.
534
534
535 Both pieces of the revlog are written to in an append-only
535 Both pieces of the revlog are written to in an append-only
536 fashion, which means we never need to rewrite a file to insert or
536 fashion, which means we never need to rewrite a file to insert or
537 remove data, and can use some simple techniques to avoid the need
537 remove data, and can use some simple techniques to avoid the need
538 for locking while reading.
538 for locking while reading.
539
539
540 If checkambig, indexfile is opened with checkambig=True at
540 If checkambig, indexfile is opened with checkambig=True at
541 writing, to avoid file stat ambiguity.
541 writing, to avoid file stat ambiguity.
542
542
543 If mmaplargeindex is True, and an mmapindexthreshold is set, the
543 If mmaplargeindex is True, and an mmapindexthreshold is set, the
544 index will be mmapped rather than read if it is larger than the
544 index will be mmapped rather than read if it is larger than the
545 configured threshold.
545 configured threshold.
546 """
546 """
547 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
547 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
548 mmaplargeindex=False):
548 mmaplargeindex=False):
549 """
549 """
550 create a revlog object
550 create a revlog object
551
551
552 opener is a function that abstracts the file opening operation
552 opener is a function that abstracts the file opening operation
553 and can be used to implement COW semantics or the like.
553 and can be used to implement COW semantics or the like.
554 """
554 """
555 self.indexfile = indexfile
555 self.indexfile = indexfile
556 self.datafile = datafile or (indexfile[:-2] + ".d")
556 self.datafile = datafile or (indexfile[:-2] + ".d")
557 self.opener = opener
557 self.opener = opener
558 # When True, indexfile is opened with checkambig=True at writing, to
558 # When True, indexfile is opened with checkambig=True at writing, to
559 # avoid file stat ambiguity.
559 # avoid file stat ambiguity.
560 self._checkambig = checkambig
560 self._checkambig = checkambig
561 # 3-tuple of (node, rev, text) for a raw revision.
561 # 3-tuple of (node, rev, text) for a raw revision.
562 self._cache = None
562 self._cache = None
563 # Maps rev to chain base rev.
563 # Maps rev to chain base rev.
564 self._chainbasecache = util.lrucachedict(100)
564 self._chainbasecache = util.lrucachedict(100)
565 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
565 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
566 self._chunkcache = (0, '')
566 self._chunkcache = (0, '')
567 # How much data to read and cache into the raw revlog data cache.
567 # How much data to read and cache into the raw revlog data cache.
568 self._chunkcachesize = 65536
568 self._chunkcachesize = 65536
569 self._maxchainlen = None
569 self._maxchainlen = None
570 self._aggressivemergedeltas = False
570 self._aggressivemergedeltas = False
571 self.index = []
571 self.index = []
572 # Mapping of partial identifiers to full nodes.
572 # Mapping of partial identifiers to full nodes.
573 self._pcache = {}
573 self._pcache = {}
574 # Mapping of revision integer to full node.
574 # Mapping of revision integer to full node.
575 self._nodecache = {nullid: nullrev}
575 self._nodecache = {nullid: nullrev}
576 self._nodepos = None
576 self._nodepos = None
577 self._compengine = 'zlib'
577 self._compengine = 'zlib'
578 self._maxdeltachainspan = -1
578 self._maxdeltachainspan = -1
579 self._withsparseread = False
579 self._withsparseread = False
580 self._srdensitythreshold = 0.25
580 self._srdensitythreshold = 0.25
581 self._srmingapsize = 262144
581 self._srmingapsize = 262144
582
582
583 mmapindexthreshold = None
583 mmapindexthreshold = None
584 v = REVLOG_DEFAULT_VERSION
584 v = REVLOG_DEFAULT_VERSION
585 opts = getattr(opener, 'options', None)
585 opts = getattr(opener, 'options', None)
586 if opts is not None:
586 if opts is not None:
587 if 'revlogv2' in opts:
587 if 'revlogv2' in opts:
588 # version 2 revlogs always use generaldelta.
588 # version 2 revlogs always use generaldelta.
589 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
589 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
590 elif 'revlogv1' in opts:
590 elif 'revlogv1' in opts:
591 if 'generaldelta' in opts:
591 if 'generaldelta' in opts:
592 v |= FLAG_GENERALDELTA
592 v |= FLAG_GENERALDELTA
593 else:
593 else:
594 v = 0
594 v = 0
595 if 'chunkcachesize' in opts:
595 if 'chunkcachesize' in opts:
596 self._chunkcachesize = opts['chunkcachesize']
596 self._chunkcachesize = opts['chunkcachesize']
597 if 'maxchainlen' in opts:
597 if 'maxchainlen' in opts:
598 self._maxchainlen = opts['maxchainlen']
598 self._maxchainlen = opts['maxchainlen']
599 if 'aggressivemergedeltas' in opts:
599 if 'aggressivemergedeltas' in opts:
600 self._aggressivemergedeltas = opts['aggressivemergedeltas']
600 self._aggressivemergedeltas = opts['aggressivemergedeltas']
601 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
601 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
602 if 'compengine' in opts:
602 if 'compengine' in opts:
603 self._compengine = opts['compengine']
603 self._compengine = opts['compengine']
604 if 'maxdeltachainspan' in opts:
604 if 'maxdeltachainspan' in opts:
605 self._maxdeltachainspan = opts['maxdeltachainspan']
605 self._maxdeltachainspan = opts['maxdeltachainspan']
606 if mmaplargeindex and 'mmapindexthreshold' in opts:
606 if mmaplargeindex and 'mmapindexthreshold' in opts:
607 mmapindexthreshold = opts['mmapindexthreshold']
607 mmapindexthreshold = opts['mmapindexthreshold']
608 self._withsparseread = bool(opts.get('with-sparse-read', False))
608 self._withsparseread = bool(opts.get('with-sparse-read', False))
609 if 'sparse-read-density-threshold' in opts:
609 if 'sparse-read-density-threshold' in opts:
610 self._srdensitythreshold = opts['sparse-read-density-threshold']
610 self._srdensitythreshold = opts['sparse-read-density-threshold']
611 if 'sparse-read-min-gap-size' in opts:
611 if 'sparse-read-min-gap-size' in opts:
612 self._srmingapsize = opts['sparse-read-min-gap-size']
612 self._srmingapsize = opts['sparse-read-min-gap-size']
613
613
614 if self._chunkcachesize <= 0:
614 if self._chunkcachesize <= 0:
615 raise RevlogError(_('revlog chunk cache size %r is not greater '
615 raise RevlogError(_('revlog chunk cache size %r is not greater '
616 'than 0') % self._chunkcachesize)
616 'than 0') % self._chunkcachesize)
617 elif self._chunkcachesize & (self._chunkcachesize - 1):
617 elif self._chunkcachesize & (self._chunkcachesize - 1):
618 raise RevlogError(_('revlog chunk cache size %r is not a power '
618 raise RevlogError(_('revlog chunk cache size %r is not a power '
619 'of 2') % self._chunkcachesize)
619 'of 2') % self._chunkcachesize)
620
620
621 indexdata = ''
621 indexdata = ''
622 self._initempty = True
622 self._initempty = True
623 try:
623 try:
624 with self._indexfp() as f:
624 with self._indexfp() as f:
625 if (mmapindexthreshold is not None and
625 if (mmapindexthreshold is not None and
626 self.opener.fstat(f).st_size >= mmapindexthreshold):
626 self.opener.fstat(f).st_size >= mmapindexthreshold):
627 indexdata = util.buffer(util.mmapread(f))
627 indexdata = util.buffer(util.mmapread(f))
628 else:
628 else:
629 indexdata = f.read()
629 indexdata = f.read()
630 if len(indexdata) > 0:
630 if len(indexdata) > 0:
631 v = versionformat_unpack(indexdata[:4])[0]
631 v = versionformat_unpack(indexdata[:4])[0]
632 self._initempty = False
632 self._initempty = False
633 except IOError as inst:
633 except IOError as inst:
634 if inst.errno != errno.ENOENT:
634 if inst.errno != errno.ENOENT:
635 raise
635 raise
636
636
637 self.version = v
637 self.version = v
638 self._inline = v & FLAG_INLINE_DATA
638 self._inline = v & FLAG_INLINE_DATA
639 self._generaldelta = v & FLAG_GENERALDELTA
639 self._generaldelta = v & FLAG_GENERALDELTA
640 flags = v & ~0xFFFF
640 flags = v & ~0xFFFF
641 fmt = v & 0xFFFF
641 fmt = v & 0xFFFF
642 if fmt == REVLOGV0:
642 if fmt == REVLOGV0:
643 if flags:
643 if flags:
644 raise RevlogError(_('unknown flags (%#04x) in version %d '
644 raise RevlogError(_('unknown flags (%#04x) in version %d '
645 'revlog %s') %
645 'revlog %s') %
646 (flags >> 16, fmt, self.indexfile))
646 (flags >> 16, fmt, self.indexfile))
647 elif fmt == REVLOGV1:
647 elif fmt == REVLOGV1:
648 if flags & ~REVLOGV1_FLAGS:
648 if flags & ~REVLOGV1_FLAGS:
649 raise RevlogError(_('unknown flags (%#04x) in version %d '
649 raise RevlogError(_('unknown flags (%#04x) in version %d '
650 'revlog %s') %
650 'revlog %s') %
651 (flags >> 16, fmt, self.indexfile))
651 (flags >> 16, fmt, self.indexfile))
652 elif fmt == REVLOGV2:
652 elif fmt == REVLOGV2:
653 if flags & ~REVLOGV2_FLAGS:
653 if flags & ~REVLOGV2_FLAGS:
654 raise RevlogError(_('unknown flags (%#04x) in version %d '
654 raise RevlogError(_('unknown flags (%#04x) in version %d '
655 'revlog %s') %
655 'revlog %s') %
656 (flags >> 16, fmt, self.indexfile))
656 (flags >> 16, fmt, self.indexfile))
657 else:
657 else:
658 raise RevlogError(_('unknown version (%d) in revlog %s') %
658 raise RevlogError(_('unknown version (%d) in revlog %s') %
659 (fmt, self.indexfile))
659 (fmt, self.indexfile))
660
660
661 self.storedeltachains = True
661 self.storedeltachains = True
662
662
663 self._io = revlogio()
663 self._io = revlogio()
664 if self.version == REVLOGV0:
664 if self.version == REVLOGV0:
665 self._io = revlogoldio()
665 self._io = revlogoldio()
666 try:
666 try:
667 d = self._io.parseindex(indexdata, self._inline)
667 d = self._io.parseindex(indexdata, self._inline)
668 except (ValueError, IndexError):
668 except (ValueError, IndexError):
669 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
669 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
670 self.index, nodemap, self._chunkcache = d
670 self.index, nodemap, self._chunkcache = d
671 if nodemap is not None:
671 if nodemap is not None:
672 self.nodemap = self._nodecache = nodemap
672 self.nodemap = self._nodecache = nodemap
673 if not self._chunkcache:
673 if not self._chunkcache:
674 self._chunkclear()
674 self._chunkclear()
675 # revnum -> (chain-length, sum-delta-length)
675 # revnum -> (chain-length, sum-delta-length)
676 self._chaininfocache = {}
676 self._chaininfocache = {}
677 # revlog header -> revlog compressor
677 # revlog header -> revlog compressor
678 self._decompressors = {}
678 self._decompressors = {}
679
679
680 @util.propertycache
680 @util.propertycache
681 def _compressor(self):
681 def _compressor(self):
682 return util.compengines[self._compengine].revlogcompressor()
682 return util.compengines[self._compengine].revlogcompressor()
683
683
684 def _indexfp(self, mode='r'):
684 def _indexfp(self, mode='r'):
685 """file object for the revlog's index file"""
685 """file object for the revlog's index file"""
686 args = {r'mode': mode}
686 args = {r'mode': mode}
687 if mode != 'r':
687 if mode != 'r':
688 args[r'checkambig'] = self._checkambig
688 args[r'checkambig'] = self._checkambig
689 if mode == 'w':
689 if mode == 'w':
690 args[r'atomictemp'] = True
690 args[r'atomictemp'] = True
691 return self.opener(self.indexfile, **args)
691 return self.opener(self.indexfile, **args)
692
692
693 def _datafp(self, mode='r'):
693 def _datafp(self, mode='r'):
694 """file object for the revlog's data file"""
694 """file object for the revlog's data file"""
695 return self.opener(self.datafile, mode=mode)
695 return self.opener(self.datafile, mode=mode)
696
696
697 @contextlib.contextmanager
697 @contextlib.contextmanager
698 def _datareadfp(self, existingfp=None):
698 def _datareadfp(self, existingfp=None):
699 """file object suitable to read data"""
699 """file object suitable to read data"""
700 if existingfp is not None:
700 if existingfp is not None:
701 yield existingfp
701 yield existingfp
702 else:
702 else:
703 if self._inline:
703 if self._inline:
704 func = self._indexfp
704 func = self._indexfp
705 else:
705 else:
706 func = self._datafp
706 func = self._datafp
707 with func() as fp:
707 with func() as fp:
708 yield fp
708 yield fp
709
709
710 def tip(self):
710 def tip(self):
711 return self.node(len(self.index) - 2)
711 return self.node(len(self.index) - 2)
712 def __contains__(self, rev):
712 def __contains__(self, rev):
713 return 0 <= rev < len(self)
713 return 0 <= rev < len(self)
714 def __len__(self):
714 def __len__(self):
715 return len(self.index) - 1
715 return len(self.index) - 1
716 def __iter__(self):
716 def __iter__(self):
717 return iter(xrange(len(self)))
717 return iter(xrange(len(self)))
718 def revs(self, start=0, stop=None):
718 def revs(self, start=0, stop=None):
719 """iterate over all rev in this revlog (from start to stop)"""
719 """iterate over all rev in this revlog (from start to stop)"""
720 step = 1
720 step = 1
721 if stop is not None:
721 if stop is not None:
722 if start > stop:
722 if start > stop:
723 step = -1
723 step = -1
724 stop += step
724 stop += step
725 else:
725 else:
726 stop = len(self)
726 stop = len(self)
727 return xrange(start, stop, step)
727 return xrange(start, stop, step)
728
728
729 @util.propertycache
729 @util.propertycache
730 def nodemap(self):
730 def nodemap(self):
731 self.rev(self.node(0))
731 self.rev(self.node(0))
732 return self._nodecache
732 return self._nodecache
733
733
734 def hasnode(self, node):
734 def hasnode(self, node):
735 try:
735 try:
736 self.rev(node)
736 self.rev(node)
737 return True
737 return True
738 except KeyError:
738 except KeyError:
739 return False
739 return False
740
740
741 def clearcaches(self):
741 def clearcaches(self):
742 self._cache = None
742 self._cache = None
743 self._chainbasecache.clear()
743 self._chainbasecache.clear()
744 self._chunkcache = (0, '')
744 self._chunkcache = (0, '')
745 self._pcache = {}
745 self._pcache = {}
746
746
747 try:
747 try:
748 self._nodecache.clearcaches()
748 self._nodecache.clearcaches()
749 except AttributeError:
749 except AttributeError:
750 self._nodecache = {nullid: nullrev}
750 self._nodecache = {nullid: nullrev}
751 self._nodepos = None
751 self._nodepos = None
752
752
753 def rev(self, node):
753 def rev(self, node):
754 try:
754 try:
755 return self._nodecache[node]
755 return self._nodecache[node]
756 except TypeError:
756 except TypeError:
757 raise
757 raise
758 except RevlogError:
758 except RevlogError:
759 # parsers.c radix tree lookup failed
759 # parsers.c radix tree lookup failed
760 if node == wdirid:
760 if node == wdirid:
761 raise error.WdirUnsupported
761 raise error.WdirUnsupported
762 raise LookupError(node, self.indexfile, _('no node'))
762 raise LookupError(node, self.indexfile, _('no node'))
763 except KeyError:
763 except KeyError:
764 # pure python cache lookup failed
764 # pure python cache lookup failed
765 n = self._nodecache
765 n = self._nodecache
766 i = self.index
766 i = self.index
767 p = self._nodepos
767 p = self._nodepos
768 if p is None:
768 if p is None:
769 p = len(i) - 2
769 p = len(i) - 2
770 for r in xrange(p, -1, -1):
770 for r in xrange(p, -1, -1):
771 v = i[r][7]
771 v = i[r][7]
772 n[v] = r
772 n[v] = r
773 if v == node:
773 if v == node:
774 self._nodepos = r - 1
774 self._nodepos = r - 1
775 return r
775 return r
776 if node == wdirid:
776 if node == wdirid:
777 raise error.WdirUnsupported
777 raise error.WdirUnsupported
778 raise LookupError(node, self.indexfile, _('no node'))
778 raise LookupError(node, self.indexfile, _('no node'))
779
779
780 # Accessors for index entries.
780 # Accessors for index entries.
781
781
782 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
782 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
783 # are flags.
783 # are flags.
784 def start(self, rev):
784 def start(self, rev):
785 return int(self.index[rev][0] >> 16)
785 return int(self.index[rev][0] >> 16)
786
786
787 def flags(self, rev):
787 def flags(self, rev):
788 return self.index[rev][0] & 0xFFFF
788 return self.index[rev][0] & 0xFFFF
789
789
790 def length(self, rev):
790 def length(self, rev):
791 return self.index[rev][1]
791 return self.index[rev][1]
792
792
793 def rawsize(self, rev):
793 def rawsize(self, rev):
794 """return the length of the uncompressed text for a given revision"""
794 """return the length of the uncompressed text for a given revision"""
795 l = self.index[rev][2]
795 l = self.index[rev][2]
796 if l >= 0:
796 if l >= 0:
797 return l
797 return l
798
798
799 t = self.revision(rev, raw=True)
799 t = self.revision(rev, raw=True)
800 return len(t)
800 return len(t)
801
801
802 def size(self, rev):
802 def size(self, rev):
803 """length of non-raw text (processed by a "read" flag processor)"""
803 """length of non-raw text (processed by a "read" flag processor)"""
804 # fast path: if no "read" flag processor could change the content,
804 # fast path: if no "read" flag processor could change the content,
805 # size is rawsize. note: ELLIPSIS is known to not change the content.
805 # size is rawsize. note: ELLIPSIS is known to not change the content.
806 flags = self.flags(rev)
806 flags = self.flags(rev)
807 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
807 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
808 return self.rawsize(rev)
808 return self.rawsize(rev)
809
809
810 return len(self.revision(rev, raw=False))
810 return len(self.revision(rev, raw=False))
811
811
812 def chainbase(self, rev):
812 def chainbase(self, rev):
813 base = self._chainbasecache.get(rev)
813 base = self._chainbasecache.get(rev)
814 if base is not None:
814 if base is not None:
815 return base
815 return base
816
816
817 index = self.index
817 index = self.index
818 base = index[rev][3]
818 base = index[rev][3]
819 while base != rev:
819 while base != rev:
820 rev = base
820 rev = base
821 base = index[rev][3]
821 base = index[rev][3]
822
822
823 self._chainbasecache[rev] = base
823 self._chainbasecache[rev] = base
824 return base
824 return base
825
825
826 def linkrev(self, rev):
826 def linkrev(self, rev):
827 return self.index[rev][4]
827 return self.index[rev][4]
828
828
829 def parentrevs(self, rev):
829 def parentrevs(self, rev):
830 try:
830 try:
831 entry = self.index[rev]
831 entry = self.index[rev]
832 except IndexError:
832 except IndexError:
833 if rev == wdirrev:
833 if rev == wdirrev:
834 raise error.WdirUnsupported
834 raise error.WdirUnsupported
835 raise
835 raise
836
836
837 return entry[5], entry[6]
837 return entry[5], entry[6]
838
838
839 def node(self, rev):
839 def node(self, rev):
840 try:
840 try:
841 return self.index[rev][7]
841 return self.index[rev][7]
842 except IndexError:
842 except IndexError:
843 if rev == wdirrev:
843 if rev == wdirrev:
844 raise error.WdirUnsupported
844 raise error.WdirUnsupported
845 raise
845 raise
846
846
847 # Derived from index values.
847 # Derived from index values.
848
848
849 def end(self, rev):
849 def end(self, rev):
850 return self.start(rev) + self.length(rev)
850 return self.start(rev) + self.length(rev)
851
851
852 def parents(self, node):
852 def parents(self, node):
853 i = self.index
853 i = self.index
854 d = i[self.rev(node)]
854 d = i[self.rev(node)]
855 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
855 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
856
856
857 def chainlen(self, rev):
857 def chainlen(self, rev):
858 return self._chaininfo(rev)[0]
858 return self._chaininfo(rev)[0]
859
859
860 def _chaininfo(self, rev):
860 def _chaininfo(self, rev):
861 chaininfocache = self._chaininfocache
861 chaininfocache = self._chaininfocache
862 if rev in chaininfocache:
862 if rev in chaininfocache:
863 return chaininfocache[rev]
863 return chaininfocache[rev]
864 index = self.index
864 index = self.index
865 generaldelta = self._generaldelta
865 generaldelta = self._generaldelta
866 iterrev = rev
866 iterrev = rev
867 e = index[iterrev]
867 e = index[iterrev]
868 clen = 0
868 clen = 0
869 compresseddeltalen = 0
869 compresseddeltalen = 0
870 while iterrev != e[3]:
870 while iterrev != e[3]:
871 clen += 1
871 clen += 1
872 compresseddeltalen += e[1]
872 compresseddeltalen += e[1]
873 if generaldelta:
873 if generaldelta:
874 iterrev = e[3]
874 iterrev = e[3]
875 else:
875 else:
876 iterrev -= 1
876 iterrev -= 1
877 if iterrev in chaininfocache:
877 if iterrev in chaininfocache:
878 t = chaininfocache[iterrev]
878 t = chaininfocache[iterrev]
879 clen += t[0]
879 clen += t[0]
880 compresseddeltalen += t[1]
880 compresseddeltalen += t[1]
881 break
881 break
882 e = index[iterrev]
882 e = index[iterrev]
883 else:
883 else:
884 # Add text length of base since decompressing that also takes
884 # Add text length of base since decompressing that also takes
885 # work. For cache hits the length is already included.
885 # work. For cache hits the length is already included.
886 compresseddeltalen += e[1]
886 compresseddeltalen += e[1]
887 r = (clen, compresseddeltalen)
887 r = (clen, compresseddeltalen)
888 chaininfocache[rev] = r
888 chaininfocache[rev] = r
889 return r
889 return r
890
890
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

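    # Illustrative sketch (assumed names, not from this module): the chain
    # base is the rev whose index entry points at itself (e[3] == rev).
    # With generaldelta the walk follows e[3]; otherwise it steps back one
    # rev at a time:
    #
    #   chain, stopped = rl._deltachain(rev, stoprev=cachedrev)
    #   # ``chain`` lists the revs whose chunks must be patched together,
    #   # base first; ``stopped`` is True when ``stoprev`` cut the walk
    #   # short of the base.
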
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

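    # A small sketch of both generators (``rl`` is an assumed revlog
    # instance; revs are plain ints):
    #
    #   list(rl.descendants([2]))           # e.g. [3, 5]; 2 itself omitted
    #   list(rl.ancestors([7], stoprev=3))  # ancestors of 7 with rev >= 3
    #
    # ``descendants`` scans forward from min(revs), so its cost is
    # proportional to the number of later revisions, not to the output.
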
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

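    # Sketch of the revset semantics above (hypothetical nodes ``c`` and
    # ``h``; not part of the module):
    #
    #   has, missing = rl.findcommonmissing(common=[c], heads=[h])
    #   # ``has`` lazily answers "rev in ::common" membership tests;
    #   # ``missing`` is (::h) - (::c) as nodes, sorted by revision.
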
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

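    # Usage sketch (assumed revlog ``rl`` and revs; see ancestor.py for the
    # authoritative API): the returned object answers repeated
    # (::revs - ::common) queries and can grow its base set:
    #
    #   inc = rl.incrementalmissingrevs(common=[crev])
    #   inc.missingancestors([h1])   # revs in ::h1 - ::common, sorted
    #   inc.addbases([h1])           # fold h1 into the known set
    #   inc.missingancestors([h2])
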
    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

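    # The two methods above differ only in argument and return types
    # (sketch with assumed names):
    #
    #   rl.findmissingrevs(common=[crev], heads=[hrev])  # ints  -> ints
    #   rl.findmissing(common=[cnode], heads=[hnode])    # nodes -> nodes
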
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

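    # Sketch of a common call (assumed nodes; this mirrors how a range of
    # history is walked for exchange):
    #
    #   nodes, outroots, outheads = rl.nodesbetween([rootnode], [headnode])
    #   # ``nodes`` is every node on a path root -> head, topologically
    #   # sorted and including both endpoints when they are reachable.
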
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

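    # Usage sketch (assumed nodes): restrict head discovery to the subgraph
    # rooted at ``basenode``, optionally pretending ``cutnode`` has no
    # children:
    #
    #   rl.heads()                       # all childless nodes
    #   rl.heads(start=basenode)         # heads descended from basenode
    #   rl.heads(start=basenode, stop=[cutnode])
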
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        """return True if revision 'end' is a descendant of revision 'start'"""
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

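    # Relationship sketch (assumed nodes ``na`` and ``nb``):
    #
    #   heads = rl.commonancestorsheads(na, nb)  # may hold several nodes
    #   best = rl.ancestor(na, nb)               # one of them, or nullid
    #   rl.isancestor(na, nb)                    # True iff na is in heads
    #
    # ``min()`` over the candidate nodes is an arbitrary but deterministic
    # tie-break, so repeated merges pick the same ancestor.
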
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

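    # Accepted identifier forms, sketched (values are illustrative):
    #
    #   rl.lookup(0)            # revision number
    #   rl.lookup('0')          # stringified revision number
    #   rl.lookup(node)         # 20-byte binary nodeid
    #   rl.lookup('1d4fd8')     # unambiguous hex nodeid prefix
    #
    # Ambiguous or unknown identifiers raise LookupError.
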
    def shortest(self, hexnode, minlength=1):
        """Find the shortest unambiguous prefix that matches hexnode."""
        def isvalid(test):
            try:
                if self._partialmatch(test) is None:
                    return False

                try:
                    i = int(test)
                    # a pure int that starts with zero will not be confused
                    # with a rev; nor, obviously, will an int larger than
                    # the value of the tip rev
                    if test[0] == '0' or i > len(self):
                        return True
                    return False
                except ValueError:
                    return True
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True

        shortest = hexnode
        startlength = max(6, minlength)
        length = startlength
        while True:
            test = hexnode[:length]
            if isvalid(test):
                shortest = test
                if length == minlength or length > startlength:
                    return shortest
                length -= 1
            else:
                length += 1
                if len(shortest) <= length:
                    return shortest

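    # Search sketch: starting from max(6, minlength) hex digits, the loop
    # shrinks the prefix while it stays unambiguous and grows it while it
    # is ambiguous. With an assumed node unique at 4 digits, the probes go
    # 6 -> 5 -> 4 (all valid), then 3 fails, and the 4-digit prefix is
    # returned.
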
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

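    # Worked example of the window arithmetic above (cachesize must be a
    # power of two; the values here are illustrative): with
    # cachesize = 65536, offset = 70000 and length = 1000,
    #
    #   realoffset = 70000 & ~65535                        = 65536
    #   reallength = ((71000 + 65536) & ~65535) - 65536    = 65536
    #
    # so one aligned 64k read covers the request plus its neighborhood.
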
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

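    # Caller-side sketch (assumed revs, non-inline case): slice each
    # revision's chunk out of the combined segment using the returned base
    # offset:
    #
    #   offset, data = rl._getsegmentforrevs(firstrev, lastrev)
    #   chunk = data[rl.start(rev) - offset:
    #                rl.start(rev) - offset + rl.length(rev)]
    #
    # (_chunks() below does essentially this, plus the inline-index
    # adjustment.)
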
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

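    # Fast/slow path sketch: when rev2 is already stored as a delta against
    # rev1, the stored chunk is returned verbatim; otherwise both raw texts
    # are materialized and diffed:
    #
    #   delta = rl.revdiff(prev, rev)   # assumed adjacent revs
    #   # applying ``delta`` to revision(prev, raw=True) yields
    #   # revision(rev, raw=True)
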
    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

1759 def _processflags(self, text, flags, operation, raw=False):
1760 """Inspect revision data flags and apply transforms defined by
1761 registered flag processors.
1762
1763 ``text`` - the revision data to process
1764 ``flags`` - the revision flags
1765 ``operation`` - the operation being performed (read or write)
1766 ``raw`` - an optional argument indicating whether the raw transform
1767 should be applied.
1768
1769 This method processes the flags in the order (or reverse order if
1770 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1771 flag processors registered for present flags. The order of flags defined
1772 in REVIDX_FLAGS_ORDER must be stable because the transforms are not commutative.
1773
1774 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1775 processed text and ``validatehash`` is a bool indicating whether the
1776 returned text should be checked for hash integrity.
1777
1778 Note: If the ``raw`` argument is set, it has precedence over the
1779 operation and will only update the value of ``validatehash``.
1780 """
1781 # fast path: no flag processors will run
1782 if flags == 0:
1783 return text, True
1784 if operation not in ('read', 'write'):
1785 raise error.ProgrammingError(_("invalid '%s' operation") % operation)
1786 # Check all flags are known.
1787 if flags & ~REVIDX_KNOWN_FLAGS:
1788 raise RevlogError(_("incompatible revision flag '%#x'") %
1789 (flags & ~REVIDX_KNOWN_FLAGS))
1790 validatehash = True
1791 # Depending on the operation (read or write), the order might be
1792 # reversed due to non-commutative transforms.
1793 orderedflags = REVIDX_FLAGS_ORDER
1794 if operation == 'write':
1795 orderedflags = reversed(orderedflags)
1796
1797 for flag in orderedflags:
1798 # If a flagprocessor has been registered for a known flag, apply the
1799 # related operation transform and update result tuple.
1800 if flag & flags:
1801 vhash = True
1802
1803 if flag not in _flagprocessors:
1804 message = _("missing processor for flag '%#x'") % (flag)
1805 raise RevlogError(message)
1806
1807 processor = _flagprocessors[flag]
1808 if processor is not None:
1809 readtransform, writetransform, rawtransform = processor
1810
1811 if raw:
1812 vhash = rawtransform(self, text)
1813 elif operation == 'read':
1814 text, vhash = readtransform(self, text)
1815 else: # write operation
1816 text, vhash = writetransform(self, text)
1817 validatehash = validatehash and vhash
1818
1819 return text, validatehash
1820
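To make the ordering rule concrete, here is a toy model of the dispatch (hypothetical flags and registry; the real ones are REVIDX_FLAGS_ORDER and _flagprocessors). Write applies the transforms in reverse flag order so that read, applied in forward order, inverts them:

    # Toy flags and a registry mapping flag -> (readtransform, writetransform).
    FLAG_A = 1 << 0
    FLAG_B = 1 << 1
    FLAGS_ORDER = [FLAG_A, FLAG_B]
    processors = {
        FLAG_A: (lambda t: t.replace(b'A', b'a'), lambda t: t.replace(b'a', b'A')),
        FLAG_B: (lambda t: t[::-1], lambda t: t[::-1]),
    }

    def process(text, flags, operation):
        order = FLAGS_ORDER if operation == 'read' else reversed(FLAGS_ORDER)
        for flag in order:
            if flag & flags:
                read, write = processors[flag]
                text = read(text) if operation == 'read' else write(text)
        return text

    stored = process(b'raw a text', FLAG_A | FLAG_B, 'write')
    assert process(stored, FLAG_A | FLAG_B, 'read') == b'raw a text'
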
1821 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1822 """Check node hash integrity.
1823
1824 Available as a function so that subclasses can extend hash mismatch
1825 behaviors as needed.
1826 """
1827 if p1 is None and p2 is None:
1828 p1, p2 = self.parents(node)
1829 if node != self.hash(text, p1, p2):
1830 revornode = rev
1831 if revornode is None:
1832 revornode = templatefilters.short(hex(node))
1833 raise RevlogError(_("integrity check failed on %s:%s")
1834 % (self.indexfile, pycompat.bytestr(revornode)))
1835
1836 def _enforceinlinesize(self, tr, fp=None):
1837 """Check if the revlog is too big for inline and convert if so.
1838
1839 This should be called after revisions are added to the revlog. If the
1840 revlog has grown too large to be an inline revlog, it will convert it
1841 to use multiple index and data files.
1842 """
1843 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1844 return
1845
1846 trinfo = tr.find(self.indexfile)
1847 if trinfo is None:
1848 raise RevlogError(_("%s not found in the transaction")
1849 % self.indexfile)
1850
1851 trindex = trinfo[2]
1852 if trindex is not None:
1853 dataoff = self.start(trindex)
1854 else:
1855 # revlog was stripped at start of transaction, use all leftover data
1856 trindex = len(self) - 1
1857 dataoff = self.end(-2)
1858
1859 tr.add(self.datafile, dataoff)
1860
1861 if fp:
1862 fp.flush()
1863 fp.close()
1864
1865 with self._datafp('w') as df:
1866 for r in self:
1867 df.write(self._getsegmentforrevs(r, r)[1])
1868
1869 with self._indexfp('w') as fp:
1870 self.version &= ~FLAG_INLINE_DATA
1871 self._inline = False
1872 io = self._io
1873 for i in self:
1874 e = io.packentry(self.index[i], self.node, self.version, i)
1875 fp.write(e)
1876
1877 # the temp file replaces the real index when we exit the context
1878 # manager
1879
1880 tr.replace(self.indexfile, trindex * self._io.size)
1881 self._chunkclear()
1882
1883 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1884 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1885 """add a revision to the log
1886
1887 text - the revision data to add
1888 transaction - the transaction object used for rollback
1889 link - the linkrev data to add
1890 p1, p2 - the parent nodeids of the revision
1891 cachedelta - an optional precomputed delta
1892 node - nodeid of revision; typically node is not specified, and it is
1893 computed by default as hash(text, p1, p2); however, subclasses might
1894 use a different hashing method (and override checkhash() in that case)
1895 flags - the known flags to set on the revision
1896 deltacomputer - an optional _deltacomputer instance shared between
1897 multiple calls
1898 """
1899 if link == nullrev:
1900 raise RevlogError(_("attempted to add linkrev -1 to %s")
1901 % self.indexfile)
1902
1903 if flags:
1904 node = node or self.hash(text, p1, p2)
1905
1906 rawtext, validatehash = self._processflags(text, flags, 'write')
1907
1908 # If the flag processor modifies the revision data, ignore any provided
1909 # cachedelta.
1910 if rawtext != text:
1911 cachedelta = None
1912
1913 if len(rawtext) > _maxentrysize:
1914 raise RevlogError(
1915 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1916 % (self.indexfile, len(rawtext)))
1917
1918 node = node or self.hash(rawtext, p1, p2)
1919 if node in self.nodemap:
1920 return node
1921
1922 if validatehash:
1923 self.checkhash(rawtext, node, p1=p1, p2=p2)
1924
1925 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1926 flags, cachedelta=cachedelta,
1927 deltacomputer=deltacomputer)
1928
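As a hedged usage sketch only (it assumes an open repository and an active transaction, and these internal APIs can change between Mercurial versions), adding a file revision looks roughly like this:

    # Hypothetical calling sequence; assumes 'repo' is an open localrepository
    # object whose internal APIs match this version of Mercurial.
    with repo.lock(), repo.transaction(b'example') as tr:
        fl = repo.file(b'foo.txt')        # a filelog, i.e. a revlog subclass
        linkrev = len(repo.changelog)     # changelog rev this file rev links to
        node = fl.addrevision(b'new contents\n', tr, linkrev,
                              nullid, nullid)  # both parents null: new file
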
1929 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1930 cachedelta=None, deltacomputer=None):
1931 """add a raw revision with known flags, node and parents
1932 useful when reusing a revision not stored in this revlog (e.g. received
1933 over the wire, or read from an external bundle).
1934 """
1935 dfh = None
1936 if not self._inline:
1937 dfh = self._datafp("a+")
1938 ifh = self._indexfp("a+")
1939 try:
1940 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1941 flags, cachedelta, ifh, dfh,
1942 deltacomputer=deltacomputer)
1943 finally:
1944 if dfh:
1945 dfh.close()
1946 ifh.close()
1947
1948 def compress(self, data):
1949 """Generate a possibly-compressed representation of data."""
1950 if not data:
1951 return '', data
1952
1953 compressed = self._compressor.compress(data)
1954
1955 if compressed:
1956 # The revlog compressor added the header in the returned data.
1957 return '', compressed
1958
1959 if data[0:1] == '\0':
1960 return '', data
1961 return 'u', data
1962
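The empty-string/'u' return convention can be puzzling: the first element is a header to prepend when storing the chunk. A minimal sketch of the same convention using zlib directly (the real code goes through the configured compression engine, which may be zstd or none, and only keeps the compressed form when it is actually smaller):

    import zlib

    def compresschunk(data):
        if not data:
            return b'', data
        comp = zlib.compress(data)
        if len(comp) < len(data):
            return b'', comp        # zlib output starts with 'x', which
                                    # doubles as the chunk type header
        if data[0:1] == b'\0':
            return b'', data        # a leading NUL already means "raw"
        return b'u', data           # otherwise mark it explicitly uncompressed

    header, payload = compresschunk(b'z' * 1024)
    assert header == b'' and payload[0:1] == b'x'
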
1963 def decompress(self, data):
1964 """Decompress a revlog chunk.
1965
1966 The chunk is expected to begin with a header identifying the
1967 format type so it can be routed to an appropriate decompressor.
1968 """
1969 if not data:
1970 return data
1971
1972 # Revlogs are read much more frequently than they are written and many
1973 # chunks only take microseconds to decompress, so performance is
1974 # important here.
1975 #
1976 # We can make a few assumptions about revlogs:
1977 #
1978 # 1) the majority of chunks will be compressed (as opposed to inline
1979 # raw data).
1980 # 2) decompressing *any* data will likely be at least 10x slower than
1981 # returning raw inline data.
1982 # 3) we want to prioritize common and officially supported compression
1983 # engines
1984 #
1985 # It follows that we want to optimize for "decompress compressed data
1986 # when encoded with common and officially supported compression engines"
1987 # case over "raw data" and "data encoded by less common or non-official
1988 # compression engines." That is why we have the inline lookup first
1989 # followed by the compengines lookup.
1990 #
1991 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1992 # compressed chunks. And this matters for changelog and manifest reads.
1993 t = data[0:1]
1994
1995 if t == 'x':
1996 try:
1997 return _zlibdecompress(data)
1998 except zlib.error as e:
1999 raise RevlogError(_('revlog decompress error: %s') %
2000 util.forcebytestr(e))
2001 # '\0' is more common than 'u' so it goes first.
2002 elif t == '\0':
2003 return data
2004 elif t == 'u':
2005 return util.buffer(data, 1)
2006
2007 try:
2008 compressor = self._decompressors[t]
2009 except KeyError:
2010 try:
2011 engine = util.compengines.forrevlogheader(t)
2012 compressor = engine.revlogcompressor()
2013 self._decompressors[t] = compressor
2014 except KeyError:
2015 raise RevlogError(_('unknown compression type %r') % t)
2016
2017 return compressor.decompress(data)
2018
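The inverse dispatch, sketched with only the built-in chunk types ('x' for zlib, NUL for stored-verbatim, 'u' for marked-uncompressed); the real method additionally falls back to the pluggable compression-engine registry for any other header byte:

    import zlib

    def decompresschunk(data):
        t = data[0:1]
        if t == b'x':               # a zlib stream; its own first byte is
            return zlib.decompress(data)   # reused as the chunk type header
        if t == b'\0':              # stored verbatim, NUL byte included
            return data
        if t == b'u':               # strip the one-byte "uncompressed" marker
            return data[1:]
        raise ValueError('unknown compression type %r' % t)

    assert decompresschunk(zlib.compress(b'z' * 1024)) == b'z' * 1024
    assert decompresschunk(b'\0binary') == b'\0binary'
    assert decompresschunk(b'uplain') == b'plain'
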
2019 def _isgooddeltainfo(self, d, textlen):
2020 """Returns True if the given delta is good. Good means that it is within
2021 the disk span, disk size, and chain length bounds that we know to be
2022 performant."""
2023 if d is None:
2024 return False
2025
2026 # - 'd.distance' is the distance from the base revision -- bounding it
2027 # limits the amount of I/O we need to do.
2028 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2029 # need to apply -- bounding it limits the amount of CPU we consume.
2030
2031 defaultmax = textlen * 4
2032 maxdist = self._maxdeltachainspan
2033 if not maxdist:
2034 maxdist = d.distance # ensure the conditional below passes
2035 maxdist = max(maxdist, defaultmax)
2036 if (d.distance > maxdist or d.deltalen > textlen or
2037 d.compresseddeltalen > textlen * 2 or
2038 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2039 return False
2040
2041 return True
2042
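Read as plain inequalities, the policy rejects a delta when any of four bounds is exceeded. A standalone restatement of the same predicate (field names mirror the deltainfo attributes used above; both knobs mean "unlimited" when zero/None):

    import collections

    DeltaInfo = collections.namedtuple(
        'DeltaInfo', ['distance', 'deltalen', 'compresseddeltalen', 'chainlen'])

    def isgooddelta(d, textlen, maxdeltachainspan=0, maxchainlen=None):
        if d is None:
            return False
        # a zero maxdeltachainspan disables the distance bound entirely
        maxdist = maxdeltachainspan or d.distance
        maxdist = max(maxdist, textlen * 4)
        return not (d.distance > maxdist
                    or d.deltalen > textlen
                    or d.compresseddeltalen > textlen * 2
                    or (maxchainlen and d.chainlen > maxchainlen))

    # a 1000-byte revision whose chain spans 10000 bytes on disk is rejected
    # once the span cap (here 5000) is in force
    assert not isgooddelta(DeltaInfo(10000, 200, 500, 3),
                           textlen=1000, maxdeltachainspan=5000)
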
2043 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2044 cachedelta, ifh, dfh, alwayscache=False,
2045 deltacomputer=None):
2046 """internal function to add revisions to the log
2047
2048 see addrevision for argument descriptions.
2049
2050 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2051
2052 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2053 be used.
2054
2055 invariants:
2056 - rawtext is optional (can be None); if not set, cachedelta must be set.
2057 if both are set, they must correspond to each other.
2058 """
2059 if node == nullid:
2060 raise RevlogError(_("%s: attempt to add null revision") %
2061 (self.indexfile))
2062 if node == wdirid:
2063 raise RevlogError(_("%s: attempt to add wdir revision") %
2064 (self.indexfile))
2065
2066 if self._inline:
2067 fh = ifh
2068 else:
2069 fh = dfh
2070
2071 btext = [rawtext]
2072
2073 curr = len(self)
2074 prev = curr - 1
2075 offset = self.end(prev)
2076 p1r, p2r = self.rev(p1), self.rev(p2)
2077
2078 # full versions are inserted when the needed deltas
2079 # become comparable to the uncompressed text
2080 if rawtext is None:
2081 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
2082 cachedelta[1])
2083 else:
2084 textlen = len(rawtext)
2085
2086 if deltacomputer is None:
2087 deltacomputer = _deltacomputer(self)
2088
2089 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2090 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2091
2092 if deltainfo is not None:
2093 base = deltainfo.base
2094 chainbase = deltainfo.chainbase
2095 data = deltainfo.data
2096 l = deltainfo.deltalen
2097 else:
2098 rawtext = deltacomputer.buildtext(revinfo, fh)
2099 data = self.compress(rawtext)
2100 l = len(data[1]) + len(data[0])
2101 base = chainbase = curr
2102
2103 e = (offset_type(offset, flags), l, textlen,
2104 base, link, p1r, p2r, node)
2105 self.index.insert(-1, e)
2106 self.nodemap[node] = curr
2107
2108 entry = self._io.packentry(e, self.node, self.version, curr)
2109 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2110
2111 if alwayscache and rawtext is None:
2112 rawtext = deltacomputer.buildtext(revinfo, fh)
2113
2114 if type(rawtext) == bytes: # only accept immutable objects
2115 self._cache = (node, curr, rawtext)
2116 self._chainbasecache[curr] = chainbase
2117 return node
2118
2119 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2120 # Files opened in a+ mode have inconsistent behavior on various
2121 # platforms. Windows requires that a file positioning call be made
2122 # when the file handle transitions between reads and writes. See
2123 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2124 # platforms, Python or the platform itself can be buggy. Some versions
2125 # of Solaris have been observed to not append at the end of the file
2126 # if the file was seeked to before the end. See issue4943 for more.
2127 #
2128 # We work around this issue by inserting a seek() before writing.
2129 # Note: This is likely not necessary on Python 3.
2130 ifh.seek(0, os.SEEK_END)
2131 if dfh:
2132 dfh.seek(0, os.SEEK_END)
2133
2134 curr = len(self) - 1
2135 if not self._inline:
2136 transaction.add(self.datafile, offset)
2137 transaction.add(self.indexfile, curr * len(entry))
2138 if data[0]:
2139 dfh.write(data[0])
2140 dfh.write(data[1])
2141 ifh.write(entry)
2142 else:
2143 offset += curr * self._io.size
2144 transaction.add(self.indexfile, offset, curr)
2145 ifh.write(entry)
2146 ifh.write(data[0])
2147 ifh.write(data[1])
2148 self._enforceinlinesize(transaction, ifh)
2149
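The workaround described in the comments boils down to: never trust the file position of an "a+" handle after a read. A generic illustration of the pattern on ordinary files (hypothetical filename, nothing revlog-specific):

    import os

    with open('example.bin', 'ab+') as fh:
        fh.seek(0)
        fh.read()                # the handle has transitioned to "reading"
        fh.seek(0, os.SEEK_END)  # force it back to EOF before appending; on
        fh.write(b'appended')    # some platforms the write would otherwise
                                 # land at the wrong offset
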
2150 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2151 """
2152 add a delta group
2153
2154 given a set of deltas, add them to the revision log. the
2155 first delta is against its parent, which should be in our
2156 log, the rest are against the previous delta.
2157
2158 If ``addrevisioncb`` is defined, it will be called with arguments of
2159 this revlog and the node that was added.
2160 """
2161
2162 nodes = []
2163
2164 r = len(self)
2165 end = 0
2166 if r:
2167 end = self.end(r - 1)
2168 ifh = self._indexfp("a+")
2169 isize = r * self._io.size
2170 if self._inline:
2171 transaction.add(self.indexfile, end + isize, r)
2172 dfh = None
2173 else:
2174 transaction.add(self.indexfile, isize, r)
2175 transaction.add(self.datafile, end)
2176 dfh = self._datafp("a+")
2177 def flush():
2178 if dfh:
2179 dfh.flush()
2180 ifh.flush()
2181 try:
2182 deltacomputer = _deltacomputer(self)
2183 # loop through our set of deltas
2184 for data in deltas:
2185 node, p1, p2, linknode, deltabase, delta, flags = data
2186 link = linkmapper(linknode)
2187 flags = flags or REVIDX_DEFAULT_FLAGS
2188
2189 nodes.append(node)
2190
2191 if node in self.nodemap:
2192 # this can happen if two branches make the same change
2193 continue
2194
2195 for p in (p1, p2):
2196 if p not in self.nodemap:
2197 raise LookupError(p, self.indexfile,
2198 _('unknown parent'))
2199
2200 if deltabase not in self.nodemap:
2201 raise LookupError(deltabase, self.indexfile,
2202 _('unknown delta base'))
2203
2204 baserev = self.rev(deltabase)
2205
2206 if baserev != nullrev and self.iscensored(baserev):
2207 # if base is censored, delta must be full replacement in a
2208 # single patch operation
2209 hlen = struct.calcsize(">lll")
2210 oldlen = self.rawsize(baserev)
2211 newlen = len(delta) - hlen
2212 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2213 raise error.CensoredBaseError(self.indexfile,
2214 self.node(baserev))
2215
2216 if not flags and self._peek_iscensored(baserev, delta, flush):
2217 flags |= REVIDX_ISCENSORED
2218
2219 # We assume consumers of addrevisioncb will want to retrieve
2220 # the added revision, which will require a call to
2221 # revision(). revision() will fast path if there is a cache
2222 # hit. So, we tell _addrevision() to always cache in this case.
2223 # We're only using addgroup() in the context of changegroup
2224 # generation so the revision data can always be handled as raw
2225 # by the flagprocessor.
2226 self._addrevision(node, None, transaction, link,
2227 p1, p2, flags, (baserev, delta),
2228 ifh, dfh,
2229 alwayscache=bool(addrevisioncb),
2230 deltacomputer=deltacomputer)
2231
2232 if addrevisioncb:
2233 addrevisioncb(self, node)
2234
2235 if not dfh and not self._inline:
2236 # addrevision switched from inline to conventional
2237 # reopen the index
2238 ifh.close()
2239 dfh = self._datafp("a+")
2240 ifh = self._indexfp("a+")
2241 finally:
2242 if dfh:
2243 dfh.close()
2244 ifh.close()
2245
2246 return nodes
2247
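Each entry consumed from ``deltas`` is a 7-tuple. A hypothetical producer, to make the expected shape explicit (illustrative only, using private helpers for brevity; in practice these tuples come from changegroup unpacking):

    def makedeltas(revlog, revs):
        # yields (node, p1, p2, linknode, deltabase, delta, flags) tuples in
        # the shape addgroup() unpacks; deltabase names the node the delta
        # patches against, linknode ties the rev to a changelog node
        for rev in revs:
            p1, p2 = revlog.parents(revlog.node(rev))
            yield (revlog.node(rev), p1, p2,
                   revlog.node(rev),            # placeholder linknode
                   revlog.node(revlog.deltaparent(rev)),
                   revlog._chunk(rev),          # the stored delta itself
                   revlog.flags(rev))
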
2248 def iscensored(self, rev):
2249 """Check if a file revision is censored."""
2250 return False
2251
2252 def _peek_iscensored(self, baserev, delta, flush):
2253 """Quickly check if a delta produces a censored revision."""
2254 return False
2255
2256 def getstrippoint(self, minlink):
2257 """find the minimum rev that must be stripped to strip revisions with
2258 linkrev >= minlink
2259
2259 Returns a tuple containing the minimum rev and a set of all revs that
2260 have linkrevs that will be broken by this strip.
2261 """
2262 brokenrevs = set()
2263 strippoint = len(self)
2264
2265 heads = {}
2266 futurelargelinkrevs = set()
2267 for head in self.headrevs():
2268 headlinkrev = self.linkrev(head)
2269 heads[head] = headlinkrev
2270 if headlinkrev >= minlink:
2271 futurelargelinkrevs.add(headlinkrev)
2272
2273 # This algorithm involves walking down the rev graph, starting at the
2274 # heads. Since the revs are topologically sorted according to linkrev,
2275 # once all head linkrevs are below the minlink, we know there are
2276 # no more revs that could have a linkrev greater than minlink.
2277 # So we can stop walking.
2278 while futurelargelinkrevs:
2279 strippoint -= 1
2280 linkrev = heads.pop(strippoint)
2281
2282 if linkrev < minlink:
2283 brokenrevs.add(strippoint)
2284 else:
2285 futurelargelinkrevs.remove(linkrev)
2286
2287 for p in self.parentrevs(strippoint):
2288 if p != nullrev:
2289 plinkrev = self.linkrev(p)
2290 heads[p] = plinkrev
2291 if plinkrev >= minlink:
2292 futurelargelinkrevs.add(plinkrev)
2293
2294 return strippoint, brokenrevs
2295
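The walk is easier to see on a toy history. A self-contained rendition of the same algorithm over a hypothetical linear three-revision log where rev r has parent r - 1 and linkrev r:

    linkrevs = [0, 1, 2]
    parents = {0: [], 1: [0], 2: [1]}

    def getstrippoint(minlink):
        brokenrevs, strippoint = set(), len(linkrevs)
        heads = {len(linkrevs) - 1: linkrevs[-1]}     # single head: rev 2
        future = {lr for lr in heads.values() if lr >= minlink}
        while future:
            strippoint -= 1
            linkrev = heads.pop(strippoint)
            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                future.remove(linkrev)
            for p in parents[strippoint]:
                heads[p] = linkrevs[p]
                if linkrevs[p] >= minlink:
                    future.add(linkrevs[p])
        return strippoint, brokenrevs

    assert getstrippoint(1) == (1, set())  # strip revs 1 and 2, none broken
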
2296 def strip(self, minlink, transaction):
2297 """truncate the revlog on the first revision with a linkrev >= minlink
2298
2299 This function is called when we're stripping revision minlink and
2300 its descendants from the repository.
2301
2302 We have to remove all revisions with linkrev >= minlink, because
2303 the equivalent changelog revisions will be renumbered after the
2304 strip.
2305
2306 So we truncate the revlog on the first of these revisions, and
2307 trust that the caller has saved the revisions that shouldn't be
2308 removed and that it'll re-add them after this truncation.
2309 """
2310 if len(self) == 0:
2311 return
2312
2313 rev, _ = self.getstrippoint(minlink)
2314 if rev == len(self):
2315 return
2316
2317 # first truncate the files on disk
2318 end = self.start(rev)
2319 if not self._inline:
2320 transaction.add(self.datafile, end)
2321 end = rev * self._io.size
2322 else:
2323 end += rev * self._io.size
2324
2325 transaction.add(self.indexfile, end)
2326
2327 # then reset internal state in memory to forget those revisions
2328 self._cache = None
2329 self._chaininfocache = {}
2330 self._chunkclear()
2331 for x in xrange(rev, len(self)):
2332 del self.nodemap[self.node(x)]
2333
2334 del self.index[rev:-1]
2335
2336 def checksize(self):
2337 expected = 0
2338 if len(self):
2339 expected = max(0, self.end(len(self) - 1))
2340
2341 try:
2342 with self._datafp() as f:
2343 f.seek(0, 2)
2344 actual = f.tell()
2345 dd = actual - expected
2346 except IOError as inst:
2347 if inst.errno != errno.ENOENT:
2348 raise
2349 dd = 0
2350
2351 try:
2352 f = self.opener(self.indexfile)
2353 f.seek(0, 2)
2354 actual = f.tell()
2355 f.close()
2356 s = self._io.size
2357 i = max(0, actual // s)
2358 di = actual - (i * s)
2359 if self._inline:
2360 databytes = 0
2361 for r in self:
2362 databytes += max(0, self.length(r))
2363 dd = 0
2364 di = actual - len(self) * s - databytes
2365 except IOError as inst:
2366 if inst.errno != errno.ENOENT:
2367 raise
2368 di = 0
2369
2370 return (dd, di)
2371
2372 def files(self):
2373 res = [self.indexfile]
2374 if not self._inline:
2375 res.append(self.datafile)
2376 return res
2377
2378 DELTAREUSEALWAYS = 'always'
2379 DELTAREUSESAMEREVS = 'samerevs'
2380 DELTAREUSENEVER = 'never'
2381
2382 DELTAREUSEFULLADD = 'fulladd'
2383
2384 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2385
2386 def clone(self, tr, destrevlog, addrevisioncb=None,
2387 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2388 """Copy this revlog to another, possibly with format changes.
2389
2390 The destination revlog will contain the same revisions and nodes.
2391 However, it may not be bit-for-bit identical due to e.g. delta encoding
2392 differences.
2393
2394 The ``deltareuse`` argument controls how deltas from the existing revlog
2395 are preserved in the destination revlog. The argument can have the
2396 following values:
2397
2398 DELTAREUSEALWAYS
2399 Deltas will always be reused (if possible), even if the destination
2400 revlog would not select the same revisions for the delta. This is the
2401 fastest mode of operation.
2402 DELTAREUSESAMEREVS
2403 Deltas will be reused if the destination revlog would pick the same
2404 revisions for the delta. This mode strikes a balance between speed
2405 and optimization.
2406 DELTAREUSENEVER
2407 Deltas will never be reused. This is the slowest mode of execution.
2408 This mode can be used to recompute deltas (e.g. if the diff/delta
2409 algorithm changes).
2410
2411 Delta computation can be slow, so the choice of delta reuse policy can
2412 significantly affect run time.
2413
2414 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2415 two extremes. Deltas will be reused if they are appropriate. But if the
2416 delta could choose a better revision, it will do so. This means if you
2417 are converting a non-generaldelta revlog to a generaldelta revlog,
2418 deltas will be recomputed if the delta's parent isn't a parent of the
2419 revision.
2420
2421 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2422 controls whether to compute deltas against both parents for merges.
2423 If it is None, the destination revlog's existing setting is kept.
2424 """
2425 if deltareuse not in self.DELTAREUSEALL:
2426 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2427
2428 if len(destrevlog):
2429 raise ValueError(_('destination revlog is not empty'))
2430
2431 if getattr(self, 'filteredrevs', None):
2432 raise ValueError(_('source revlog has filtered revisions'))
2433 if getattr(destrevlog, 'filteredrevs', None):
2434 raise ValueError(_('destination revlog has filtered revisions'))
2435
2436 # lazydeltabase controls whether to reuse a cached delta, if possible.
2437 oldlazydeltabase = destrevlog._lazydeltabase
2438 oldamd = destrevlog._aggressivemergedeltas
2439
2440 try:
2441 if deltareuse == self.DELTAREUSEALWAYS:
2442 destrevlog._lazydeltabase = True
2443 elif deltareuse == self.DELTAREUSESAMEREVS:
2444 destrevlog._lazydeltabase = False
2445
2446 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2447
2448 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2449 self.DELTAREUSESAMEREVS)
2450
2451 deltacomputer = _deltacomputer(destrevlog)
2452 index = self.index
2453 for rev in self:
2454 entry = index[rev]
2455
2456 # Some classes override linkrev to take filtered revs into
2457 # account. Use raw entry from index.
2458 flags = entry[0] & 0xffff
2459 linkrev = entry[4]
2460 p1 = index[entry[5]][7]
2461 p2 = index[entry[6]][7]
2462 node = entry[7]
2463
2464 # (Possibly) reuse the delta from the revlog if allowed and
2465 # the revlog chunk is a delta.
2466 cachedelta = None
2467 rawtext = None
2468 if populatecachedelta:
2469 dp = self.deltaparent(rev)
2470 if dp != nullrev:
2471 cachedelta = (dp, str(self._chunk(rev)))
2471 cachedelta = (dp, bytes(self._chunk(rev)))
2472
2473 if not cachedelta:
2474 rawtext = self.revision(rev, raw=True)
2475
2476
2477 if deltareuse == self.DELTAREUSEFULLADD:
2478 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2479 cachedelta=cachedelta,
2480 node=node, flags=flags,
2481 deltacomputer=deltacomputer)
2482 else:
2483 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2484 checkambig=False)
2485 dfh = None
2486 if not destrevlog._inline:
2487 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2488 try:
2489 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2490 p2, flags, cachedelta, ifh, dfh,
2491 deltacomputer=deltacomputer)
2492 finally:
2493 if dfh:
2494 dfh.close()
2495 ifh.close()
2496
2497 if addrevisioncb:
2498 addrevisioncb(self, rev, node)
2499 finally:
2500 destrevlog._lazydeltabase = oldlazydeltabase
2501 destrevlog._aggressivemergedeltas = oldamd
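
A hedged usage sketch for ``clone`` (it assumes an open repository, an active transaction ``tr``, and an empty destination revlog ``dest`` created elsewhere; exact call sites in Mercurial differ):

    # Hypothetical: recompute every delta while copying one revlog into an
    # empty destination revlog, e.g. as part of a store format upgrade.
    src = repo.file(b'foo.txt')
    src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER,
              aggressivemergedeltas=None)   # None keeps dest's own setting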