##// END OF EJS Templates
revlog: add a _datareadfp context manager for data access needs...
Boris Feld -
r35991:4d66993b default
parent child Browse files
Show More
@@ -1,2497 +1,2501 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import errno
19 import errno
19 import hashlib
20 import hashlib
20 import heapq
21 import heapq
21 import os
22 import os
22 import struct
23 import struct
23 import zlib
24 import zlib
24
25
25 # import stuff from node for others to import from revlog
26 # import stuff from node for others to import from revlog
26 from .node import (
27 from .node import (
27 bin,
28 bin,
28 hex,
29 hex,
29 nullid,
30 nullid,
30 nullrev,
31 nullrev,
31 wdirhex,
32 wdirhex,
32 wdirid,
33 wdirid,
33 wdirrev,
34 wdirrev,
34 )
35 )
35 from .i18n import _
36 from .i18n import _
36 from .thirdparty import (
37 from .thirdparty import (
37 attr,
38 attr,
38 )
39 )
39 from . import (
40 from . import (
40 ancestor,
41 ancestor,
41 error,
42 error,
42 mdiff,
43 mdiff,
43 policy,
44 policy,
44 pycompat,
45 pycompat,
45 templatefilters,
46 templatefilters,
46 util,
47 util,
47 )
48 )
48
49
parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
# header flag bits stored alongside the version number
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
# flags legal for each on-disk format version
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)

# max size of revlog with inline data
_maxinline = 131072
# NOTE(review): presumably the chunk size used when reading revlog data from
# disk — confirm against the chunked-read callers elsewhere in this file.
_chunksize = 1048576

# Re-exported error types so callers can catch them via this module.
RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}
94
95
def addflagprocessor(flag, processor):
    """Register ``processor`` as the handler for revision flag ``flag``.

    Constraints:
    - ``flag`` must be declared in both REVIDX_KNOWN_FLAGS and
      REVIDX_FLAGS_ORDER.
    - At most one processor may be registered per flag.
    - ``processor`` is a 3-tuple of callables ``(read, write, raw)``:
      - read(self, rawtext) -> (text, bool)
      - write(self, text) -> (rawtext, bool)
      - raw(self, rawtext) -> bool
    ``text`` is what users see; ``rawtext`` is what is stored in revlog data.
    The boolean each transform returns says whether its output can be used
    for hash integrity checking: if "write" returns False the hash is
    computed from "text", if True from the produced "rawtext". "read" and
    "write" usually return opposite booleans, and "raw" agrees with "write".

    The 'raw' transform is used for changegroup generation and some debug
    commands, where only hash-checkability of the content matters.
    """
    if not (flag & REVIDX_KNOWN_FLAGS):
        raise ProgrammingError(
            _("cannot register processor on unknown flag '%#x'.") % (flag))
    if flag not in REVIDX_FLAGS_ORDER:
        raise ProgrammingError(
            _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag))
    if flag in _flagprocessors:
        raise error.Abort(
            _("cannot register multiple processors on flag '%#x'.") % (flag))
    _flagprocessors[flag] = processor
129
130
def getoffset(q):
    """Return the data-file offset stored in the high bits of ``q``."""
    offset = q >> 16
    return int(offset)
132
133
def gettype(q):
    """Return the flag bits stored in the low 16 bits of ``q``."""
    flags = q & 0xFFFF
    return int(flags)
135
136
def offset_type(offset, type):
    """Pack ``offset`` and flag bits ``type`` into a single index value.

    Raises ValueError if ``type`` carries bits outside REVIDX_KNOWN_FLAGS.
    """
    if type & ~REVIDX_KNOWN_FLAGS:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
140
141
# Pre-built digest of the null node, cloned instead of recomputed for speed.
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent node is null, p2 is null
    if p2 == nullid:
        # cloning the precomputed hash is faster than creating a new one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # neither parent is null: feed them in sorted order so the hash is
        # independent of parent ordering
        a, b = (p1, p2) if p1 < p2 else (p2, p1)
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()
167
168
168 def _trimchunk(revlog, revs, startidx, endidx=None):
169 def _trimchunk(revlog, revs, startidx, endidx=None):
169 """returns revs[startidx:endidx] without empty trailing revs
170 """returns revs[startidx:endidx] without empty trailing revs
170 """
171 """
171 length = revlog.length
172 length = revlog.length
172
173
173 if endidx is None:
174 if endidx is None:
174 endidx = len(revs)
175 endidx = len(revs)
175
176
176 # Trim empty revs at the end, but never the very first revision of a chain
177 # Trim empty revs at the end, but never the very first revision of a chain
177 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
178 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
178 endidx -= 1
179 endidx -= 1
179
180
180 return revs[startidx:endidx]
181 return revs[startidx:endidx]
181
182
def _slicechunk(revlog, revs):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    Groups are formed by greedily cutting the span at its largest gaps until
    the payload-to-read ratio ("density") reaches
    ``revlog._srdensitythreshold``; only gaps larger than
    ``revlog._srmingapsize`` are considered.
    """
    start = revlog.start
    length = revlog.length

    # A single rev can never be split further.
    if len(revs) <= 1:
        yield revs
        return

    startbyte = start(revs[0])
    endbyte = start(revs[-1]) + length(revs[-1])
    # readdata: bytes actually read; deltachainspan: full on-disk span
    readdata = deltachainspan = endbyte - startbyte

    # total useful bytes in the chain
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > revlog._srmingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < revlog._srdensitythreshold:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices; the indices heap yields cut points
    # in increasing order.
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    # trailing group after the last cut point
    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
256
257
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    # Result of a delta computation, produced by _deltacomputer._builddeltainfo.
    distance = attr.ib()          # bytes from start of the delta chain to end
                                  # of the new delta (see _builddeltainfo)
    deltalen = attr.ib()          # len(header) + len(data) of the compressed delta
    data = attr.ib()              # (header, compressed delta data) tuple
    base = attr.ib()              # revision the delta is stored against
    chainbase = attr.ib()         # first revision of the delta chain
    chainlen = attr.ib()          # number of deltas in the chain (incl. this one)
    compresseddeltalen = attr.ib()  # total compressed size of the whole chain
266
267
class _deltacomputer(object):
    """Computes the best delta to store for a revision being added to a
    revlog, by trying candidate base revisions in order of preference."""

    def __init__(self, revlog):
        # the revlog this computer produces deltas for
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides revisions that present an interest to be diffed against,
        grouped by level of easiness.

        Yields tuples/lists of candidate base revisions, cheapest group
        first: the cached delta base (if usable), then the parents (for
        generaldelta repos), then the previous revision.
        """
        revlog = self.revlog
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if revlog._generaldelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not revlog._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                yield parents

            if prev not in tested:
                # other approach failed try against prev to hopefully save us a
                # fulltext.
                yield (prev,)

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Caches the result in ``revinfo.btext[0]``. Assumes
        ``revinfo.cachedelta`` is set whenever ``btext[0]`` is missing
        (see _revisioninfo's invariant).
        """
        btext = revinfo.btext
        if btext[0] is not None:
            # fulltext already built and cached
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            basetext = revlog.revision(baserev, _df=fh, raw=True)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        """Return the raw (uncompressed) delta of ``revinfo`` against rev
        ``base``."""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        """Build and compress a delta of ``revinfo`` against rev ``base``,
        returning a _deltainfo describing it."""
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        # distance: from the start of the delta chain to the end of the new
        # delta, in on-disk bytes
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            # non-generaldelta revlogs always delta against the chain base
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs, or None if no candidate produced a good delta.
        """
        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            nominateddeltas = []
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                # within a group, prefer the smallest compressed delta
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo
415
416
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()        # expected nodeid of the revision
    p1 = attr.ib()          # first parent rev
    p2 = attr.ib()          # second parent rev
    btext = attr.ib()       # one-element list caching the built fulltext
    textlen = attr.ib()     # length of the fulltext
    cachedelta = attr.ib()  # (baserev, uncompressed_delta) or None
    flags = attr.ib()       # storage flags (REVIDX_* bits)
434
435
# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

class revlogoldio(object):
    """Parser/packer for the legacy (version 0) revlog index format."""

    def __init__(self):
        # fixed on-disk record size of a v0 index entry
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        """Parse raw v0 index ``data`` into (index, nodemap, cache).

        Entries are converted to the revlogv1 in-memory tuple layout so the
        rest of the code can treat both formats uniformly. The cache slot is
        always None for v0 (no inline data).
        """
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        """Serialize an in-memory (v1-layout) ``entry`` back to v0 bytes.

        ``node`` is a callable mapping a revision number to its nodeid
        (v0 stores parent nodeids, not parent revs). Raises RevlogError if
        the entry carries index flags, which v0 cannot represent.
        """
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)
479
480
# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    """Parser/packer for the current ("ng", revlogv1) index format."""

    def __init__(self):
        # fixed on-disk record size of an ng index entry
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        """Serialize ``entry`` to ng bytes; ``node`` and ``rev`` identify
        the revision being packed."""
        p = indexformatng_pack(*entry)
        if rev == 0:
            # the first entry's leading 4 bytes double as the revlog
            # version/flags header
            p = versionformat_pack(version) + p[4:]
        return p
514
515
515 class revlog(object):
516 class revlog(object):
516 """
517 """
517 the underlying revision storage object
518 the underlying revision storage object
518
519
519 A revlog consists of two parts, an index and the revision data.
520 A revlog consists of two parts, an index and the revision data.
520
521
521 The index is a file with a fixed record size containing
522 The index is a file with a fixed record size containing
522 information on each revision, including its nodeid (hash), the
523 information on each revision, including its nodeid (hash), the
523 nodeids of its parents, the position and offset of its data within
524 nodeids of its parents, the position and offset of its data within
524 the data file, and the revision it's based on. Finally, each entry
525 the data file, and the revision it's based on. Finally, each entry
525 contains a linkrev entry that can serve as a pointer to external
526 contains a linkrev entry that can serve as a pointer to external
526 data.
527 data.
527
528
528 The revision data itself is a linear collection of data chunks.
529 The revision data itself is a linear collection of data chunks.
529 Each chunk represents a revision and is usually represented as a
530 Each chunk represents a revision and is usually represented as a
530 delta against the previous chunk. To bound lookup time, runs of
531 delta against the previous chunk. To bound lookup time, runs of
531 deltas are limited to about 2 times the length of the original
532 deltas are limited to about 2 times the length of the original
532 version data. This makes retrieval of a version proportional to
533 version data. This makes retrieval of a version proportional to
533 its size, or O(1) relative to the number of revisions.
534 its size, or O(1) relative to the number of revisions.
534
535
535 Both pieces of the revlog are written to in an append-only
536 Both pieces of the revlog are written to in an append-only
536 fashion, which means we never need to rewrite a file to insert or
537 fashion, which means we never need to rewrite a file to insert or
537 remove data, and can use some simple techniques to avoid the need
538 remove data, and can use some simple techniques to avoid the need
538 for locking while reading.
539 for locking while reading.
539
540
540 If checkambig, indexfile is opened with checkambig=True at
541 If checkambig, indexfile is opened with checkambig=True at
541 writing, to avoid file stat ambiguity.
542 writing, to avoid file stat ambiguity.
542
543
543 If mmaplargeindex is True, and an mmapindexthreshold is set, the
544 If mmaplargeindex is True, and an mmapindexthreshold is set, the
544 index will be mmapped rather than read if it is larger than the
545 index will be mmapped rather than read if it is larger than the
545 configured threshold.
546 configured threshold.
546 """
547 """
547 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
548 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
548 mmaplargeindex=False):
549 mmaplargeindex=False):
549 """
550 """
550 create a revlog object
551 create a revlog object
551
552
552 opener is a function that abstracts the file opening operation
553 opener is a function that abstracts the file opening operation
553 and can be used to implement COW semantics or the like.
554 and can be used to implement COW semantics or the like.
554 """
555 """
555 self.indexfile = indexfile
556 self.indexfile = indexfile
556 self.datafile = datafile or (indexfile[:-2] + ".d")
557 self.datafile = datafile or (indexfile[:-2] + ".d")
557 self.opener = opener
558 self.opener = opener
558 # When True, indexfile is opened with checkambig=True at writing, to
559 # When True, indexfile is opened with checkambig=True at writing, to
559 # avoid file stat ambiguity.
560 # avoid file stat ambiguity.
560 self._checkambig = checkambig
561 self._checkambig = checkambig
561 # 3-tuple of (node, rev, text) for a raw revision.
562 # 3-tuple of (node, rev, text) for a raw revision.
562 self._cache = None
563 self._cache = None
563 # Maps rev to chain base rev.
564 # Maps rev to chain base rev.
564 self._chainbasecache = util.lrucachedict(100)
565 self._chainbasecache = util.lrucachedict(100)
565 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
566 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
566 self._chunkcache = (0, '')
567 self._chunkcache = (0, '')
567 # How much data to read and cache into the raw revlog data cache.
568 # How much data to read and cache into the raw revlog data cache.
568 self._chunkcachesize = 65536
569 self._chunkcachesize = 65536
569 self._maxchainlen = None
570 self._maxchainlen = None
570 self._aggressivemergedeltas = False
571 self._aggressivemergedeltas = False
571 self.index = []
572 self.index = []
572 # Mapping of partial identifiers to full nodes.
573 # Mapping of partial identifiers to full nodes.
573 self._pcache = {}
574 self._pcache = {}
574 # Mapping of revision integer to full node.
575 # Mapping of revision integer to full node.
575 self._nodecache = {nullid: nullrev}
576 self._nodecache = {nullid: nullrev}
576 self._nodepos = None
577 self._nodepos = None
577 self._compengine = 'zlib'
578 self._compengine = 'zlib'
578 self._maxdeltachainspan = -1
579 self._maxdeltachainspan = -1
579 self._withsparseread = False
580 self._withsparseread = False
580 self._srdensitythreshold = 0.25
581 self._srdensitythreshold = 0.25
581 self._srmingapsize = 262144
582 self._srmingapsize = 262144
582
583
583 mmapindexthreshold = None
584 mmapindexthreshold = None
584 v = REVLOG_DEFAULT_VERSION
585 v = REVLOG_DEFAULT_VERSION
585 opts = getattr(opener, 'options', None)
586 opts = getattr(opener, 'options', None)
586 if opts is not None:
587 if opts is not None:
587 if 'revlogv2' in opts:
588 if 'revlogv2' in opts:
588 # version 2 revlogs always use generaldelta.
589 # version 2 revlogs always use generaldelta.
589 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
590 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
590 elif 'revlogv1' in opts:
591 elif 'revlogv1' in opts:
591 if 'generaldelta' in opts:
592 if 'generaldelta' in opts:
592 v |= FLAG_GENERALDELTA
593 v |= FLAG_GENERALDELTA
593 else:
594 else:
594 v = 0
595 v = 0
595 if 'chunkcachesize' in opts:
596 if 'chunkcachesize' in opts:
596 self._chunkcachesize = opts['chunkcachesize']
597 self._chunkcachesize = opts['chunkcachesize']
597 if 'maxchainlen' in opts:
598 if 'maxchainlen' in opts:
598 self._maxchainlen = opts['maxchainlen']
599 self._maxchainlen = opts['maxchainlen']
599 if 'aggressivemergedeltas' in opts:
600 if 'aggressivemergedeltas' in opts:
600 self._aggressivemergedeltas = opts['aggressivemergedeltas']
601 self._aggressivemergedeltas = opts['aggressivemergedeltas']
601 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
602 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
602 if 'compengine' in opts:
603 if 'compengine' in opts:
603 self._compengine = opts['compengine']
604 self._compengine = opts['compengine']
604 if 'maxdeltachainspan' in opts:
605 if 'maxdeltachainspan' in opts:
605 self._maxdeltachainspan = opts['maxdeltachainspan']
606 self._maxdeltachainspan = opts['maxdeltachainspan']
606 if mmaplargeindex and 'mmapindexthreshold' in opts:
607 if mmaplargeindex and 'mmapindexthreshold' in opts:
607 mmapindexthreshold = opts['mmapindexthreshold']
608 mmapindexthreshold = opts['mmapindexthreshold']
608 self._withsparseread = bool(opts.get('with-sparse-read', False))
609 self._withsparseread = bool(opts.get('with-sparse-read', False))
609 if 'sparse-read-density-threshold' in opts:
610 if 'sparse-read-density-threshold' in opts:
610 self._srdensitythreshold = opts['sparse-read-density-threshold']
611 self._srdensitythreshold = opts['sparse-read-density-threshold']
611 if 'sparse-read-min-gap-size' in opts:
612 if 'sparse-read-min-gap-size' in opts:
612 self._srmingapsize = opts['sparse-read-min-gap-size']
613 self._srmingapsize = opts['sparse-read-min-gap-size']
613
614
614 if self._chunkcachesize <= 0:
615 if self._chunkcachesize <= 0:
615 raise RevlogError(_('revlog chunk cache size %r is not greater '
616 raise RevlogError(_('revlog chunk cache size %r is not greater '
616 'than 0') % self._chunkcachesize)
617 'than 0') % self._chunkcachesize)
617 elif self._chunkcachesize & (self._chunkcachesize - 1):
618 elif self._chunkcachesize & (self._chunkcachesize - 1):
618 raise RevlogError(_('revlog chunk cache size %r is not a power '
619 raise RevlogError(_('revlog chunk cache size %r is not a power '
619 'of 2') % self._chunkcachesize)
620 'of 2') % self._chunkcachesize)
620
621
621 indexdata = ''
622 indexdata = ''
622 self._initempty = True
623 self._initempty = True
623 try:
624 try:
624 with self._indexfp() as f:
625 with self._indexfp() as f:
625 if (mmapindexthreshold is not None and
626 if (mmapindexthreshold is not None and
626 self.opener.fstat(f).st_size >= mmapindexthreshold):
627 self.opener.fstat(f).st_size >= mmapindexthreshold):
627 indexdata = util.buffer(util.mmapread(f))
628 indexdata = util.buffer(util.mmapread(f))
628 else:
629 else:
629 indexdata = f.read()
630 indexdata = f.read()
630 if len(indexdata) > 0:
631 if len(indexdata) > 0:
631 v = versionformat_unpack(indexdata[:4])[0]
632 v = versionformat_unpack(indexdata[:4])[0]
632 self._initempty = False
633 self._initempty = False
633 except IOError as inst:
634 except IOError as inst:
634 if inst.errno != errno.ENOENT:
635 if inst.errno != errno.ENOENT:
635 raise
636 raise
636
637
637 self.version = v
638 self.version = v
638 self._inline = v & FLAG_INLINE_DATA
639 self._inline = v & FLAG_INLINE_DATA
639 self._generaldelta = v & FLAG_GENERALDELTA
640 self._generaldelta = v & FLAG_GENERALDELTA
640 flags = v & ~0xFFFF
641 flags = v & ~0xFFFF
641 fmt = v & 0xFFFF
642 fmt = v & 0xFFFF
642 if fmt == REVLOGV0:
643 if fmt == REVLOGV0:
643 if flags:
644 if flags:
644 raise RevlogError(_('unknown flags (%#04x) in version %d '
645 raise RevlogError(_('unknown flags (%#04x) in version %d '
645 'revlog %s') %
646 'revlog %s') %
646 (flags >> 16, fmt, self.indexfile))
647 (flags >> 16, fmt, self.indexfile))
647 elif fmt == REVLOGV1:
648 elif fmt == REVLOGV1:
648 if flags & ~REVLOGV1_FLAGS:
649 if flags & ~REVLOGV1_FLAGS:
649 raise RevlogError(_('unknown flags (%#04x) in version %d '
650 raise RevlogError(_('unknown flags (%#04x) in version %d '
650 'revlog %s') %
651 'revlog %s') %
651 (flags >> 16, fmt, self.indexfile))
652 (flags >> 16, fmt, self.indexfile))
652 elif fmt == REVLOGV2:
653 elif fmt == REVLOGV2:
653 if flags & ~REVLOGV2_FLAGS:
654 if flags & ~REVLOGV2_FLAGS:
654 raise RevlogError(_('unknown flags (%#04x) in version %d '
655 raise RevlogError(_('unknown flags (%#04x) in version %d '
655 'revlog %s') %
656 'revlog %s') %
656 (flags >> 16, fmt, self.indexfile))
657 (flags >> 16, fmt, self.indexfile))
657 else:
658 else:
658 raise RevlogError(_('unknown version (%d) in revlog %s') %
659 raise RevlogError(_('unknown version (%d) in revlog %s') %
659 (fmt, self.indexfile))
660 (fmt, self.indexfile))
660
661
661 self.storedeltachains = True
662 self.storedeltachains = True
662
663
663 self._io = revlogio()
664 self._io = revlogio()
664 if self.version == REVLOGV0:
665 if self.version == REVLOGV0:
665 self._io = revlogoldio()
666 self._io = revlogoldio()
666 try:
667 try:
667 d = self._io.parseindex(indexdata, self._inline)
668 d = self._io.parseindex(indexdata, self._inline)
668 except (ValueError, IndexError):
669 except (ValueError, IndexError):
669 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
670 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
670 self.index, nodemap, self._chunkcache = d
671 self.index, nodemap, self._chunkcache = d
671 if nodemap is not None:
672 if nodemap is not None:
672 self.nodemap = self._nodecache = nodemap
673 self.nodemap = self._nodecache = nodemap
673 if not self._chunkcache:
674 if not self._chunkcache:
674 self._chunkclear()
675 self._chunkclear()
675 # revnum -> (chain-length, sum-delta-length)
676 # revnum -> (chain-length, sum-delta-length)
676 self._chaininfocache = {}
677 self._chaininfocache = {}
677 # revlog header -> revlog compressor
678 # revlog header -> revlog compressor
678 self._decompressors = {}
679 self._decompressors = {}
679
680
680 @util.propertycache
681 @util.propertycache
681 def _compressor(self):
682 def _compressor(self):
682 return util.compengines[self._compengine].revlogcompressor()
683 return util.compengines[self._compengine].revlogcompressor()
683
684
684 def _indexfp(self, mode='r'):
685 def _indexfp(self, mode='r'):
685 """file object for the revlog's index file"""
686 """file object for the revlog's index file"""
686 args = {r'mode': mode}
687 args = {r'mode': mode}
687 if mode != 'r':
688 if mode != 'r':
688 args[r'checkambig'] = self._checkambig
689 args[r'checkambig'] = self._checkambig
689 if mode == 'w':
690 if mode == 'w':
690 args[r'atomictemp'] = True
691 args[r'atomictemp'] = True
691 return self.opener(self.indexfile, **args)
692 return self.opener(self.indexfile, **args)
692
693
693 def _datafp(self, mode='r'):
694 def _datafp(self, mode='r'):
694 """file object for the revlog's data file"""
695 """file object for the revlog's data file"""
695 return self.opener(self.datafile, mode=mode)
696 return self.opener(self.datafile, mode=mode)
696
697
698 @contextlib.contextmanager
699 def _datareadfp(self, existingfp=None):
700 """file object suitable to read data"""
701 if existingfp is not None:
702 yield existingfp
703 else:
704 if self._inline:
705 func = self._indexfp
706 else:
707 func = self._datafp
708 with func() as fp:
709 yield fp
710
697 def tip(self):
711 def tip(self):
698 return self.node(len(self.index) - 2)
712 return self.node(len(self.index) - 2)
699 def __contains__(self, rev):
713 def __contains__(self, rev):
700 return 0 <= rev < len(self)
714 return 0 <= rev < len(self)
701 def __len__(self):
715 def __len__(self):
702 return len(self.index) - 1
716 return len(self.index) - 1
703 def __iter__(self):
717 def __iter__(self):
704 return iter(xrange(len(self)))
718 return iter(xrange(len(self)))
705 def revs(self, start=0, stop=None):
719 def revs(self, start=0, stop=None):
706 """iterate over all rev in this revlog (from start to stop)"""
720 """iterate over all rev in this revlog (from start to stop)"""
707 step = 1
721 step = 1
708 if stop is not None:
722 if stop is not None:
709 if start > stop:
723 if start > stop:
710 step = -1
724 step = -1
711 stop += step
725 stop += step
712 else:
726 else:
713 stop = len(self)
727 stop = len(self)
714 return xrange(start, stop, step)
728 return xrange(start, stop, step)
715
729
716 @util.propertycache
730 @util.propertycache
717 def nodemap(self):
731 def nodemap(self):
718 self.rev(self.node(0))
732 self.rev(self.node(0))
719 return self._nodecache
733 return self._nodecache
720
734
721 def hasnode(self, node):
735 def hasnode(self, node):
722 try:
736 try:
723 self.rev(node)
737 self.rev(node)
724 return True
738 return True
725 except KeyError:
739 except KeyError:
726 return False
740 return False
727
741
728 def clearcaches(self):
742 def clearcaches(self):
729 self._cache = None
743 self._cache = None
730 self._chainbasecache.clear()
744 self._chainbasecache.clear()
731 self._chunkcache = (0, '')
745 self._chunkcache = (0, '')
732 self._pcache = {}
746 self._pcache = {}
733
747
734 try:
748 try:
735 self._nodecache.clearcaches()
749 self._nodecache.clearcaches()
736 except AttributeError:
750 except AttributeError:
737 self._nodecache = {nullid: nullrev}
751 self._nodecache = {nullid: nullrev}
738 self._nodepos = None
752 self._nodepos = None
739
753
740 def rev(self, node):
754 def rev(self, node):
741 try:
755 try:
742 return self._nodecache[node]
756 return self._nodecache[node]
743 except TypeError:
757 except TypeError:
744 raise
758 raise
745 except RevlogError:
759 except RevlogError:
746 # parsers.c radix tree lookup failed
760 # parsers.c radix tree lookup failed
747 if node == wdirid:
761 if node == wdirid:
748 raise error.WdirUnsupported
762 raise error.WdirUnsupported
749 raise LookupError(node, self.indexfile, _('no node'))
763 raise LookupError(node, self.indexfile, _('no node'))
750 except KeyError:
764 except KeyError:
751 # pure python cache lookup failed
765 # pure python cache lookup failed
752 n = self._nodecache
766 n = self._nodecache
753 i = self.index
767 i = self.index
754 p = self._nodepos
768 p = self._nodepos
755 if p is None:
769 if p is None:
756 p = len(i) - 2
770 p = len(i) - 2
757 for r in xrange(p, -1, -1):
771 for r in xrange(p, -1, -1):
758 v = i[r][7]
772 v = i[r][7]
759 n[v] = r
773 n[v] = r
760 if v == node:
774 if v == node:
761 self._nodepos = r - 1
775 self._nodepos = r - 1
762 return r
776 return r
763 if node == wdirid:
777 if node == wdirid:
764 raise error.WdirUnsupported
778 raise error.WdirUnsupported
765 raise LookupError(node, self.indexfile, _('no node'))
779 raise LookupError(node, self.indexfile, _('no node'))
766
780
767 # Accessors for index entries.
781 # Accessors for index entries.
768
782
769 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
783 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
770 # are flags.
784 # are flags.
771 def start(self, rev):
785 def start(self, rev):
772 return int(self.index[rev][0] >> 16)
786 return int(self.index[rev][0] >> 16)
773
787
774 def flags(self, rev):
788 def flags(self, rev):
775 return self.index[rev][0] & 0xFFFF
789 return self.index[rev][0] & 0xFFFF
776
790
777 def length(self, rev):
791 def length(self, rev):
778 return self.index[rev][1]
792 return self.index[rev][1]
779
793
780 def rawsize(self, rev):
794 def rawsize(self, rev):
781 """return the length of the uncompressed text for a given revision"""
795 """return the length of the uncompressed text for a given revision"""
782 l = self.index[rev][2]
796 l = self.index[rev][2]
783 if l >= 0:
797 if l >= 0:
784 return l
798 return l
785
799
786 t = self.revision(rev, raw=True)
800 t = self.revision(rev, raw=True)
787 return len(t)
801 return len(t)
788
802
789 def size(self, rev):
803 def size(self, rev):
790 """length of non-raw text (processed by a "read" flag processor)"""
804 """length of non-raw text (processed by a "read" flag processor)"""
791 # fast path: if no "read" flag processor could change the content,
805 # fast path: if no "read" flag processor could change the content,
792 # size is rawsize. note: ELLIPSIS is known to not change the content.
806 # size is rawsize. note: ELLIPSIS is known to not change the content.
793 flags = self.flags(rev)
807 flags = self.flags(rev)
794 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
808 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
795 return self.rawsize(rev)
809 return self.rawsize(rev)
796
810
797 return len(self.revision(rev, raw=False))
811 return len(self.revision(rev, raw=False))
798
812
799 def chainbase(self, rev):
813 def chainbase(self, rev):
800 base = self._chainbasecache.get(rev)
814 base = self._chainbasecache.get(rev)
801 if base is not None:
815 if base is not None:
802 return base
816 return base
803
817
804 index = self.index
818 index = self.index
805 base = index[rev][3]
819 base = index[rev][3]
806 while base != rev:
820 while base != rev:
807 rev = base
821 rev = base
808 base = index[rev][3]
822 base = index[rev][3]
809
823
810 self._chainbasecache[rev] = base
824 self._chainbasecache[rev] = base
811 return base
825 return base
812
826
813 def linkrev(self, rev):
827 def linkrev(self, rev):
814 return self.index[rev][4]
828 return self.index[rev][4]
815
829
816 def parentrevs(self, rev):
830 def parentrevs(self, rev):
817 try:
831 try:
818 entry = self.index[rev]
832 entry = self.index[rev]
819 except IndexError:
833 except IndexError:
820 if rev == wdirrev:
834 if rev == wdirrev:
821 raise error.WdirUnsupported
835 raise error.WdirUnsupported
822 raise
836 raise
823
837
824 return entry[5], entry[6]
838 return entry[5], entry[6]
825
839
826 def node(self, rev):
840 def node(self, rev):
827 try:
841 try:
828 return self.index[rev][7]
842 return self.index[rev][7]
829 except IndexError:
843 except IndexError:
830 if rev == wdirrev:
844 if rev == wdirrev:
831 raise error.WdirUnsupported
845 raise error.WdirUnsupported
832 raise
846 raise
833
847
834 # Derived from index values.
848 # Derived from index values.
835
849
836 def end(self, rev):
850 def end(self, rev):
837 return self.start(rev) + self.length(rev)
851 return self.start(rev) + self.length(rev)
838
852
839 def parents(self, node):
853 def parents(self, node):
840 i = self.index
854 i = self.index
841 d = i[self.rev(node)]
855 d = i[self.rev(node)]
842 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
856 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
843
857
844 def chainlen(self, rev):
858 def chainlen(self, rev):
845 return self._chaininfo(rev)[0]
859 return self._chaininfo(rev)[0]
846
860
847 def _chaininfo(self, rev):
861 def _chaininfo(self, rev):
848 chaininfocache = self._chaininfocache
862 chaininfocache = self._chaininfocache
849 if rev in chaininfocache:
863 if rev in chaininfocache:
850 return chaininfocache[rev]
864 return chaininfocache[rev]
851 index = self.index
865 index = self.index
852 generaldelta = self._generaldelta
866 generaldelta = self._generaldelta
853 iterrev = rev
867 iterrev = rev
854 e = index[iterrev]
868 e = index[iterrev]
855 clen = 0
869 clen = 0
856 compresseddeltalen = 0
870 compresseddeltalen = 0
857 while iterrev != e[3]:
871 while iterrev != e[3]:
858 clen += 1
872 clen += 1
859 compresseddeltalen += e[1]
873 compresseddeltalen += e[1]
860 if generaldelta:
874 if generaldelta:
861 iterrev = e[3]
875 iterrev = e[3]
862 else:
876 else:
863 iterrev -= 1
877 iterrev -= 1
864 if iterrev in chaininfocache:
878 if iterrev in chaininfocache:
865 t = chaininfocache[iterrev]
879 t = chaininfocache[iterrev]
866 clen += t[0]
880 clen += t[0]
867 compresseddeltalen += t[1]
881 compresseddeltalen += t[1]
868 break
882 break
869 e = index[iterrev]
883 e = index[iterrev]
870 else:
884 else:
871 # Add text length of base since decompressing that also takes
885 # Add text length of base since decompressing that also takes
872 # work. For cache hits the length is already included.
886 # work. For cache hits the length is already included.
873 compresseddeltalen += e[1]
887 compresseddeltalen += e[1]
874 r = (clen, compresseddeltalen)
888 r = (clen, compresseddeltalen)
875 chaininfocache[rev] = r
889 chaininfocache[rev] = r
876 return r
890 return r
877
891
878 def _deltachain(self, rev, stoprev=None):
892 def _deltachain(self, rev, stoprev=None):
879 """Obtain the delta chain for a revision.
893 """Obtain the delta chain for a revision.
880
894
881 ``stoprev`` specifies a revision to stop at. If not specified, we
895 ``stoprev`` specifies a revision to stop at. If not specified, we
882 stop at the base of the chain.
896 stop at the base of the chain.
883
897
884 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
898 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
885 revs in ascending order and ``stopped`` is a bool indicating whether
899 revs in ascending order and ``stopped`` is a bool indicating whether
886 ``stoprev`` was hit.
900 ``stoprev`` was hit.
887 """
901 """
888 # Try C implementation.
902 # Try C implementation.
889 try:
903 try:
890 return self.index.deltachain(rev, stoprev, self._generaldelta)
904 return self.index.deltachain(rev, stoprev, self._generaldelta)
891 except AttributeError:
905 except AttributeError:
892 pass
906 pass
893
907
894 chain = []
908 chain = []
895
909
896 # Alias to prevent attribute lookup in tight loop.
910 # Alias to prevent attribute lookup in tight loop.
897 index = self.index
911 index = self.index
898 generaldelta = self._generaldelta
912 generaldelta = self._generaldelta
899
913
900 iterrev = rev
914 iterrev = rev
901 e = index[iterrev]
915 e = index[iterrev]
902 while iterrev != e[3] and iterrev != stoprev:
916 while iterrev != e[3] and iterrev != stoprev:
903 chain.append(iterrev)
917 chain.append(iterrev)
904 if generaldelta:
918 if generaldelta:
905 iterrev = e[3]
919 iterrev = e[3]
906 else:
920 else:
907 iterrev -= 1
921 iterrev -= 1
908 e = index[iterrev]
922 e = index[iterrev]
909
923
910 if iterrev == stoprev:
924 if iterrev == stoprev:
911 stopped = True
925 stopped = True
912 else:
926 else:
913 chain.append(iterrev)
927 chain.append(iterrev)
914 stopped = False
928 stopped = False
915
929
916 chain.reverse()
930 chain.reverse()
917 return chain, stopped
931 return chain, stopped
918
932
919 def ancestors(self, revs, stoprev=0, inclusive=False):
933 def ancestors(self, revs, stoprev=0, inclusive=False):
920 """Generate the ancestors of 'revs' in reverse topological order.
934 """Generate the ancestors of 'revs' in reverse topological order.
921 Does not generate revs lower than stoprev.
935 Does not generate revs lower than stoprev.
922
936
923 See the documentation for ancestor.lazyancestors for more details."""
937 See the documentation for ancestor.lazyancestors for more details."""
924
938
925 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
939 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
926 inclusive=inclusive)
940 inclusive=inclusive)
927
941
928 def descendants(self, revs):
942 def descendants(self, revs):
929 """Generate the descendants of 'revs' in revision order.
943 """Generate the descendants of 'revs' in revision order.
930
944
931 Yield a sequence of revision numbers starting with a child of
945 Yield a sequence of revision numbers starting with a child of
932 some rev in revs, i.e., each revision is *not* considered a
946 some rev in revs, i.e., each revision is *not* considered a
933 descendant of itself. Results are ordered by revision number (a
947 descendant of itself. Results are ordered by revision number (a
934 topological sort)."""
948 topological sort)."""
935 first = min(revs)
949 first = min(revs)
936 if first == nullrev:
950 if first == nullrev:
937 for i in self:
951 for i in self:
938 yield i
952 yield i
939 return
953 return
940
954
941 seen = set(revs)
955 seen = set(revs)
942 for i in self.revs(start=first + 1):
956 for i in self.revs(start=first + 1):
943 for x in self.parentrevs(i):
957 for x in self.parentrevs(i):
944 if x != nullrev and x in seen:
958 if x != nullrev and x in seen:
945 seen.add(i)
959 seen.add(i)
946 yield i
960 yield i
947 break
961 break
948
962
949 def findcommonmissing(self, common=None, heads=None):
963 def findcommonmissing(self, common=None, heads=None):
950 """Return a tuple of the ancestors of common and the ancestors of heads
964 """Return a tuple of the ancestors of common and the ancestors of heads
951 that are not ancestors of common. In revset terminology, we return the
965 that are not ancestors of common. In revset terminology, we return the
952 tuple:
966 tuple:
953
967
954 ::common, (::heads) - (::common)
968 ::common, (::heads) - (::common)
955
969
956 The list is sorted by revision number, meaning it is
970 The list is sorted by revision number, meaning it is
957 topologically sorted.
971 topologically sorted.
958
972
959 'heads' and 'common' are both lists of node IDs. If heads is
973 'heads' and 'common' are both lists of node IDs. If heads is
960 not supplied, uses all of the revlog's heads. If common is not
974 not supplied, uses all of the revlog's heads. If common is not
961 supplied, uses nullid."""
975 supplied, uses nullid."""
962 if common is None:
976 if common is None:
963 common = [nullid]
977 common = [nullid]
964 if heads is None:
978 if heads is None:
965 heads = self.heads()
979 heads = self.heads()
966
980
967 common = [self.rev(n) for n in common]
981 common = [self.rev(n) for n in common]
968 heads = [self.rev(n) for n in heads]
982 heads = [self.rev(n) for n in heads]
969
983
970 # we want the ancestors, but inclusive
984 # we want the ancestors, but inclusive
971 class lazyset(object):
985 class lazyset(object):
972 def __init__(self, lazyvalues):
986 def __init__(self, lazyvalues):
973 self.addedvalues = set()
987 self.addedvalues = set()
974 self.lazyvalues = lazyvalues
988 self.lazyvalues = lazyvalues
975
989
976 def __contains__(self, value):
990 def __contains__(self, value):
977 return value in self.addedvalues or value in self.lazyvalues
991 return value in self.addedvalues or value in self.lazyvalues
978
992
979 def __iter__(self):
993 def __iter__(self):
980 added = self.addedvalues
994 added = self.addedvalues
981 for r in added:
995 for r in added:
982 yield r
996 yield r
983 for r in self.lazyvalues:
997 for r in self.lazyvalues:
984 if not r in added:
998 if not r in added:
985 yield r
999 yield r
986
1000
987 def add(self, value):
1001 def add(self, value):
988 self.addedvalues.add(value)
1002 self.addedvalues.add(value)
989
1003
990 def update(self, values):
1004 def update(self, values):
991 self.addedvalues.update(values)
1005 self.addedvalues.update(values)
992
1006
993 has = lazyset(self.ancestors(common))
1007 has = lazyset(self.ancestors(common))
994 has.add(nullrev)
1008 has.add(nullrev)
995 has.update(common)
1009 has.update(common)
996
1010
997 # take all ancestors from heads that aren't in has
1011 # take all ancestors from heads that aren't in has
998 missing = set()
1012 missing = set()
999 visit = collections.deque(r for r in heads if r not in has)
1013 visit = collections.deque(r for r in heads if r not in has)
1000 while visit:
1014 while visit:
1001 r = visit.popleft()
1015 r = visit.popleft()
1002 if r in missing:
1016 if r in missing:
1003 continue
1017 continue
1004 else:
1018 else:
1005 missing.add(r)
1019 missing.add(r)
1006 for p in self.parentrevs(r):
1020 for p in self.parentrevs(r):
1007 if p not in has:
1021 if p not in has:
1008 visit.append(p)
1022 visit.append(p)
1009 missing = list(missing)
1023 missing = list(missing)
1010 missing.sort()
1024 missing.sort()
1011 return has, [self.node(miss) for miss in missing]
1025 return has, [self.node(miss) for miss in missing]
1012
1026
1013 def incrementalmissingrevs(self, common=None):
1027 def incrementalmissingrevs(self, common=None):
1014 """Return an object that can be used to incrementally compute the
1028 """Return an object that can be used to incrementally compute the
1015 revision numbers of the ancestors of arbitrary sets that are not
1029 revision numbers of the ancestors of arbitrary sets that are not
1016 ancestors of common. This is an ancestor.incrementalmissingancestors
1030 ancestors of common. This is an ancestor.incrementalmissingancestors
1017 object.
1031 object.
1018
1032
1019 'common' is a list of revision numbers. If common is not supplied, uses
1033 'common' is a list of revision numbers. If common is not supplied, uses
1020 nullrev.
1034 nullrev.
1021 """
1035 """
1022 if common is None:
1036 if common is None:
1023 common = [nullrev]
1037 common = [nullrev]
1024
1038
1025 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1039 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1026
1040
1027 def findmissingrevs(self, common=None, heads=None):
1041 def findmissingrevs(self, common=None, heads=None):
1028 """Return the revision numbers of the ancestors of heads that
1042 """Return the revision numbers of the ancestors of heads that
1029 are not ancestors of common.
1043 are not ancestors of common.
1030
1044
1031 More specifically, return a list of revision numbers corresponding to
1045 More specifically, return a list of revision numbers corresponding to
1032 nodes N such that every N satisfies the following constraints:
1046 nodes N such that every N satisfies the following constraints:
1033
1047
1034 1. N is an ancestor of some node in 'heads'
1048 1. N is an ancestor of some node in 'heads'
1035 2. N is not an ancestor of any node in 'common'
1049 2. N is not an ancestor of any node in 'common'
1036
1050
1037 The list is sorted by revision number, meaning it is
1051 The list is sorted by revision number, meaning it is
1038 topologically sorted.
1052 topologically sorted.
1039
1053
1040 'heads' and 'common' are both lists of revision numbers. If heads is
1054 'heads' and 'common' are both lists of revision numbers. If heads is
1041 not supplied, uses all of the revlog's heads. If common is not
1055 not supplied, uses all of the revlog's heads. If common is not
1042 supplied, uses nullid."""
1056 supplied, uses nullid."""
1043 if common is None:
1057 if common is None:
1044 common = [nullrev]
1058 common = [nullrev]
1045 if heads is None:
1059 if heads is None:
1046 heads = self.headrevs()
1060 heads = self.headrevs()
1047
1061
1048 inc = self.incrementalmissingrevs(common=common)
1062 inc = self.incrementalmissingrevs(common=common)
1049 return inc.missingancestors(heads)
1063 return inc.missingancestors(heads)
1050
1064
1051 def findmissing(self, common=None, heads=None):
1065 def findmissing(self, common=None, heads=None):
1052 """Return the ancestors of heads that are not ancestors of common.
1066 """Return the ancestors of heads that are not ancestors of common.
1053
1067
1054 More specifically, return a list of nodes N such that every N
1068 More specifically, return a list of nodes N such that every N
1055 satisfies the following constraints:
1069 satisfies the following constraints:
1056
1070
1057 1. N is an ancestor of some node in 'heads'
1071 1. N is an ancestor of some node in 'heads'
1058 2. N is not an ancestor of any node in 'common'
1072 2. N is not an ancestor of any node in 'common'
1059
1073
1060 The list is sorted by revision number, meaning it is
1074 The list is sorted by revision number, meaning it is
1061 topologically sorted.
1075 topologically sorted.
1062
1076
1063 'heads' and 'common' are both lists of node IDs. If heads is
1077 'heads' and 'common' are both lists of node IDs. If heads is
1064 not supplied, uses all of the revlog's heads. If common is not
1078 not supplied, uses all of the revlog's heads. If common is not
1065 supplied, uses nullid."""
1079 supplied, uses nullid."""
1066 if common is None:
1080 if common is None:
1067 common = [nullid]
1081 common = [nullid]
1068 if heads is None:
1082 if heads is None:
1069 heads = self.heads()
1083 heads = self.heads()
1070
1084
1071 common = [self.rev(n) for n in common]
1085 common = [self.rev(n) for n in common]
1072 heads = [self.rev(n) for n in heads]
1086 heads = [self.rev(n) for n in heads]
1073
1087
1074 inc = self.incrementalmissingrevs(common=common)
1088 inc = self.incrementalmissingrevs(common=common)
1075 return [self.node(r) for r in inc.missingancestors(heads)]
1089 return [self.node(r) for r in inc.missingancestors(heads)]
1076
1090
1077 def nodesbetween(self, roots=None, heads=None):
1091 def nodesbetween(self, roots=None, heads=None):
1078 """Return a topological path from 'roots' to 'heads'.
1092 """Return a topological path from 'roots' to 'heads'.
1079
1093
1080 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1094 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1081 topologically sorted list of all nodes N that satisfy both of
1095 topologically sorted list of all nodes N that satisfy both of
1082 these constraints:
1096 these constraints:
1083
1097
1084 1. N is a descendant of some node in 'roots'
1098 1. N is a descendant of some node in 'roots'
1085 2. N is an ancestor of some node in 'heads'
1099 2. N is an ancestor of some node in 'heads'
1086
1100
1087 Every node is considered to be both a descendant and an ancestor
1101 Every node is considered to be both a descendant and an ancestor
1088 of itself, so every reachable node in 'roots' and 'heads' will be
1102 of itself, so every reachable node in 'roots' and 'heads' will be
1089 included in 'nodes'.
1103 included in 'nodes'.
1090
1104
1091 'outroots' is the list of reachable nodes in 'roots', i.e., the
1105 'outroots' is the list of reachable nodes in 'roots', i.e., the
1092 subset of 'roots' that is returned in 'nodes'. Likewise,
1106 subset of 'roots' that is returned in 'nodes'. Likewise,
1093 'outheads' is the subset of 'heads' that is also in 'nodes'.
1107 'outheads' is the subset of 'heads' that is also in 'nodes'.
1094
1108
1095 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1109 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1096 unspecified, uses nullid as the only root. If 'heads' is
1110 unspecified, uses nullid as the only root. If 'heads' is
1097 unspecified, uses list of all of the revlog's heads."""
1111 unspecified, uses list of all of the revlog's heads."""
1098 nonodes = ([], [], [])
1112 nonodes = ([], [], [])
1099 if roots is not None:
1113 if roots is not None:
1100 roots = list(roots)
1114 roots = list(roots)
1101 if not roots:
1115 if not roots:
1102 return nonodes
1116 return nonodes
1103 lowestrev = min([self.rev(n) for n in roots])
1117 lowestrev = min([self.rev(n) for n in roots])
1104 else:
1118 else:
1105 roots = [nullid] # Everybody's a descendant of nullid
1119 roots = [nullid] # Everybody's a descendant of nullid
1106 lowestrev = nullrev
1120 lowestrev = nullrev
1107 if (lowestrev == nullrev) and (heads is None):
1121 if (lowestrev == nullrev) and (heads is None):
1108 # We want _all_ the nodes!
1122 # We want _all_ the nodes!
1109 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1123 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1110 if heads is None:
1124 if heads is None:
1111 # All nodes are ancestors, so the latest ancestor is the last
1125 # All nodes are ancestors, so the latest ancestor is the last
1112 # node.
1126 # node.
1113 highestrev = len(self) - 1
1127 highestrev = len(self) - 1
1114 # Set ancestors to None to signal that every node is an ancestor.
1128 # Set ancestors to None to signal that every node is an ancestor.
1115 ancestors = None
1129 ancestors = None
1116 # Set heads to an empty dictionary for later discovery of heads
1130 # Set heads to an empty dictionary for later discovery of heads
1117 heads = {}
1131 heads = {}
1118 else:
1132 else:
1119 heads = list(heads)
1133 heads = list(heads)
1120 if not heads:
1134 if not heads:
1121 return nonodes
1135 return nonodes
1122 ancestors = set()
1136 ancestors = set()
1123 # Turn heads into a dictionary so we can remove 'fake' heads.
1137 # Turn heads into a dictionary so we can remove 'fake' heads.
1124 # Also, later we will be using it to filter out the heads we can't
1138 # Also, later we will be using it to filter out the heads we can't
1125 # find from roots.
1139 # find from roots.
1126 heads = dict.fromkeys(heads, False)
1140 heads = dict.fromkeys(heads, False)
1127 # Start at the top and keep marking parents until we're done.
1141 # Start at the top and keep marking parents until we're done.
1128 nodestotag = set(heads)
1142 nodestotag = set(heads)
1129 # Remember where the top was so we can use it as a limit later.
1143 # Remember where the top was so we can use it as a limit later.
1130 highestrev = max([self.rev(n) for n in nodestotag])
1144 highestrev = max([self.rev(n) for n in nodestotag])
1131 while nodestotag:
1145 while nodestotag:
1132 # grab a node to tag
1146 # grab a node to tag
1133 n = nodestotag.pop()
1147 n = nodestotag.pop()
1134 # Never tag nullid
1148 # Never tag nullid
1135 if n == nullid:
1149 if n == nullid:
1136 continue
1150 continue
1137 # A node's revision number represents its place in a
1151 # A node's revision number represents its place in a
1138 # topologically sorted list of nodes.
1152 # topologically sorted list of nodes.
1139 r = self.rev(n)
1153 r = self.rev(n)
1140 if r >= lowestrev:
1154 if r >= lowestrev:
1141 if n not in ancestors:
1155 if n not in ancestors:
1142 # If we are possibly a descendant of one of the roots
1156 # If we are possibly a descendant of one of the roots
1143 # and we haven't already been marked as an ancestor
1157 # and we haven't already been marked as an ancestor
1144 ancestors.add(n) # Mark as ancestor
1158 ancestors.add(n) # Mark as ancestor
1145 # Add non-nullid parents to list of nodes to tag.
1159 # Add non-nullid parents to list of nodes to tag.
1146 nodestotag.update([p for p in self.parents(n) if
1160 nodestotag.update([p for p in self.parents(n) if
1147 p != nullid])
1161 p != nullid])
1148 elif n in heads: # We've seen it before, is it a fake head?
1162 elif n in heads: # We've seen it before, is it a fake head?
1149 # So it is, real heads should not be the ancestors of
1163 # So it is, real heads should not be the ancestors of
1150 # any other heads.
1164 # any other heads.
1151 heads.pop(n)
1165 heads.pop(n)
1152 if not ancestors:
1166 if not ancestors:
1153 return nonodes
1167 return nonodes
1154 # Now that we have our set of ancestors, we want to remove any
1168 # Now that we have our set of ancestors, we want to remove any
1155 # roots that are not ancestors.
1169 # roots that are not ancestors.
1156
1170
1157 # If one of the roots was nullid, everything is included anyway.
1171 # If one of the roots was nullid, everything is included anyway.
1158 if lowestrev > nullrev:
1172 if lowestrev > nullrev:
1159 # But, since we weren't, let's recompute the lowest rev to not
1173 # But, since we weren't, let's recompute the lowest rev to not
1160 # include roots that aren't ancestors.
1174 # include roots that aren't ancestors.
1161
1175
1162 # Filter out roots that aren't ancestors of heads
1176 # Filter out roots that aren't ancestors of heads
1163 roots = [root for root in roots if root in ancestors]
1177 roots = [root for root in roots if root in ancestors]
1164 # Recompute the lowest revision
1178 # Recompute the lowest revision
1165 if roots:
1179 if roots:
1166 lowestrev = min([self.rev(root) for root in roots])
1180 lowestrev = min([self.rev(root) for root in roots])
1167 else:
1181 else:
1168 # No more roots? Return empty list
1182 # No more roots? Return empty list
1169 return nonodes
1183 return nonodes
1170 else:
1184 else:
1171 # We are descending from nullid, and don't need to care about
1185 # We are descending from nullid, and don't need to care about
1172 # any other roots.
1186 # any other roots.
1173 lowestrev = nullrev
1187 lowestrev = nullrev
1174 roots = [nullid]
1188 roots = [nullid]
1175 # Transform our roots list into a set.
1189 # Transform our roots list into a set.
1176 descendants = set(roots)
1190 descendants = set(roots)
1177 # Also, keep the original roots so we can filter out roots that aren't
1191 # Also, keep the original roots so we can filter out roots that aren't
1178 # 'real' roots (i.e. are descended from other roots).
1192 # 'real' roots (i.e. are descended from other roots).
1179 roots = descendants.copy()
1193 roots = descendants.copy()
1180 # Our topologically sorted list of output nodes.
1194 # Our topologically sorted list of output nodes.
1181 orderedout = []
1195 orderedout = []
1182 # Don't start at nullid since we don't want nullid in our output list,
1196 # Don't start at nullid since we don't want nullid in our output list,
1183 # and if nullid shows up in descendants, empty parents will look like
1197 # and if nullid shows up in descendants, empty parents will look like
1184 # they're descendants.
1198 # they're descendants.
1185 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1199 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1186 n = self.node(r)
1200 n = self.node(r)
1187 isdescendant = False
1201 isdescendant = False
1188 if lowestrev == nullrev: # Everybody is a descendant of nullid
1202 if lowestrev == nullrev: # Everybody is a descendant of nullid
1189 isdescendant = True
1203 isdescendant = True
1190 elif n in descendants:
1204 elif n in descendants:
1191 # n is already a descendant
1205 # n is already a descendant
1192 isdescendant = True
1206 isdescendant = True
1193 # This check only needs to be done here because all the roots
1207 # This check only needs to be done here because all the roots
1194 # will start being marked is descendants before the loop.
1208 # will start being marked is descendants before the loop.
1195 if n in roots:
1209 if n in roots:
1196 # If n was a root, check if it's a 'real' root.
1210 # If n was a root, check if it's a 'real' root.
1197 p = tuple(self.parents(n))
1211 p = tuple(self.parents(n))
1198 # If any of its parents are descendants, it's not a root.
1212 # If any of its parents are descendants, it's not a root.
1199 if (p[0] in descendants) or (p[1] in descendants):
1213 if (p[0] in descendants) or (p[1] in descendants):
1200 roots.remove(n)
1214 roots.remove(n)
1201 else:
1215 else:
1202 p = tuple(self.parents(n))
1216 p = tuple(self.parents(n))
1203 # A node is a descendant if either of its parents are
1217 # A node is a descendant if either of its parents are
1204 # descendants. (We seeded the dependents list with the roots
1218 # descendants. (We seeded the dependents list with the roots
1205 # up there, remember?)
1219 # up there, remember?)
1206 if (p[0] in descendants) or (p[1] in descendants):
1220 if (p[0] in descendants) or (p[1] in descendants):
1207 descendants.add(n)
1221 descendants.add(n)
1208 isdescendant = True
1222 isdescendant = True
1209 if isdescendant and ((ancestors is None) or (n in ancestors)):
1223 if isdescendant and ((ancestors is None) or (n in ancestors)):
1210 # Only include nodes that are both descendants and ancestors.
1224 # Only include nodes that are both descendants and ancestors.
1211 orderedout.append(n)
1225 orderedout.append(n)
1212 if (ancestors is not None) and (n in heads):
1226 if (ancestors is not None) and (n in heads):
1213 # We're trying to figure out which heads are reachable
1227 # We're trying to figure out which heads are reachable
1214 # from roots.
1228 # from roots.
1215 # Mark this head as having been reached
1229 # Mark this head as having been reached
1216 heads[n] = True
1230 heads[n] = True
1217 elif ancestors is None:
1231 elif ancestors is None:
1218 # Otherwise, we're trying to discover the heads.
1232 # Otherwise, we're trying to discover the heads.
1219 # Assume this is a head because if it isn't, the next step
1233 # Assume this is a head because if it isn't, the next step
1220 # will eventually remove it.
1234 # will eventually remove it.
1221 heads[n] = True
1235 heads[n] = True
1222 # But, obviously its parents aren't.
1236 # But, obviously its parents aren't.
1223 for p in self.parents(n):
1237 for p in self.parents(n):
1224 heads.pop(p, None)
1238 heads.pop(p, None)
1225 heads = [head for head, flag in heads.iteritems() if flag]
1239 heads = [head for head, flag in heads.iteritems() if flag]
1226 roots = list(roots)
1240 roots = list(roots)
1227 assert orderedout
1241 assert orderedout
1228 assert roots
1242 assert roots
1229 assert heads
1243 assert heads
1230 return (orderedout, roots, heads)
1244 return (orderedout, roots, heads)
1231
1245
1232 def headrevs(self):
1246 def headrevs(self):
1233 try:
1247 try:
1234 return self.index.headrevs()
1248 return self.index.headrevs()
1235 except AttributeError:
1249 except AttributeError:
1236 return self._headrevs()
1250 return self._headrevs()
1237
1251
1238 def computephases(self, roots):
1252 def computephases(self, roots):
1239 return self.index.computephasesmapsets(roots)
1253 return self.index.computephasesmapsets(roots)
1240
1254
1241 def _headrevs(self):
1255 def _headrevs(self):
1242 count = len(self)
1256 count = len(self)
1243 if not count:
1257 if not count:
1244 return [nullrev]
1258 return [nullrev]
1245 # we won't iter over filtered rev so nobody is a head at start
1259 # we won't iter over filtered rev so nobody is a head at start
1246 ishead = [0] * (count + 1)
1260 ishead = [0] * (count + 1)
1247 index = self.index
1261 index = self.index
1248 for r in self:
1262 for r in self:
1249 ishead[r] = 1 # I may be an head
1263 ishead[r] = 1 # I may be an head
1250 e = index[r]
1264 e = index[r]
1251 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1265 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1252 return [r for r, val in enumerate(ishead) if val]
1266 return [r for r, val in enumerate(ishead) if val]
1253
1267
1254 def heads(self, start=None, stop=None):
1268 def heads(self, start=None, stop=None):
1255 """return the list of all nodes that have no children
1269 """return the list of all nodes that have no children
1256
1270
1257 if start is specified, only heads that are descendants of
1271 if start is specified, only heads that are descendants of
1258 start will be returned
1272 start will be returned
1259 if stop is specified, it will consider all the revs from stop
1273 if stop is specified, it will consider all the revs from stop
1260 as if they had no children
1274 as if they had no children
1261 """
1275 """
1262 if start is None and stop is None:
1276 if start is None and stop is None:
1263 if not len(self):
1277 if not len(self):
1264 return [nullid]
1278 return [nullid]
1265 return [self.node(r) for r in self.headrevs()]
1279 return [self.node(r) for r in self.headrevs()]
1266
1280
1267 if start is None:
1281 if start is None:
1268 start = nullid
1282 start = nullid
1269 if stop is None:
1283 if stop is None:
1270 stop = []
1284 stop = []
1271 stoprevs = set([self.rev(n) for n in stop])
1285 stoprevs = set([self.rev(n) for n in stop])
1272 startrev = self.rev(start)
1286 startrev = self.rev(start)
1273 reachable = {startrev}
1287 reachable = {startrev}
1274 heads = {startrev}
1288 heads = {startrev}
1275
1289
1276 parentrevs = self.parentrevs
1290 parentrevs = self.parentrevs
1277 for r in self.revs(start=startrev + 1):
1291 for r in self.revs(start=startrev + 1):
1278 for p in parentrevs(r):
1292 for p in parentrevs(r):
1279 if p in reachable:
1293 if p in reachable:
1280 if r not in stoprevs:
1294 if r not in stoprevs:
1281 reachable.add(r)
1295 reachable.add(r)
1282 heads.add(r)
1296 heads.add(r)
1283 if p in heads and p not in stoprevs:
1297 if p in heads and p not in stoprevs:
1284 heads.remove(p)
1298 heads.remove(p)
1285
1299
1286 return [self.node(r) for r in heads]
1300 return [self.node(r) for r in heads]
1287
1301
1288 def children(self, node):
1302 def children(self, node):
1289 """find the children of a given node"""
1303 """find the children of a given node"""
1290 c = []
1304 c = []
1291 p = self.rev(node)
1305 p = self.rev(node)
1292 for r in self.revs(start=p + 1):
1306 for r in self.revs(start=p + 1):
1293 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1307 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1294 if prevs:
1308 if prevs:
1295 for pr in prevs:
1309 for pr in prevs:
1296 if pr == p:
1310 if pr == p:
1297 c.append(self.node(r))
1311 c.append(self.node(r))
1298 elif p == nullrev:
1312 elif p == nullrev:
1299 c.append(self.node(r))
1313 c.append(self.node(r))
1300 return c
1314 return c
1301
1315
1302 def descendant(self, start, end):
1316 def descendant(self, start, end):
1303 if start == nullrev:
1317 if start == nullrev:
1304 return True
1318 return True
1305 for i in self.descendants([start]):
1319 for i in self.descendants([start]):
1306 if i == end:
1320 if i == end:
1307 return True
1321 return True
1308 elif i > end:
1322 elif i > end:
1309 break
1323 break
1310 return False
1324 return False
1311
1325
1312 def commonancestorsheads(self, a, b):
1326 def commonancestorsheads(self, a, b):
1313 """calculate all the heads of the common ancestors of nodes a and b"""
1327 """calculate all the heads of the common ancestors of nodes a and b"""
1314 a, b = self.rev(a), self.rev(b)
1328 a, b = self.rev(a), self.rev(b)
1315 try:
1329 try:
1316 ancs = self.index.commonancestorsheads(a, b)
1330 ancs = self.index.commonancestorsheads(a, b)
1317 except (AttributeError, OverflowError): # C implementation failed
1331 except (AttributeError, OverflowError): # C implementation failed
1318 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1332 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1319 return pycompat.maplist(self.node, ancs)
1333 return pycompat.maplist(self.node, ancs)
1320
1334
1321 def isancestor(self, a, b):
1335 def isancestor(self, a, b):
1322 """return True if node a is an ancestor of node b
1336 """return True if node a is an ancestor of node b
1323
1337
1324 The implementation of this is trivial but the use of
1338 The implementation of this is trivial but the use of
1325 commonancestorsheads is not."""
1339 commonancestorsheads is not."""
1326 return a in self.commonancestorsheads(a, b)
1340 return a in self.commonancestorsheads(a, b)
1327
1341
1328 def ancestor(self, a, b):
1342 def ancestor(self, a, b):
1329 """calculate the "best" common ancestor of nodes a and b"""
1343 """calculate the "best" common ancestor of nodes a and b"""
1330
1344
1331 a, b = self.rev(a), self.rev(b)
1345 a, b = self.rev(a), self.rev(b)
1332 try:
1346 try:
1333 ancs = self.index.ancestors(a, b)
1347 ancs = self.index.ancestors(a, b)
1334 except (AttributeError, OverflowError):
1348 except (AttributeError, OverflowError):
1335 ancs = ancestor.ancestors(self.parentrevs, a, b)
1349 ancs = ancestor.ancestors(self.parentrevs, a, b)
1336 if ancs:
1350 if ancs:
1337 # choose a consistent winner when there's a tie
1351 # choose a consistent winner when there's a tie
1338 return min(map(self.node, ancs))
1352 return min(map(self.node, ancs))
1339 return nullid
1353 return nullid
1340
1354
1341 def _match(self, id):
1355 def _match(self, id):
1342 if isinstance(id, int):
1356 if isinstance(id, int):
1343 # rev
1357 # rev
1344 return self.node(id)
1358 return self.node(id)
1345 if len(id) == 20:
1359 if len(id) == 20:
1346 # possibly a binary node
1360 # possibly a binary node
1347 # odds of a binary node being all hex in ASCII are 1 in 10**25
1361 # odds of a binary node being all hex in ASCII are 1 in 10**25
1348 try:
1362 try:
1349 node = id
1363 node = id
1350 self.rev(node) # quick search the index
1364 self.rev(node) # quick search the index
1351 return node
1365 return node
1352 except LookupError:
1366 except LookupError:
1353 pass # may be partial hex id
1367 pass # may be partial hex id
1354 try:
1368 try:
1355 # str(rev)
1369 # str(rev)
1356 rev = int(id)
1370 rev = int(id)
1357 if str(rev) != id:
1371 if str(rev) != id:
1358 raise ValueError
1372 raise ValueError
1359 if rev < 0:
1373 if rev < 0:
1360 rev = len(self) + rev
1374 rev = len(self) + rev
1361 if rev < 0 or rev >= len(self):
1375 if rev < 0 or rev >= len(self):
1362 raise ValueError
1376 raise ValueError
1363 return self.node(rev)
1377 return self.node(rev)
1364 except (ValueError, OverflowError):
1378 except (ValueError, OverflowError):
1365 pass
1379 pass
1366 if len(id) == 40:
1380 if len(id) == 40:
1367 try:
1381 try:
1368 # a full hex nodeid?
1382 # a full hex nodeid?
1369 node = bin(id)
1383 node = bin(id)
1370 self.rev(node)
1384 self.rev(node)
1371 return node
1385 return node
1372 except (TypeError, LookupError):
1386 except (TypeError, LookupError):
1373 pass
1387 pass
1374
1388
1375 def _partialmatch(self, id):
1389 def _partialmatch(self, id):
1376 maybewdir = wdirhex.startswith(id)
1390 maybewdir = wdirhex.startswith(id)
1377 try:
1391 try:
1378 partial = self.index.partialmatch(id)
1392 partial = self.index.partialmatch(id)
1379 if partial and self.hasnode(partial):
1393 if partial and self.hasnode(partial):
1380 if maybewdir:
1394 if maybewdir:
1381 # single 'ff...' match in radix tree, ambiguous with wdir
1395 # single 'ff...' match in radix tree, ambiguous with wdir
1382 raise RevlogError
1396 raise RevlogError
1383 return partial
1397 return partial
1384 if maybewdir:
1398 if maybewdir:
1385 # no 'ff...' match in radix tree, wdir identified
1399 # no 'ff...' match in radix tree, wdir identified
1386 raise error.WdirUnsupported
1400 raise error.WdirUnsupported
1387 return None
1401 return None
1388 except RevlogError:
1402 except RevlogError:
1389 # parsers.c radix tree lookup gave multiple matches
1403 # parsers.c radix tree lookup gave multiple matches
1390 # fast path: for unfiltered changelog, radix tree is accurate
1404 # fast path: for unfiltered changelog, radix tree is accurate
1391 if not getattr(self, 'filteredrevs', None):
1405 if not getattr(self, 'filteredrevs', None):
1392 raise LookupError(id, self.indexfile,
1406 raise LookupError(id, self.indexfile,
1393 _('ambiguous identifier'))
1407 _('ambiguous identifier'))
1394 # fall through to slow path that filters hidden revisions
1408 # fall through to slow path that filters hidden revisions
1395 except (AttributeError, ValueError):
1409 except (AttributeError, ValueError):
1396 # we are pure python, or key was too short to search radix tree
1410 # we are pure python, or key was too short to search radix tree
1397 pass
1411 pass
1398
1412
1399 if id in self._pcache:
1413 if id in self._pcache:
1400 return self._pcache[id]
1414 return self._pcache[id]
1401
1415
1402 if len(id) < 40:
1416 if len(id) < 40:
1403 try:
1417 try:
1404 # hex(node)[:...]
1418 # hex(node)[:...]
1405 l = len(id) // 2 # grab an even number of digits
1419 l = len(id) // 2 # grab an even number of digits
1406 prefix = bin(id[:l * 2])
1420 prefix = bin(id[:l * 2])
1407 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1421 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1408 nl = [n for n in nl if hex(n).startswith(id) and
1422 nl = [n for n in nl if hex(n).startswith(id) and
1409 self.hasnode(n)]
1423 self.hasnode(n)]
1410 if len(nl) > 0:
1424 if len(nl) > 0:
1411 if len(nl) == 1 and not maybewdir:
1425 if len(nl) == 1 and not maybewdir:
1412 self._pcache[id] = nl[0]
1426 self._pcache[id] = nl[0]
1413 return nl[0]
1427 return nl[0]
1414 raise LookupError(id, self.indexfile,
1428 raise LookupError(id, self.indexfile,
1415 _('ambiguous identifier'))
1429 _('ambiguous identifier'))
1416 if maybewdir:
1430 if maybewdir:
1417 raise error.WdirUnsupported
1431 raise error.WdirUnsupported
1418 return None
1432 return None
1419 except (TypeError, binascii.Error):
1433 except (TypeError, binascii.Error):
1420 pass
1434 pass
1421
1435
1422 def lookup(self, id):
1436 def lookup(self, id):
1423 """locate a node based on:
1437 """locate a node based on:
1424 - revision number or str(revision number)
1438 - revision number or str(revision number)
1425 - nodeid or subset of hex nodeid
1439 - nodeid or subset of hex nodeid
1426 """
1440 """
1427 n = self._match(id)
1441 n = self._match(id)
1428 if n is not None:
1442 if n is not None:
1429 return n
1443 return n
1430 n = self._partialmatch(id)
1444 n = self._partialmatch(id)
1431 if n:
1445 if n:
1432 return n
1446 return n
1433
1447
1434 raise LookupError(id, self.indexfile, _('no match found'))
1448 raise LookupError(id, self.indexfile, _('no match found'))
1435
1449
1436 def shortest(self, hexnode, minlength=1):
1450 def shortest(self, hexnode, minlength=1):
1437 """Find the shortest unambiguous prefix that matches hexnode."""
1451 """Find the shortest unambiguous prefix that matches hexnode."""
1438 def isvalid(test):
1452 def isvalid(test):
1439 try:
1453 try:
1440 if self._partialmatch(test) is None:
1454 if self._partialmatch(test) is None:
1441 return False
1455 return False
1442
1456
1443 try:
1457 try:
1444 i = int(test)
1458 i = int(test)
1445 # if we are a pure int, then starting with zero will not be
1459 # if we are a pure int, then starting with zero will not be
1446 # confused as a rev; or, obviously, if the int is larger
1460 # confused as a rev; or, obviously, if the int is larger
1447 # than the value of the tip rev
1461 # than the value of the tip rev
1448 if test[0] == '0' or i > len(self):
1462 if test[0] == '0' or i > len(self):
1449 return True
1463 return True
1450 return False
1464 return False
1451 except ValueError:
1465 except ValueError:
1452 return True
1466 return True
1453 except error.RevlogError:
1467 except error.RevlogError:
1454 return False
1468 return False
1455 except error.WdirUnsupported:
1469 except error.WdirUnsupported:
1456 # single 'ff...' match
1470 # single 'ff...' match
1457 return True
1471 return True
1458
1472
1459 shortest = hexnode
1473 shortest = hexnode
1460 startlength = max(6, minlength)
1474 startlength = max(6, minlength)
1461 length = startlength
1475 length = startlength
1462 while True:
1476 while True:
1463 test = hexnode[:length]
1477 test = hexnode[:length]
1464 if isvalid(test):
1478 if isvalid(test):
1465 shortest = test
1479 shortest = test
1466 if length == minlength or length > startlength:
1480 if length == minlength or length > startlength:
1467 return shortest
1481 return shortest
1468 length -= 1
1482 length -= 1
1469 else:
1483 else:
1470 length += 1
1484 length += 1
1471 if len(shortest) <= length:
1485 if len(shortest) <= length:
1472 return shortest
1486 return shortest
1473
1487
1474 def cmp(self, node, text):
1488 def cmp(self, node, text):
1475 """compare text with a given file revision
1489 """compare text with a given file revision
1476
1490
1477 returns True if text is different than what is stored.
1491 returns True if text is different than what is stored.
1478 """
1492 """
1479 p1, p2 = self.parents(node)
1493 p1, p2 = self.parents(node)
1480 return hash(text, p1, p2) != node
1494 return hash(text, p1, p2) != node
1481
1495
1482 def _cachesegment(self, offset, data):
1496 def _cachesegment(self, offset, data):
1483 """Add a segment to the revlog cache.
1497 """Add a segment to the revlog cache.
1484
1498
1485 Accepts an absolute offset and the data that is at that location.
1499 Accepts an absolute offset and the data that is at that location.
1486 """
1500 """
1487 o, d = self._chunkcache
1501 o, d = self._chunkcache
1488 # try to add to existing cache
1502 # try to add to existing cache
1489 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1503 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1490 self._chunkcache = o, d + data
1504 self._chunkcache = o, d + data
1491 else:
1505 else:
1492 self._chunkcache = offset, data
1506 self._chunkcache = offset, data
1493
1507
1494 def _readsegment(self, offset, length, df=None):
1508 def _readsegment(self, offset, length, df=None):
1495 """Load a segment of raw data from the revlog.
1509 """Load a segment of raw data from the revlog.
1496
1510
1497 Accepts an absolute offset, length to read, and an optional existing
1511 Accepts an absolute offset, length to read, and an optional existing
1498 file handle to read from.
1512 file handle to read from.
1499
1513
1500 If an existing file handle is passed, it will be seeked and the
1514 If an existing file handle is passed, it will be seeked and the
1501 original seek position will NOT be restored.
1515 original seek position will NOT be restored.
1502
1516
1503 Returns a str or buffer of raw byte data.
1517 Returns a str or buffer of raw byte data.
1504 """
1518 """
1505 if df is not None:
1506 closehandle = False
1507 else:
1508 if self._inline:
1509 df = self._indexfp()
1510 else:
1511 df = self._datafp()
1512 closehandle = True
1513
1514 # Cache data both forward and backward around the requested
1519 # Cache data both forward and backward around the requested
1515 # data, in a fixed size window. This helps speed up operations
1520 # data, in a fixed size window. This helps speed up operations
1516 # involving reading the revlog backwards.
1521 # involving reading the revlog backwards.
1517 cachesize = self._chunkcachesize
1522 cachesize = self._chunkcachesize
1518 realoffset = offset & ~(cachesize - 1)
1523 realoffset = offset & ~(cachesize - 1)
1519 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1524 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1520 - realoffset)
1525 - realoffset)
1521 df.seek(realoffset)
1526 with self._datareadfp(df) as df:
1522 d = df.read(reallength)
1527 df.seek(realoffset)
1523 if closehandle:
1528 d = df.read(reallength)
1524 df.close()
1525 self._cachesegment(realoffset, d)
1529 self._cachesegment(realoffset, d)
1526 if offset != realoffset or reallength != length:
1530 if offset != realoffset or reallength != length:
1527 return util.buffer(d, offset - realoffset, length)
1531 return util.buffer(d, offset - realoffset, length)
1528 return d
1532 return d
1529
1533
1530 def _getsegment(self, offset, length, df=None):
1534 def _getsegment(self, offset, length, df=None):
1531 """Obtain a segment of raw data from the revlog.
1535 """Obtain a segment of raw data from the revlog.
1532
1536
1533 Accepts an absolute offset, length of bytes to obtain, and an
1537 Accepts an absolute offset, length of bytes to obtain, and an
1534 optional file handle to the already-opened revlog. If the file
1538 optional file handle to the already-opened revlog. If the file
1535 handle is used, it's original seek position will not be preserved.
1539 handle is used, it's original seek position will not be preserved.
1536
1540
1537 Requests for data may be returned from a cache.
1541 Requests for data may be returned from a cache.
1538
1542
1539 Returns a str or a buffer instance of raw byte data.
1543 Returns a str or a buffer instance of raw byte data.
1540 """
1544 """
1541 o, d = self._chunkcache
1545 o, d = self._chunkcache
1542 l = len(d)
1546 l = len(d)
1543
1547
1544 # is it in the cache?
1548 # is it in the cache?
1545 cachestart = offset - o
1549 cachestart = offset - o
1546 cacheend = cachestart + length
1550 cacheend = cachestart + length
1547 if cachestart >= 0 and cacheend <= l:
1551 if cachestart >= 0 and cacheend <= l:
1548 if cachestart == 0 and cacheend == l:
1552 if cachestart == 0 and cacheend == l:
1549 return d # avoid a copy
1553 return d # avoid a copy
1550 return util.buffer(d, cachestart, cacheend - cachestart)
1554 return util.buffer(d, cachestart, cacheend - cachestart)
1551
1555
1552 return self._readsegment(offset, length, df=df)
1556 return self._readsegment(offset, length, df=df)
1553
1557
1554 def _getsegmentforrevs(self, startrev, endrev, df=None):
1558 def _getsegmentforrevs(self, startrev, endrev, df=None):
1555 """Obtain a segment of raw data corresponding to a range of revisions.
1559 """Obtain a segment of raw data corresponding to a range of revisions.
1556
1560
1557 Accepts the start and end revisions and an optional already-open
1561 Accepts the start and end revisions and an optional already-open
1558 file handle to be used for reading. If the file handle is read, its
1562 file handle to be used for reading. If the file handle is read, its
1559 seek position will not be preserved.
1563 seek position will not be preserved.
1560
1564
1561 Requests for data may be satisfied by a cache.
1565 Requests for data may be satisfied by a cache.
1562
1566
1563 Returns a 2-tuple of (offset, data) for the requested range of
1567 Returns a 2-tuple of (offset, data) for the requested range of
1564 revisions. Offset is the integer offset from the beginning of the
1568 revisions. Offset is the integer offset from the beginning of the
1565 revlog and data is a str or buffer of the raw byte data.
1569 revlog and data is a str or buffer of the raw byte data.
1566
1570
1567 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1571 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1568 to determine where each revision's data begins and ends.
1572 to determine where each revision's data begins and ends.
1569 """
1573 """
1570 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1574 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1571 # (functions are expensive).
1575 # (functions are expensive).
1572 index = self.index
1576 index = self.index
1573 istart = index[startrev]
1577 istart = index[startrev]
1574 start = int(istart[0] >> 16)
1578 start = int(istart[0] >> 16)
1575 if startrev == endrev:
1579 if startrev == endrev:
1576 end = start + istart[1]
1580 end = start + istart[1]
1577 else:
1581 else:
1578 iend = index[endrev]
1582 iend = index[endrev]
1579 end = int(iend[0] >> 16) + iend[1]
1583 end = int(iend[0] >> 16) + iend[1]
1580
1584
1581 if self._inline:
1585 if self._inline:
1582 start += (startrev + 1) * self._io.size
1586 start += (startrev + 1) * self._io.size
1583 end += (endrev + 1) * self._io.size
1587 end += (endrev + 1) * self._io.size
1584 length = end - start
1588 length = end - start
1585
1589
1586 return start, self._getsegment(start, length, df=df)
1590 return start, self._getsegment(start, length, df=df)
1587
1591
1588 def _chunk(self, rev, df=None):
1592 def _chunk(self, rev, df=None):
1589 """Obtain a single decompressed chunk for a revision.
1593 """Obtain a single decompressed chunk for a revision.
1590
1594
1591 Accepts an integer revision and an optional already-open file handle
1595 Accepts an integer revision and an optional already-open file handle
1592 to be used for reading. If used, the seek position of the file will not
1596 to be used for reading. If used, the seek position of the file will not
1593 be preserved.
1597 be preserved.
1594
1598
1595 Returns a str holding uncompressed data for the requested revision.
1599 Returns a str holding uncompressed data for the requested revision.
1596 """
1600 """
1597 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1601 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1598
1602
    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        # Hoist attribute/method lookups to locals: this is a hot path and
        # local-variable access is the fastest lookup in CPython.
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        # With sparse reads, a span of revisions may be split into smaller
        # groups so that large unused gaps are not read from disk.
        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                # one contiguous read covering the whole group
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                # slice each revision's compressed payload out of the single
                # segment read above, then decompress it
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l
1651
1655
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        # reset to an empty cache window anchored at offset 0
        self._chunkcache = (0, '')
1655
1659
1656 def deltaparent(self, rev):
1660 def deltaparent(self, rev):
1657 """return deltaparent of the given revision"""
1661 """return deltaparent of the given revision"""
1658 base = self.index[rev][3]
1662 base = self.index[rev][3]
1659 if base == rev:
1663 if base == rev:
1660 return nullrev
1664 return nullrev
1661 elif self._generaldelta:
1665 elif self._generaldelta:
1662 return base
1666 return base
1663 else:
1667 else:
1664 return rev - 1
1668 return rev - 1
1665
1669
1666 def revdiff(self, rev1, rev2):
1670 def revdiff(self, rev1, rev2):
1667 """return or calculate a delta between two revisions
1671 """return or calculate a delta between two revisions
1668
1672
1669 The delta calculated is in binary form and is intended to be written to
1673 The delta calculated is in binary form and is intended to be written to
1670 revlog data directly. So this function needs raw revision data.
1674 revlog data directly. So this function needs raw revision data.
1671 """
1675 """
1672 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1676 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1673 return bytes(self._chunk(rev2))
1677 return bytes(self._chunk(rev2))
1674
1678
1675 return mdiff.textdiff(self.revision(rev1, raw=True),
1679 return mdiff.textdiff(self.revision(rev1, raw=True),
1676 self.revision(rev2, raw=True))
1680 self.revision(rev2, raw=True))
1677
1681
    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        # accept either an integer revision or a binary node id
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            # even on a cache miss, the cached revision can serve as a
            # stopping point for the delta chain below
            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            # walk the delta chain; it stops early if it reaches cachedrev,
            # whose rawtext we already hold
            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                # chain starts from a full snapshot: its first entry is the
                # base text, the rest are deltas
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text
1747
1751
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        # NOTE(review): this resolves to the module-level ``hash`` helper
        # (three arguments), not the Python builtin -- confirm at file top.
        return hash(text, p1, p2)
1755
1759
1756 def _processflags(self, text, flags, operation, raw=False):
1760 def _processflags(self, text, flags, operation, raw=False):
1757 """Inspect revision data flags and applies transforms defined by
1761 """Inspect revision data flags and applies transforms defined by
1758 registered flag processors.
1762 registered flag processors.
1759
1763
1760 ``text`` - the revision data to process
1764 ``text`` - the revision data to process
1761 ``flags`` - the revision flags
1765 ``flags`` - the revision flags
1762 ``operation`` - the operation being performed (read or write)
1766 ``operation`` - the operation being performed (read or write)
1763 ``raw`` - an optional argument describing if the raw transform should be
1767 ``raw`` - an optional argument describing if the raw transform should be
1764 applied.
1768 applied.
1765
1769
1766 This method processes the flags in the order (or reverse order if
1770 This method processes the flags in the order (or reverse order if
1767 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1771 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1768 flag processors registered for present flags. The order of flags defined
1772 flag processors registered for present flags. The order of flags defined
1769 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1773 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1770
1774
1771 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1775 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1772 processed text and ``validatehash`` is a bool indicating whether the
1776 processed text and ``validatehash`` is a bool indicating whether the
1773 returned text should be checked for hash integrity.
1777 returned text should be checked for hash integrity.
1774
1778
1775 Note: If the ``raw`` argument is set, it has precedence over the
1779 Note: If the ``raw`` argument is set, it has precedence over the
1776 operation and will only update the value of ``validatehash``.
1780 operation and will only update the value of ``validatehash``.
1777 """
1781 """
1778 # fast path: no flag processors will run
1782 # fast path: no flag processors will run
1779 if flags == 0:
1783 if flags == 0:
1780 return text, True
1784 return text, True
1781 if not operation in ('read', 'write'):
1785 if not operation in ('read', 'write'):
1782 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1786 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1783 # Check all flags are known.
1787 # Check all flags are known.
1784 if flags & ~REVIDX_KNOWN_FLAGS:
1788 if flags & ~REVIDX_KNOWN_FLAGS:
1785 raise RevlogError(_("incompatible revision flag '%#x'") %
1789 raise RevlogError(_("incompatible revision flag '%#x'") %
1786 (flags & ~REVIDX_KNOWN_FLAGS))
1790 (flags & ~REVIDX_KNOWN_FLAGS))
1787 validatehash = True
1791 validatehash = True
1788 # Depending on the operation (read or write), the order might be
1792 # Depending on the operation (read or write), the order might be
1789 # reversed due to non-commutative transforms.
1793 # reversed due to non-commutative transforms.
1790 orderedflags = REVIDX_FLAGS_ORDER
1794 orderedflags = REVIDX_FLAGS_ORDER
1791 if operation == 'write':
1795 if operation == 'write':
1792 orderedflags = reversed(orderedflags)
1796 orderedflags = reversed(orderedflags)
1793
1797
1794 for flag in orderedflags:
1798 for flag in orderedflags:
1795 # If a flagprocessor has been registered for a known flag, apply the
1799 # If a flagprocessor has been registered for a known flag, apply the
1796 # related operation transform and update result tuple.
1800 # related operation transform and update result tuple.
1797 if flag & flags:
1801 if flag & flags:
1798 vhash = True
1802 vhash = True
1799
1803
1800 if flag not in _flagprocessors:
1804 if flag not in _flagprocessors:
1801 message = _("missing processor for flag '%#x'") % (flag)
1805 message = _("missing processor for flag '%#x'") % (flag)
1802 raise RevlogError(message)
1806 raise RevlogError(message)
1803
1807
1804 processor = _flagprocessors[flag]
1808 processor = _flagprocessors[flag]
1805 if processor is not None:
1809 if processor is not None:
1806 readtransform, writetransform, rawtransform = processor
1810 readtransform, writetransform, rawtransform = processor
1807
1811
1808 if raw:
1812 if raw:
1809 vhash = rawtransform(self, text)
1813 vhash = rawtransform(self, text)
1810 elif operation == 'read':
1814 elif operation == 'read':
1811 text, vhash = readtransform(self, text)
1815 text, vhash = readtransform(self, text)
1812 else: # write operation
1816 else: # write operation
1813 text, vhash = writetransform(self, text)
1817 text, vhash = writetransform(self, text)
1814 validatehash = validatehash and vhash
1818 validatehash = validatehash and vhash
1815
1819
1816 return text, validatehash
1820 return text, validatehash
1817
1821
1818 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1822 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1819 """Check node hash integrity.
1823 """Check node hash integrity.
1820
1824
1821 Available as a function so that subclasses can extend hash mismatch
1825 Available as a function so that subclasses can extend hash mismatch
1822 behaviors as needed.
1826 behaviors as needed.
1823 """
1827 """
1824 if p1 is None and p2 is None:
1828 if p1 is None and p2 is None:
1825 p1, p2 = self.parents(node)
1829 p1, p2 = self.parents(node)
1826 if node != self.hash(text, p1, p2):
1830 if node != self.hash(text, p1, p2):
1827 revornode = rev
1831 revornode = rev
1828 if revornode is None:
1832 if revornode is None:
1829 revornode = templatefilters.short(hex(node))
1833 revornode = templatefilters.short(hex(node))
1830 raise RevlogError(_("integrity check failed on %s:%s")
1834 raise RevlogError(_("integrity check failed on %s:%s")
1831 % (self.indexfile, pycompat.bytestr(revornode)))
1835 % (self.indexfile, pycompat.bytestr(revornode)))
1832
1836
    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        # nothing to do while the revlog is not inline or still small enough
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        # register the new data file with the transaction for rollback
        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        # copy every revision's raw segment into the standalone data file
        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        # rewrite the index without the inline flag or interleaved data
        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replace the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        # cached offsets refer to the old inline layout; discard them
        self._chunkclear()
1879
1883
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            # compute the node from the original text before flag processors
            # get a chance to transform it
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            # already stored; adding again would be a no-op
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)
1925
1929
1926 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1930 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1927 cachedelta=None, deltacomputer=None):
1931 cachedelta=None, deltacomputer=None):
1928 """add a raw revision with known flags, node and parents
1932 """add a raw revision with known flags, node and parents
1929 useful when reusing a revision not stored in this revlog (ex: received
1933 useful when reusing a revision not stored in this revlog (ex: received
1930 over wire, or read from an external bundle).
1934 over wire, or read from an external bundle).
1931 """
1935 """
1932 dfh = None
1936 dfh = None
1933 if not self._inline:
1937 if not self._inline:
1934 dfh = self._datafp("a+")
1938 dfh = self._datafp("a+")
1935 ifh = self._indexfp("a+")
1939 ifh = self._indexfp("a+")
1936 try:
1940 try:
1937 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1941 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1938 flags, cachedelta, ifh, dfh,
1942 flags, cachedelta, ifh, dfh,
1939 deltacomputer=deltacomputer)
1943 deltacomputer=deltacomputer)
1940 finally:
1944 finally:
1941 if dfh:
1945 if dfh:
1942 dfh.close()
1946 dfh.close()
1943 ifh.close()
1947 ifh.close()
1944
1948
1945 def compress(self, data):
1949 def compress(self, data):
1946 """Generate a possibly-compressed representation of data."""
1950 """Generate a possibly-compressed representation of data."""
1947 if not data:
1951 if not data:
1948 return '', data
1952 return '', data
1949
1953
1950 compressed = self._compressor.compress(data)
1954 compressed = self._compressor.compress(data)
1951
1955
1952 if compressed:
1956 if compressed:
1953 # The revlog compressor added the header in the returned data.
1957 # The revlog compressor added the header in the returned data.
1954 return '', compressed
1958 return '', compressed
1955
1959
1956 if data[0:1] == '\0':
1960 if data[0:1] == '\0':
1957 return '', data
1961 return '', data
1958 return 'u', data
1962 return 'u', data
1959
1963
1960 def decompress(self, data):
1964 def decompress(self, data):
1961 """Decompress a revlog chunk.
1965 """Decompress a revlog chunk.
1962
1966
1963 The chunk is expected to begin with a header identifying the
1967 The chunk is expected to begin with a header identifying the
1964 format type so it can be routed to an appropriate decompressor.
1968 format type so it can be routed to an appropriate decompressor.
1965 """
1969 """
1966 if not data:
1970 if not data:
1967 return data
1971 return data
1968
1972
1969 # Revlogs are read much more frequently than they are written and many
1973 # Revlogs are read much more frequently than they are written and many
1970 # chunks only take microseconds to decompress, so performance is
1974 # chunks only take microseconds to decompress, so performance is
1971 # important here.
1975 # important here.
1972 #
1976 #
1973 # We can make a few assumptions about revlogs:
1977 # We can make a few assumptions about revlogs:
1974 #
1978 #
1975 # 1) the majority of chunks will be compressed (as opposed to inline
1979 # 1) the majority of chunks will be compressed (as opposed to inline
1976 # raw data).
1980 # raw data).
1977 # 2) decompressing *any* data will likely by at least 10x slower than
1981 # 2) decompressing *any* data will likely by at least 10x slower than
1978 # returning raw inline data.
1982 # returning raw inline data.
1979 # 3) we want to prioritize common and officially supported compression
1983 # 3) we want to prioritize common and officially supported compression
1980 # engines
1984 # engines
1981 #
1985 #
1982 # It follows that we want to optimize for "decompress compressed data
1986 # It follows that we want to optimize for "decompress compressed data
1983 # when encoded with common and officially supported compression engines"
1987 # when encoded with common and officially supported compression engines"
1984 # case over "raw data" and "data encoded by less common or non-official
1988 # case over "raw data" and "data encoded by less common or non-official
1985 # compression engines." That is why we have the inline lookup first
1989 # compression engines." That is why we have the inline lookup first
1986 # followed by the compengines lookup.
1990 # followed by the compengines lookup.
1987 #
1991 #
1988 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1992 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1989 # compressed chunks. And this matters for changelog and manifest reads.
1993 # compressed chunks. And this matters for changelog and manifest reads.
1990 t = data[0:1]
1994 t = data[0:1]
1991
1995
1992 if t == 'x':
1996 if t == 'x':
1993 try:
1997 try:
1994 return _zlibdecompress(data)
1998 return _zlibdecompress(data)
1995 except zlib.error as e:
1999 except zlib.error as e:
1996 raise RevlogError(_('revlog decompress error: %s') % str(e))
2000 raise RevlogError(_('revlog decompress error: %s') % str(e))
1997 # '\0' is more common than 'u' so it goes first.
2001 # '\0' is more common than 'u' so it goes first.
1998 elif t == '\0':
2002 elif t == '\0':
1999 return data
2003 return data
2000 elif t == 'u':
2004 elif t == 'u':
2001 return util.buffer(data, 1)
2005 return util.buffer(data, 1)
2002
2006
2003 try:
2007 try:
2004 compressor = self._decompressors[t]
2008 compressor = self._decompressors[t]
2005 except KeyError:
2009 except KeyError:
2006 try:
2010 try:
2007 engine = util.compengines.forrevlogheader(t)
2011 engine = util.compengines.forrevlogheader(t)
2008 compressor = engine.revlogcompressor()
2012 compressor = engine.revlogcompressor()
2009 self._decompressors[t] = compressor
2013 self._decompressors[t] = compressor
2010 except KeyError:
2014 except KeyError:
2011 raise RevlogError(_('unknown compression type %r') % t)
2015 raise RevlogError(_('unknown compression type %r') % t)
2012
2016
2013 return compressor.decompress(data)
2017 return compressor.decompress(data)
2014
2018
2015 def _isgooddeltainfo(self, d, textlen):
2019 def _isgooddeltainfo(self, d, textlen):
2016 """Returns True if the given delta is good. Good means that it is within
2020 """Returns True if the given delta is good. Good means that it is within
2017 the disk span, disk size, and chain length bounds that we know to be
2021 the disk span, disk size, and chain length bounds that we know to be
2018 performant."""
2022 performant."""
2019 if d is None:
2023 if d is None:
2020 return False
2024 return False
2021
2025
2022 # - 'd.distance' is the distance from the base revision -- bounding it
2026 # - 'd.distance' is the distance from the base revision -- bounding it
2023 # limits the amount of I/O we need to do.
2027 # limits the amount of I/O we need to do.
2024 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2028 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2025 # need to apply -- bounding it limits the amount of CPU we consume.
2029 # need to apply -- bounding it limits the amount of CPU we consume.
2026
2030
2027 defaultmax = textlen * 4
2031 defaultmax = textlen * 4
2028 maxdist = self._maxdeltachainspan
2032 maxdist = self._maxdeltachainspan
2029 if not maxdist:
2033 if not maxdist:
2030 maxdist = d.distance # ensure the conditional pass
2034 maxdist = d.distance # ensure the conditional pass
2031 maxdist = max(maxdist, defaultmax)
2035 maxdist = max(maxdist, defaultmax)
2032 if (d.distance > maxdist or d.deltalen > textlen or
2036 if (d.distance > maxdist or d.deltalen > textlen or
2033 d.compresseddeltalen > textlen * 2 or
2037 d.compresseddeltalen > textlen * 2 or
2034 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2038 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2035 return False
2039 return False
2036
2040
2037 return True
2041 return True
2038
2042
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        # null and wdir are synthetic revisions and must never be stored
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        # inline revlogs keep revision data in the index file itself, so
        # delta computation must read from the index handle in that case
        if self._inline:
            fh = ifh
        else:
            fh = dfh

        # rawtext is wrapped in a single-element list so a lazily-built text
        # can be shared by reference through revinfo with the deltacomputer
        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # no text yet: derive the resulting text length from the delta
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            # store as a delta against deltainfo.base
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            # no suitable delta found: store a compressed full snapshot
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        # insert before the sentinel entry kept at the end of the index
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer._buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node
2114
2118
2115 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2119 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2116 # Files opened in a+ mode have inconsistent behavior on various
2120 # Files opened in a+ mode have inconsistent behavior on various
2117 # platforms. Windows requires that a file positioning call be made
2121 # platforms. Windows requires that a file positioning call be made
2118 # when the file handle transitions between reads and writes. See
2122 # when the file handle transitions between reads and writes. See
2119 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2123 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2120 # platforms, Python or the platform itself can be buggy. Some versions
2124 # platforms, Python or the platform itself can be buggy. Some versions
2121 # of Solaris have been observed to not append at the end of the file
2125 # of Solaris have been observed to not append at the end of the file
2122 # if the file was seeked to before the end. See issue4943 for more.
2126 # if the file was seeked to before the end. See issue4943 for more.
2123 #
2127 #
2124 # We work around this issue by inserting a seek() before writing.
2128 # We work around this issue by inserting a seek() before writing.
2125 # Note: This is likely not necessary on Python 3.
2129 # Note: This is likely not necessary on Python 3.
2126 ifh.seek(0, os.SEEK_END)
2130 ifh.seek(0, os.SEEK_END)
2127 if dfh:
2131 if dfh:
2128 dfh.seek(0, os.SEEK_END)
2132 dfh.seek(0, os.SEEK_END)
2129
2133
2130 curr = len(self) - 1
2134 curr = len(self) - 1
2131 if not self._inline:
2135 if not self._inline:
2132 transaction.add(self.datafile, offset)
2136 transaction.add(self.datafile, offset)
2133 transaction.add(self.indexfile, curr * len(entry))
2137 transaction.add(self.indexfile, curr * len(entry))
2134 if data[0]:
2138 if data[0]:
2135 dfh.write(data[0])
2139 dfh.write(data[0])
2136 dfh.write(data[1])
2140 dfh.write(data[1])
2137 ifh.write(entry)
2141 ifh.write(entry)
2138 else:
2142 else:
2139 offset += curr * self._io.size
2143 offset += curr * self._io.size
2140 transaction.add(self.indexfile, offset, curr)
2144 transaction.add(self.indexfile, offset, curr)
2141 ifh.write(entry)
2145 ifh.write(entry)
2142 ifh.write(data[0])
2146 ifh.write(data[0])
2143 ifh.write(data[1])
2147 ifh.write(data[1])
2144 self.checkinlinesize(transaction, ifh)
2148 self.checkinlinesize(transaction, ifh)
2145
2149
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.

        Returns the list of nodes seen in ``deltas``, including nodes that
        were already present in this revlog.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        # the index is always appended to; a data file handle only exists
        # for non-inline revlogs
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            # push pending writes out; handed to _peek_iscensored below
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            # always release the file handles, even when a delta was bad
            if dfh:
                dfh.close()
            ifh.close()

        return nodes
2243
2247
2244 def iscensored(self, rev):
2248 def iscensored(self, rev):
2245 """Check if a file revision is censored."""
2249 """Check if a file revision is censored."""
2246 return False
2250 return False
2247
2251
2248 def _peek_iscensored(self, baserev, delta, flush):
2252 def _peek_iscensored(self, baserev, delta, flush):
2249 """Quickly check if a delta produces a censored revision."""
2253 """Quickly check if a delta produces a censored revision."""
2250 return False
2254 return False
2251
2255
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        # map of rev -> linkrev for the revisions still to be examined
        heads = {}
        # linkrevs >= minlink that we still expect to encounter while
        # walking down from the heads
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                # this rev gets stripped even though its linkrev precedes
                # minlink: record it as a broken linkrev (see docstring)
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            # the parents of the rev we just passed become new "heads" of
            # the remaining walk
            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs
2291
2295
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            # nothing to strip
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            # inline: index entries and revision data share a single file
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        # keep the trailing sentinel entry (new revisions are inserted
        # before it -- see _addrevision)
        del self.index[rev:-1]
2331
2335
2332 def checksize(self):
2336 def checksize(self):
2333 expected = 0
2337 expected = 0
2334 if len(self):
2338 if len(self):
2335 expected = max(0, self.end(len(self) - 1))
2339 expected = max(0, self.end(len(self) - 1))
2336
2340
2337 try:
2341 try:
2338 with self._datafp() as f:
2342 with self._datafp() as f:
2339 f.seek(0, 2)
2343 f.seek(0, 2)
2340 actual = f.tell()
2344 actual = f.tell()
2341 dd = actual - expected
2345 dd = actual - expected
2342 except IOError as inst:
2346 except IOError as inst:
2343 if inst.errno != errno.ENOENT:
2347 if inst.errno != errno.ENOENT:
2344 raise
2348 raise
2345 dd = 0
2349 dd = 0
2346
2350
2347 try:
2351 try:
2348 f = self.opener(self.indexfile)
2352 f = self.opener(self.indexfile)
2349 f.seek(0, 2)
2353 f.seek(0, 2)
2350 actual = f.tell()
2354 actual = f.tell()
2351 f.close()
2355 f.close()
2352 s = self._io.size
2356 s = self._io.size
2353 i = max(0, actual // s)
2357 i = max(0, actual // s)
2354 di = actual - (i * s)
2358 di = actual - (i * s)
2355 if self._inline:
2359 if self._inline:
2356 databytes = 0
2360 databytes = 0
2357 for r in self:
2361 for r in self:
2358 databytes += max(0, self.length(r))
2362 databytes += max(0, self.length(r))
2359 dd = 0
2363 dd = 0
2360 di = actual - len(self) * s - databytes
2364 di = actual - len(self) * s - databytes
2361 except IOError as inst:
2365 except IOError as inst:
2362 if inst.errno != errno.ENOENT:
2366 if inst.errno != errno.ENOENT:
2363 raise
2367 raise
2364 di = 0
2368 di = 0
2365
2369
2366 return (dd, di)
2370 return (dd, di)
2367
2371
2368 def files(self):
2372 def files(self):
2369 res = [self.indexfile]
2373 res = [self.indexfile]
2370 if not self._inline:
2374 if not self._inline:
2371 res.append(self.datafile)
2375 res.append(self.datafile)
2372 return res
2376 return res
2373
2377
    # Delta-reuse policies for ``clone()``; see the ``deltareuse`` argument
    # documented in ``clone()`` for the exact semantics of each value.
    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    # extra policy: re-add revisions through the public addrevision() path
    # instead of the lower-level _addrevision()
    DELTAREUSEFULLADD = 'fulladd'

    # the set of all valid ``deltareuse`` values accepted by ``clone()``
    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2381
2385
    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument control how deltas from the existing revlog
        are preserved in the destination revlog. The argument can have the
        following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions are inserted through the public ``addrevision()`` API
          instead of the lower-level ``_addrevision()``.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        By default, the current default is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        # Save the destination's configuration so it can be restored below.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, str(self._chunk(rev)))

                if not cachedelta:
                    # no reusable delta: fall back to the full raw text
                    rawtext = self.revision(rev, raw=True)


                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            # restore the destination's delta configuration even on failure
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd
General Comments 0
You need to be logged in to leave comments. Login now