revlog: resolve lfs rawtext to vanilla rawtext before applying delta
Jun Wu
r36889:369aadf7 4.5.1 stable
@@ -1,2516 +1,2521 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import errno
import hashlib
import heapq
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
# bitmark for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns a same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor
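The registration contract above is easiest to see with a concrete processor. A minimal sketch, assuming Mercurial is importable; the three callbacks are hypothetical placeholders, and in real code the chosen flag must not already have a processor registered (the LFS extension, for instance, claims REVIDX_EXTSTORED):

```python
from mercurial import revlog

def _extread(rl, rawtext):
    # stored rawtext -> user-visible text; False: this text is not what
    # the node hash was computed from
    return rawtext, False

def _extwrite(rl, text):
    # user text -> stored rawtext; True: hash the returned rawtext
    return text, True

def _extraw(rl, rawtext):
    # per the docstring, returns the same boolean as the write transform
    return True

revlog.addflagprocessor(revlog.REVIDX_EXTSTORED,
                        (_extread, _extwrite, _extraw))
```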

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
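The first field of each index entry multiplexes a byte offset with the 16-bit flags word. A standalone round trip of the packing (mirroring the helpers above with local copies so it runs on its own):

```python
REVIDX_ISCENSORED = 1 << 15   # mirrors the flag constant defined above

def offset_type(offset, type):
    # same packing as the helper above: offset in the high bits,
    # flags in the low 16 bits
    return int(int(offset) << 16 | type)

packed = offset_type(4096, REVIDX_ISCENSORED)
assert packed >> 16 == 4096                   # what getoffset() recovers
assert packed & 0xFFFF == REVIDX_ISCENSORED   # what gettype() recovers
```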

_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent node is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()
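For a root revision (both parents null) the scheme above reduces to sha1(nullid + nullid + text), which is exactly what the `_nullhash` copy shortcut computes. A self-contained worked check:

```python
import hashlib

nullid = b"\x00" * 20
text = b"hello\n"

s = hashlib.sha1(nullid)   # corresponds to _nullhash
s = s.copy()               # copying is cheaper than rehashing nullid
s.update(nullid)           # p1 (p2 is null, handled by the precomputed state)
s.update(text)
assert s.digest() == hashlib.sha1(nullid + nullid + text).digest()
```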

def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # Trim empty revs at the end, but never the very first revision of a chain
    while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
        endidx -= 1

    return revs[startidx:endidx]

def _slicechunk(revlog, revs):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    startbyte = start(revs[0])
    endbyte = start(revs[-1]) + length(revs[-1])
    readdata = deltachainspan = endbyte - startbyte

    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > revlog._srmingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < revlog._srdensitythreshold:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
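Toy numbers make the density heuristic above concrete; the offsets are made up, and the thresholds are the defaults set in revlog.__init__ further down:

```python
starts = [0, 100, 500000]    # on-disk chunk offsets of three revisions
lengths = [100, 100, 100]    # their compressed chunk sizes

chainpayload = sum(lengths)                             # 300 useful bytes
deltachainspan = starts[-1] + lengths[-1] - starts[0]   # 500100 bytes spanned
density = chainpayload / float(deltachainspan)          # ~0.0006

# density is far below _srdensitythreshold (0.25) and the 499800-byte gap
# before the last chunk exceeds _srmingapsize (262144), so _slicechunk
# would split the read into [rev0, rev1] and [rev2].
assert density < 0.25
assert starts[2] - (starts[1] + lengths[1]) > 262144
```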

@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()

class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides revisions that present an interest to be diffed against,
        grouped by level of easiness.
        """
        revlog = self.revlog
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if revlog._generaldelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not revlog._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                yield parents

            if prev not in tested:
                # other approach failed try against prev to hopefully save us a
                # fulltext.
                yield (prev,)

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
-            basetext = revlog.revision(baserev, _df=fh, raw=True)
+            # deltabase is rawtext before changed by flag processors, which is
+            # equivalent to non-raw text
+            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]
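The hunk above is the substance of this changeset: an incoming delta is computed against the delta base's vanilla text, but for an LFS (REVIDX_EXTSTORED) base, `raw=True` returns the stored LFS metadata instead, so patching against it would build a corrupt fulltext. A toy model of the failure, with made-up texts and a stand-in for mdiff.patch:

```python
vanilla = b"the real file contents"      # what the remote side diffed against
lfsmeta = b"version https://..."         # illustrative EXTSTORED rawtext

def toypatch(base, delta):
    # stand-in for mdiff.patch: a delta only applies cleanly to the
    # exact base it was generated from
    if base != vanilla:
        raise ValueError("delta base mismatch -> corrupt fulltext")
    return base + delta

delta = b" plus an edit"                 # generated remotely against vanilla

toypatch(vanilla, delta)                 # ok: base resolved with raw=False
try:
    toypatch(lfsmeta, delta)             # the old raw=True behaviour
except ValueError:
    pass                                 # this is the bug being fixed
```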

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)
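The v0 record layout in the comment above is four signed 32-bit integers plus three 20-byte hashes, 76 bytes per entry. A self-contained round trip with made-up field values:

```python
import struct

indexformatv0 = struct.Struct(">4l20s20s20s")
# (offset, compressed length, base rev, link rev, p1, p2, nodeid)
entry = (0, 120, 0, 0, b"\x01" * 20, b"\x02" * 20, b"\x03" * 20)

packed = indexformatv0.pack(*entry)
assert indexformatv0.size == 4 * 4 + 3 * 20 == 76
assert indexformatv0.unpack(packed) == entry
```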

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._srdensitythreshold = 0.25
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._withsparseread = bool(opts.get('with-sparse-read', False))
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                indexdata = util.buffer(util.mmapread(f))
            else:
                indexdata = f.read()
            f.close()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
924
926
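    # Illustrative sketch (assumes ``rlog`` is an open revlog). With
    # generaldelta each step follows the stored base pointer; without it,
    # the chain is simply a run of consecutive revisions:
    #
    #   chain, stopped = rlog._deltachain(rev)
    #   # chain[0] is the full-text base, chain[-1] == rev; applying the
    #   # deltas of chain[1:] in order rebuilds rev's raw text.
    #
    #   chain, stopped = rlog._deltachain(rev, stoprev=cachedrev)
    #   # stopped is True when the walk hit cachedrev first, meaning a
    #   # cached rawtext can stand in for the base.
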
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

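    # A hedged sketch of the return contract (names are illustrative):
    #
    #   has, missing = rlog.findcommonmissing(common=[c], heads=[h])
    #   # ``has`` answers membership lazily: ``r in has`` is True for
    #   # nullrev, for every rev in common, and for their ancestors.
    #   # ``missing`` lists the nodes of (::heads) - (::common), sorted by
    #   # revision number, e.g. what a peer that has ``common`` still lacks.
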
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

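    # The two variants differ only in vocabulary (sketch; n1/n2 are node
    # ids whose revs are r1/r2):
    #
    #   rlog.findmissing(common=[n1], heads=[n2])     # takes/returns nodes
    #   rlog.findmissingrevs(common=[r1], heads=[r2]) # takes/returns revs
    #
    # Both delegate to incrementalmissingrevs(), so they share one
    # ancestor-walking implementation.
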
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

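    # How the pure-Python fallback above works, as a worked sketch: every
    # visited rev is optimistically marked 1 ("may be a head") and then
    # clears the marks of its own parents. For a linear history 0 <- 1 <- 2,
    # visiting 1 clears ishead[0] and visiting 2 clears ishead[1], so only
    # rev 2 survives. The array is one slot longer than the revlog because
    # nullrev (-1) indexes the extra final slot and soaks up null parents.
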
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

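    # Usage sketch (not from the original file):
    #
    #   rlog.heads()               # nodes with no children at all
    #   rlog.heads(start=n)        # heads that are descendants of node n
    #   rlog.heads(stop=[n1, n2])  # treat revs from n1/n2 on as childless
    #
    # The filtered walk only scans revs after start's rev, since a head
    # descending from start cannot precede it in revision order.
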
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

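    # How the three ancestor queries relate, as a sketch:
    #
    #   heads = rlog.commonancestorsheads(a, b)  # all "best" candidates
    #   rlog.isancestor(a, b)                    # True iff a is in that set
    #   rlog.ancestor(a, b)                      # one deterministic winner
    #
    # ancestor() breaks ties with min() over node hashes, so every clone
    # picks the same merge base.
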
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except (TypeError, binascii.Error):
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

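    # Identifier shapes lookup() resolves, assuming a populated revlog
    # ``rlog`` and a known ``node`` (sketch only):
    #
    #   rlog.lookup(0)              # integer rev
    #   rlog.lookup('0')            # str(rev)
    #   rlog.lookup(node)           # 20-byte binary node
    #   rlog.lookup(hex(node))      # 40-char hex node
    #   rlog.lookup(hex(node)[:8])  # unambiguous hex prefix
    #
    # Ambiguous prefixes and unknown ids raise LookupError instead of
    # guessing.
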
    def shortest(self, hexnode, minlength=1):
        """Find the shortest unambiguous prefix that matches hexnode."""
        def isvalid(test):
            try:
                if self._partialmatch(test) is None:
                    return False

                try:
                    i = int(test)
                    # if we are a pure int, then starting with zero will not be
                    # confused as a rev; or, obviously, if the int is larger
                    # than the value of the tip rev
                    if test[0] == '0' or i > len(self):
                        return True
                    return False
                except ValueError:
                    return True
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True

        shortest = hexnode
        startlength = max(6, minlength)
        length = startlength
        while True:
            test = hexnode[:length]
            if isvalid(test):
                shortest = test
                if length == minlength or length > startlength:
                    return shortest
                length -= 1
            else:
                length += 1
                if len(shortest) <= length:
                    return shortest

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

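    # Worked example of the window arithmetic above, assuming the default
    # 64KiB chunk cache size (a power of two, so ``& ~(cachesize - 1)``
    # rounds down to a window boundary):
    #
    #   offset=70000, length=100, cachesize=65536
    #   realoffset = 70000 & ~65535                     = 65536
    #   reallength = ((70100 + 65536) & ~65535) - 65536 = 65536
    #
    # One aligned 64KiB read at 65536 therefore serves the 100 requested
    # bytes and pre-populates the cache on both sides of them.
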
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

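    # A note on the unpacking above: index entry[0] stores the data-file
    # offset in its high bits and the 16-bit revision flags in its low
    # bits, hence the ``>> 16`` to recover the offset. For inline revlogs,
    # index records and data are interleaved in a single file, so every
    # rev's physical position is shifted by ``(rev + 1) * self._io.size``
    # bytes of index records.
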
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

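    # Sketch of the two layouts this accessor hides:
    #
    #   rlog.deltaparent(rev)  # generaldelta: index[rev][3], any earlier rev
    #   rlog.deltaparent(rev)  # otherwise: always rev - 1
    #
    # A revision stored as a full snapshot records itself as its own base
    # and reports nullrev here.
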
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

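    # The read path above, condensed into a hedged sketch:
    #
    #   chain, stopped = self._deltachain(rev, stoprev=cachedrev)
    #   bins = self._chunks(chain)              # decompress base + deltas
    #   rawtext = mdiff.patches(rawtext, bins)  # fold deltas into rawtext
    #   text, vh = self._processflags(rawtext, flags, 'read', raw=raw)
    #
    # With raw=True the text-altering transforms are skipped and the
    # stored rawtext comes back as-is, which is what changegroup
    # generation and the debug commands rely on.
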
1754 def hash(self, text, p1, p2):
1756 def hash(self, text, p1, p2):
1755 """Compute a node hash.
1757 """Compute a node hash.
1756
1758
1757 Available as a function so that subclasses can replace the hash
1759 Available as a function so that subclasses can replace the hash
1758 as needed.
1760 as needed.
1759 """
1761 """
1760 return hash(text, p1, p2)
1762 return hash(text, p1, p2)
1761
1763
1762 def _processflags(self, text, flags, operation, raw=False):
1764 def _processflags(self, text, flags, operation, raw=False):
1763 """Inspect revision data flags and applies transforms defined by
1765 """Inspect revision data flags and applies transforms defined by
1764 registered flag processors.
1766 registered flag processors.
1765
1767
1766 ``text`` - the revision data to process
1768 ``text`` - the revision data to process
1767 ``flags`` - the revision flags
1769 ``flags`` - the revision flags
1768 ``operation`` - the operation being performed (read or write)
1770 ``operation`` - the operation being performed (read or write)
1769 ``raw`` - an optional argument describing if the raw transform should be
1771 ``raw`` - an optional argument describing if the raw transform should be
1770 applied.
1772 applied.
1771
1773
1772 This method processes the flags in the order (or reverse order if
1774 This method processes the flags in the order (or reverse order if
1773 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1775 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1774 flag processors registered for present flags. The order of flags defined
1776 flag processors registered for present flags. The order of flags defined
1775 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1777 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1776
1778
1777 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1779 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1778 processed text and ``validatehash`` is a bool indicating whether the
1780 processed text and ``validatehash`` is a bool indicating whether the
1779 returned text should be checked for hash integrity.
1781 returned text should be checked for hash integrity.
1780
1782
1781 Note: If the ``raw`` argument is set, it has precedence over the
1783 Note: If the ``raw`` argument is set, it has precedence over the
1782 operation and will only update the value of ``validatehash``.
1784 operation and will only update the value of ``validatehash``.
1783 """
1785 """
1784 # fast path: no flag processors will run
1786 # fast path: no flag processors will run
1785 if flags == 0:
1787 if flags == 0:
1786 return text, True
1788 return text, True
1787 if operation not in ('read', 'write'):
1789 if operation not in ('read', 'write'):
1788 raise ProgrammingError(_("invalid '%s' operation") % operation)
1790 raise ProgrammingError(_("invalid '%s' operation") % operation)
1789 # Check all flags are known.
1791 # Check all flags are known.
1790 if flags & ~REVIDX_KNOWN_FLAGS:
1792 if flags & ~REVIDX_KNOWN_FLAGS:
1791 raise RevlogError(_("incompatible revision flag '%#x'") %
1793 raise RevlogError(_("incompatible revision flag '%#x'") %
1792 (flags & ~REVIDX_KNOWN_FLAGS))
1794 (flags & ~REVIDX_KNOWN_FLAGS))
1793 validatehash = True
1795 validatehash = True
1794 # Depending on the operation (read or write), the order might be
1796 # Depending on the operation (read or write), the order might be
1795 # reversed due to non-commutative transforms.
1797 # reversed due to non-commutative transforms.
1796 orderedflags = REVIDX_FLAGS_ORDER
1798 orderedflags = REVIDX_FLAGS_ORDER
1797 if operation == 'write':
1799 if operation == 'write':
1798 orderedflags = reversed(orderedflags)
1800 orderedflags = reversed(orderedflags)
1799
1801
1800 for flag in orderedflags:
1802 for flag in orderedflags:
1801 # If a flagprocessor has been registered for a known flag, apply the
1803 # If a flagprocessor has been registered for a known flag, apply the
1802 # related operation transform and update result tuple.
1804 # related operation transform and update result tuple.
1803 if flag & flags:
1805 if flag & flags:
1804 vhash = True
1806 vhash = True
1805
1807
1806 if flag not in _flagprocessors:
1808 if flag not in _flagprocessors:
1807 message = _("missing processor for flag '%#x'") % (flag)
1809 message = _("missing processor for flag '%#x'") % (flag)
1808 raise RevlogError(message)
1810 raise RevlogError(message)
1809
1811
1810 processor = _flagprocessors[flag]
1812 processor = _flagprocessors[flag]
1811 if processor is not None:
1813 if processor is not None:
1812 readtransform, writetransform, rawtransform = processor
1814 readtransform, writetransform, rawtransform = processor
1813
1815
1814 if raw:
1816 if raw:
1815 vhash = rawtransform(self, text)
1817 vhash = rawtransform(self, text)
1816 elif operation == 'read':
1818 elif operation == 'read':
1817 text, vhash = readtransform(self, text)
1819 text, vhash = readtransform(self, text)
1818 else: # write operation
1820 else: # write operation
1819 text, vhash = writetransform(self, text)
1821 text, vhash = writetransform(self, text)
1820 validatehash = validatehash and vhash
1822 validatehash = validatehash and vhash
1821
1823
1822 return text, validatehash
1824 return text, validatehash
1823
1825
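The tuple unpacked from ``_flagprocessors`` above has a fixed shape: the 'read' and 'write' transforms return ``(text, validatehash)``, while the 'raw' transform returns only the bool. A minimal sketch of a no-op processor under that contract (``REVIDX_EXAMPLE`` is a hypothetical flag bit; real registration goes through ``addflagprocessor``, which also checks the flag against REVIDX_KNOWN_FLAGS):

def _readtransform(rl, rawtext):
    # 'read': turn the stored rawtext into the text consumers see
    return rawtext, True       # (text, validatehash)

def _writetransform(rl, text):
    # 'write': turn incoming text into the rawtext to store
    return text, True          # (rawtext, validatehash)

def _rawtransform(rl, rawtext):
    # 'raw': no transform; just report whether the rawtext hash can be
    # verified directly
    return True

REVIDX_EXAMPLE = 1 << 4        # hypothetical; a real flag must be one of
                               # the REVIDX_KNOWN_FLAGS bits
addflagprocessor(REVIDX_EXAMPLE,
                 (_readtransform, _writetransform, _rawtransform))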
1824 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1826 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1825 """Check node hash integrity.
1827 """Check node hash integrity.
1826
1828
1827 Available as a function so that subclasses can extend hash mismatch
1829 Available as a function so that subclasses can extend hash mismatch
1828 behaviors as needed.
1830 behaviors as needed.
1829 """
1831 """
1830 if p1 is None and p2 is None:
1832 if p1 is None and p2 is None:
1831 p1, p2 = self.parents(node)
1833 p1, p2 = self.parents(node)
1832 if node != self.hash(text, p1, p2):
1834 if node != self.hash(text, p1, p2):
1833 revornode = rev
1835 revornode = rev
1834 if revornode is None:
1836 if revornode is None:
1835 revornode = templatefilters.short(hex(node))
1837 revornode = templatefilters.short(hex(node))
1836 raise RevlogError(_("integrity check failed on %s:%s")
1838 raise RevlogError(_("integrity check failed on %s:%s")
1837 % (self.indexfile, pycompat.bytestr(revornode)))
1839 % (self.indexfile, pycompat.bytestr(revornode)))
1838
1840
1839 def checkinlinesize(self, tr, fp=None):
1841 def checkinlinesize(self, tr, fp=None):
1840 """Check if the revlog is too big for inline and convert if so.
1842 """Check if the revlog is too big for inline and convert if so.
1841
1843
1842 This should be called after revisions are added to the revlog. If the
1844 This should be called after revisions are added to the revlog. If the
1843 revlog has grown too large to be an inline revlog, it is converted
1845 revlog has grown too large to be an inline revlog, it is converted
1844 to use separate index and data files.
1846 to use separate index and data files.
1845 """
1847 """
1846 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1848 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1847 return
1849 return
1848
1850
1849 trinfo = tr.find(self.indexfile)
1851 trinfo = tr.find(self.indexfile)
1850 if trinfo is None:
1852 if trinfo is None:
1851 raise RevlogError(_("%s not found in the transaction")
1853 raise RevlogError(_("%s not found in the transaction")
1852 % self.indexfile)
1854 % self.indexfile)
1853
1855
1854 trindex = trinfo[2]
1856 trindex = trinfo[2]
1855 if trindex is not None:
1857 if trindex is not None:
1856 dataoff = self.start(trindex)
1858 dataoff = self.start(trindex)
1857 else:
1859 else:
1858 # revlog was stripped at start of transaction, use all leftover data
1860 # revlog was stripped at start of transaction, use all leftover data
1859 trindex = len(self) - 1
1861 trindex = len(self) - 1
1860 dataoff = self.end(-2)
1862 dataoff = self.end(-2)
1861
1863
1862 tr.add(self.datafile, dataoff)
1864 tr.add(self.datafile, dataoff)
1863
1865
1864 if fp:
1866 if fp:
1865 fp.flush()
1867 fp.flush()
1866 fp.close()
1868 fp.close()
1867
1869
1868 df = self.opener(self.datafile, 'w')
1870 df = self.opener(self.datafile, 'w')
1869 try:
1871 try:
1870 for r in self:
1872 for r in self:
1871 df.write(self._getsegmentforrevs(r, r)[1])
1873 df.write(self._getsegmentforrevs(r, r)[1])
1872 finally:
1874 finally:
1873 df.close()
1875 df.close()
1874
1876
1875 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1877 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1876 checkambig=self._checkambig)
1878 checkambig=self._checkambig)
1877 self.version &= ~FLAG_INLINE_DATA
1879 self.version &= ~FLAG_INLINE_DATA
1878 self._inline = False
1880 self._inline = False
1879 for i in self:
1881 for i in self:
1880 e = self._io.packentry(self.index[i], self.node, self.version, i)
1882 e = self._io.packentry(self.index[i], self.node, self.version, i)
1881 fp.write(e)
1883 fp.write(e)
1882
1884
1883 # if we don't call close, the temp file will never replace the
1885 # if we don't call close, the temp file will never replace the
1884 # real index
1886 # real index
1885 fp.close()
1887 fp.close()
1886
1888
1887 tr.replace(self.indexfile, trindex * self._io.size)
1889 tr.replace(self.indexfile, trindex * self._io.size)
1888 self._chunkclear()
1890 self._chunkclear()
1889
1891
1890 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1892 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1891 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1893 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1892 """add a revision to the log
1894 """add a revision to the log
1893
1895
1894 text - the revision data to add
1896 text - the revision data to add
1895 transaction - the transaction object used for rollback
1897 transaction - the transaction object used for rollback
1896 link - the linkrev data to add
1898 link - the linkrev data to add
1897 p1, p2 - the parent nodeids of the revision
1899 p1, p2 - the parent nodeids of the revision
1898 cachedelta - an optional precomputed delta
1900 cachedelta - an optional precomputed delta
1899 node - nodeid of revision; typically node is not specified, and it is
1901 node - nodeid of revision; typically node is not specified, and it is
1900 computed by default as hash(text, p1, p2); however, subclasses might
1902 computed by default as hash(text, p1, p2); however, subclasses might
1901 use a different hashing method (and override checkhash() in that case)
1903 use a different hashing method (and override checkhash() in that case)
1902 flags - the known flags to set on the revision
1904 flags - the known flags to set on the revision
1903 deltacomputer - an optional _deltacomputer instance shared between
1905 deltacomputer - an optional _deltacomputer instance shared between
1904 multiple calls
1906 multiple calls
1905 """
1907 """
1906 if link == nullrev:
1908 if link == nullrev:
1907 raise RevlogError(_("attempted to add linkrev -1 to %s")
1909 raise RevlogError(_("attempted to add linkrev -1 to %s")
1908 % self.indexfile)
1910 % self.indexfile)
1909
1911
1910 if flags:
1912 if flags:
1911 node = node or self.hash(text, p1, p2)
1913 node = node or self.hash(text, p1, p2)
1912
1914
1913 rawtext, validatehash = self._processflags(text, flags, 'write')
1915 rawtext, validatehash = self._processflags(text, flags, 'write')
1914
1916
1915 # If the flag processor modifies the revision data, ignore any provided
1917 # If the flag processor modifies the revision data, ignore any provided
1916 # cachedelta.
1918 # cachedelta.
1917 if rawtext != text:
1919 if rawtext != text:
1918 cachedelta = None
1920 cachedelta = None
1919
1921
1920 if len(rawtext) > _maxentrysize:
1922 if len(rawtext) > _maxentrysize:
1921 raise RevlogError(
1923 raise RevlogError(
1922 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1924 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1923 % (self.indexfile, len(rawtext)))
1925 % (self.indexfile, len(rawtext)))
1924
1926
1925 node = node or self.hash(rawtext, p1, p2)
1927 node = node or self.hash(rawtext, p1, p2)
1926 if node in self.nodemap:
1928 if node in self.nodemap:
1927 return node
1929 return node
1928
1930
1929 if validatehash:
1931 if validatehash:
1930 self.checkhash(rawtext, node, p1=p1, p2=p2)
1932 self.checkhash(rawtext, node, p1=p1, p2=p2)
1931
1933
1932 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1934 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1933 flags, cachedelta=cachedelta,
1935 flags, cachedelta=cachedelta,
1934 deltacomputer=deltacomputer)
1936 deltacomputer=deltacomputer)
1935
1937
1936 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1938 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1937 cachedelta=None, deltacomputer=None):
1939 cachedelta=None, deltacomputer=None):
1938 """add a raw revision with known flags, node and parents
1940 """add a raw revision with known flags, node and parents
1939 useful when reusing a revision not stored in this revlog (e.g. received
1941 useful when reusing a revision not stored in this revlog (e.g. received
1940 over the wire, or read from an external bundle).
1942 over the wire, or read from an external bundle).
1941 """
1943 """
1942 dfh = None
1944 dfh = None
1943 if not self._inline:
1945 if not self._inline:
1944 dfh = self.opener(self.datafile, "a+")
1946 dfh = self.opener(self.datafile, "a+")
1945 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1947 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1946 try:
1948 try:
1947 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1949 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1948 flags, cachedelta, ifh, dfh,
1950 flags, cachedelta, ifh, dfh,
1949 deltacomputer=deltacomputer)
1951 deltacomputer=deltacomputer)
1950 finally:
1952 finally:
1951 if dfh:
1953 if dfh:
1952 dfh.close()
1954 dfh.close()
1953 ifh.close()
1955 ifh.close()
1954
1956
1955 def compress(self, data):
1957 def compress(self, data):
1956 """Generate a possibly-compressed representation of data."""
1958 """Generate a possibly-compressed representation of data."""
1957 if not data:
1959 if not data:
1958 return '', data
1960 return '', data
1959
1961
1960 compressed = self._compressor.compress(data)
1962 compressed = self._compressor.compress(data)
1961
1963
1962 if compressed:
1964 if compressed:
1963 # The revlog compressor added the header in the returned data.
1965 # The revlog compressor added the header in the returned data.
1964 return '', compressed
1966 return '', compressed
1965
1967
1966 if data[0:1] == '\0':
1968 if data[0:1] == '\0':
1967 return '', data
1969 return '', data
1968 return 'u', data
1970 return 'u', data
1969
1971
1970 def decompress(self, data):
1972 def decompress(self, data):
1971 """Decompress a revlog chunk.
1973 """Decompress a revlog chunk.
1972
1974
1973 The chunk is expected to begin with a header identifying the
1975 The chunk is expected to begin with a header identifying the
1974 format type so it can be routed to an appropriate decompressor.
1976 format type so it can be routed to an appropriate decompressor.
1975 """
1977 """
1976 if not data:
1978 if not data:
1977 return data
1979 return data
1978
1980
1979 # Revlogs are read much more frequently than they are written and many
1981 # Revlogs are read much more frequently than they are written and many
1980 # chunks only take microseconds to decompress, so performance is
1982 # chunks only take microseconds to decompress, so performance is
1981 # important here.
1983 # important here.
1982 #
1984 #
1983 # We can make a few assumptions about revlogs:
1985 # We can make a few assumptions about revlogs:
1984 #
1986 #
1985 # 1) the majority of chunks will be compressed (as opposed to inline
1987 # 1) the majority of chunks will be compressed (as opposed to inline
1986 # raw data).
1988 # raw data).
1987 # 2) decompressing *any* data will likely be at least 10x slower than
1989 # 2) decompressing *any* data will likely be at least 10x slower than
1988 # returning raw inline data.
1990 # returning raw inline data.
1989 # 3) we want to prioritize common and officially supported compression
1991 # 3) we want to prioritize common and officially supported compression
1990 # engines
1992 # engines
1991 #
1993 #
1992 # It follows that we want to optimize for "decompress compressed data
1994 # It follows that we want to optimize for "decompress compressed data
1993 # when encoded with common and officially supported compression engines"
1995 # when encoded with common and officially supported compression engines"
1994 # case over "raw data" and "data encoded by less common or non-official
1996 # case over "raw data" and "data encoded by less common or non-official
1995 # compression engines." That is why we have the inline lookup first
1997 # compression engines." That is why we have the inline lookup first
1996 # followed by the compengines lookup.
1998 # followed by the compengines lookup.
1997 #
1999 #
1998 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2000 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1999 # compressed chunks. And this matters for changelog and manifest reads.
2001 # compressed chunks. And this matters for changelog and manifest reads.
2000 t = data[0:1]
2002 t = data[0:1]
2001
2003
2002 if t == 'x':
2004 if t == 'x':
2003 try:
2005 try:
2004 return _zlibdecompress(data)
2006 return _zlibdecompress(data)
2005 except zlib.error as e:
2007 except zlib.error as e:
2006 raise RevlogError(_('revlog decompress error: %s') % str(e))
2008 raise RevlogError(_('revlog decompress error: %s') % str(e))
2007 # '\0' is more common than 'u' so it goes first.
2009 # '\0' is more common than 'u' so it goes first.
2008 elif t == '\0':
2010 elif t == '\0':
2009 return data
2011 return data
2010 elif t == 'u':
2012 elif t == 'u':
2011 return util.buffer(data, 1)
2013 return util.buffer(data, 1)
2012
2014
2013 try:
2015 try:
2014 compressor = self._decompressors[t]
2016 compressor = self._decompressors[t]
2015 except KeyError:
2017 except KeyError:
2016 try:
2018 try:
2017 engine = util.compengines.forrevlogheader(t)
2019 engine = util.compengines.forrevlogheader(t)
2018 compressor = engine.revlogcompressor()
2020 compressor = engine.revlogcompressor()
2019 self._decompressors[t] = compressor
2021 self._decompressors[t] = compressor
2020 except KeyError:
2022 except KeyError:
2021 raise RevlogError(_('unknown compression type %r') % t)
2023 raise RevlogError(_('unknown compression type %r') % t)
2022
2024
2023 return compressor.decompress(data)
2025 return compressor.decompress(data)
2024
2026
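The one-byte header convention routed on above is compact enough to restate standalone. A sketch under ``toy_`` names (illustrative only; it ignores the pluggable compengines path that handles other header bytes):

import zlib

def toy_compress(data):
    comp = zlib.compress(data)
    if len(comp) < len(data):
        return '', comp        # zlib output already begins with 'x'
    if data.startswith('\0'):
        return '', data        # '\0' chunks can be stored verbatim
    return 'u', data           # 'u' marks stored-uncompressed data

def toy_decompress(chunk):
    t = chunk[0:1]
    if t == 'x':
        return zlib.decompress(chunk)
    elif t == '\0':
        return chunk
    elif t == 'u':
        return chunk[1:]
    raise ValueError('unknown compression type %r' % t)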
2025 def _isgooddeltainfo(self, d, textlen):
2027 def _isgooddeltainfo(self, d, textlen):
2026 """Returns True if the given delta is good. Good means that it is within
2028 """Returns True if the given delta is good. Good means that it is within
2027 the disk span, disk size, and chain length bounds that we know to be
2029 the disk span, disk size, and chain length bounds that we know to be
2028 performant."""
2030 performant."""
2029 if d is None:
2031 if d is None:
2030 return False
2032 return False
2031
2033
2032 # - 'd.distance' is the distance from the base revision -- bounding it
2034 # - 'd.distance' is the distance from the base revision -- bounding it
2033 # limits the amount of I/O we need to do.
2035 # limits the amount of I/O we need to do.
2034 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2036 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2035 # need to apply -- bounding it limits the amount of CPU we consume.
2037 # need to apply -- bounding it limits the amount of CPU we consume.
2036
2038
2037 defaultmax = textlen * 4
2039 defaultmax = textlen * 4
2038 maxdist = self._maxdeltachainspan
2040 maxdist = self._maxdeltachainspan
2039 if not maxdist:
2041 if not maxdist:
2040 maxdist = d.distance # ensure the conditional passes
2042 maxdist = d.distance # ensure the conditional passes
2041 maxdist = max(maxdist, defaultmax)
2043 maxdist = max(maxdist, defaultmax)
2042 if (d.distance > maxdist or d.deltalen > textlen or
2044 if (d.distance > maxdist or d.deltalen > textlen or
2043 d.compresseddeltalen > textlen * 2 or
2045 d.compresseddeltalen > textlen * 2 or
2044 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2046 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2045 return False
2047 return False
2046
2048
2047 return True
2049 return True
2048
2050
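To make the bounds concrete, here is a standalone restatement of the predicate with the revlog tunables passed in explicitly (an illustrative sketch; ``isgooddelta`` and its parameters are assumptions mirroring _maxdeltachainspan and _maxchainlen):

def isgooddelta(d, textlen, maxdeltachainspan=0, maxchainlen=None):
    if d is None:
        return False
    maxdist = maxdeltachainspan or d.distance  # 0 disables the span bound
    maxdist = max(maxdist, textlen * 4)
    return not (d.distance > maxdist                   # too much read I/O
                or d.deltalen > textlen                # delta > fulltext
                or d.compresseddeltalen > textlen * 2  # chain too costly
                or (maxchainlen and d.chainlen > maxchainlen))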
2049 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2051 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2050 cachedelta, ifh, dfh, alwayscache=False,
2052 cachedelta, ifh, dfh, alwayscache=False,
2051 deltacomputer=None):
2053 deltacomputer=None):
2052 """internal function to add revisions to the log
2054 """internal function to add revisions to the log
2053
2055
2054 see addrevision for argument descriptions.
2056 see addrevision for argument descriptions.
2055
2057
2056 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2058 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2057
2059
2058 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2060 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2059 be used.
2061 be used.
2060
2062
2061 invariants:
2063 invariants:
2062 - rawtext is optional (can be None); if not set, cachedelta must be set.
2064 - rawtext is optional (can be None); if not set, cachedelta must be set.
2063 if both are set, they must correspond to each other.
2065 if both are set, they must correspond to each other.
2064 """
2066 """
2065 if node == nullid:
2067 if node == nullid:
2066 raise RevlogError(_("%s: attempt to add null revision") %
2068 raise RevlogError(_("%s: attempt to add null revision") %
2067 (self.indexfile))
2069 (self.indexfile))
2068 if node == wdirid:
2070 if node == wdirid:
2069 raise RevlogError(_("%s: attempt to add wdir revision") %
2071 raise RevlogError(_("%s: attempt to add wdir revision") %
2070 (self.indexfile))
2072 (self.indexfile))
2071
2073
2072 if self._inline:
2074 if self._inline:
2073 fh = ifh
2075 fh = ifh
2074 else:
2076 else:
2075 fh = dfh
2077 fh = dfh
2076
2078
2077 btext = [rawtext]
2079 btext = [rawtext]
2078
2080
2079 curr = len(self)
2081 curr = len(self)
2080 prev = curr - 1
2082 prev = curr - 1
2081 offset = self.end(prev)
2083 offset = self.end(prev)
2082 p1r, p2r = self.rev(p1), self.rev(p2)
2084 p1r, p2r = self.rev(p1), self.rev(p2)
2083
2085
2084 # full versions are inserted when the needed deltas
2086 # full versions are inserted when the needed deltas
2085 # become comparable to the uncompressed text
2087 # become comparable to the uncompressed text
2086 if rawtext is None:
2088 if rawtext is None:
2087 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
2089 # we need the rawtext size before it was changed by flag processors,
2090 # which is the non-raw size. use revlog explicitly to avoid filelog's
2091 # extra logic that might strip the metadata size.
2092 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2088 cachedelta[1])
2093 cachedelta[1])
2089 else:
2094 else:
2090 textlen = len(rawtext)
2095 textlen = len(rawtext)
2091
2096
2092 if deltacomputer is None:
2097 if deltacomputer is None:
2093 deltacomputer = _deltacomputer(self)
2098 deltacomputer = _deltacomputer(self)
2094
2099
2095 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2100 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2096
2101
2097 # no delta for flag processor revision (see "candelta" for why)
2102 # no delta for flag processor revision (see "candelta" for why)
2098 # not calling candelta since only one revision needs testing, and to
2103 # not calling candelta since only one revision needs testing, and to
2099 # avoid the overhead of fetching the flags again.
2104 # avoid the overhead of fetching the flags again.
2100 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2105 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2101 deltainfo = None
2106 deltainfo = None
2102 else:
2107 else:
2103 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2108 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2104
2109
2105 if deltainfo is not None:
2110 if deltainfo is not None:
2106 base = deltainfo.base
2111 base = deltainfo.base
2107 chainbase = deltainfo.chainbase
2112 chainbase = deltainfo.chainbase
2108 data = deltainfo.data
2113 data = deltainfo.data
2109 l = deltainfo.deltalen
2114 l = deltainfo.deltalen
2110 else:
2115 else:
2111 rawtext = deltacomputer.buildtext(revinfo, fh)
2116 rawtext = deltacomputer.buildtext(revinfo, fh)
2112 data = self.compress(rawtext)
2117 data = self.compress(rawtext)
2113 l = len(data[1]) + len(data[0])
2118 l = len(data[1]) + len(data[0])
2114 base = chainbase = curr
2119 base = chainbase = curr
2115
2120
2116 e = (offset_type(offset, flags), l, textlen,
2121 e = (offset_type(offset, flags), l, textlen,
2117 base, link, p1r, p2r, node)
2122 base, link, p1r, p2r, node)
2118 self.index.insert(-1, e)
2123 self.index.insert(-1, e)
2119 self.nodemap[node] = curr
2124 self.nodemap[node] = curr
2120
2125
2121 entry = self._io.packentry(e, self.node, self.version, curr)
2126 entry = self._io.packentry(e, self.node, self.version, curr)
2122 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2127 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2123
2128
2124 if alwayscache and rawtext is None:
2129 if alwayscache and rawtext is None:
2125 rawtext = deltacomputer._buildtext(revinfo, fh)
2130 rawtext = deltacomputer._buildtext(revinfo, fh)
2126
2131
2127 if type(rawtext) == str: # only accept immutable objects
2132 if type(rawtext) == str: # only accept immutable objects
2128 self._cache = (node, curr, rawtext)
2133 self._cache = (node, curr, rawtext)
2129 self._chainbasecache[curr] = chainbase
2134 self._chainbasecache[curr] = chainbase
2130 return node
2135 return node
2131
2136
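The textlen computation above is the heart of this change: when the delta base carries flags (an LFS-stored revision, for instance), rawsize() is the length of the small stored pointer, while an incoming delta may have been computed against the resolved vanilla text, so the expected patched size must start from size(). For reference, a minimal re-implementation of the arithmetic behind mdiff.patchedsize, assuming the standard bdiff fragment format of a big-endian (start, end, datalen) header followed by replacement data:

import struct

def patchedsize(baselen, delta):
    # each fragment replaces base[start:end] with datalen new bytes
    outlen, last, pos = 0, 0, 0
    while pos < len(delta):
        start, end, datalen = struct.unpack('>lll', delta[pos:pos + 12])
        outlen += start - last     # bytes copied through from the base
        outlen += datalen          # bytes inserted by this fragment
        last = end
        pos += 12 + datalen
    outlen += baselen - last       # trailing copy from the base
    return outlen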
2132 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2137 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2133 # Files opened in a+ mode have inconsistent behavior on various
2138 # Files opened in a+ mode have inconsistent behavior on various
2134 # platforms. Windows requires that a file positioning call be made
2139 # platforms. Windows requires that a file positioning call be made
2135 # when the file handle transitions between reads and writes. See
2140 # when the file handle transitions between reads and writes. See
2136 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2141 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2137 # platforms, Python or the platform itself can be buggy. Some versions
2142 # platforms, Python or the platform itself can be buggy. Some versions
2138 # of Solaris have been observed to not append at the end of the file
2143 # of Solaris have been observed to not append at the end of the file
2139 # if the file was seeked to before the end. See issue4943 for more.
2144 # if the file was seeked to before the end. See issue4943 for more.
2140 #
2145 #
2141 # We work around this issue by inserting a seek() before writing.
2146 # We work around this issue by inserting a seek() before writing.
2142 # Note: This is likely not necessary on Python 3.
2147 # Note: This is likely not necessary on Python 3.
2143 ifh.seek(0, os.SEEK_END)
2148 ifh.seek(0, os.SEEK_END)
2144 if dfh:
2149 if dfh:
2145 dfh.seek(0, os.SEEK_END)
2150 dfh.seek(0, os.SEEK_END)
2146
2151
2147 curr = len(self) - 1
2152 curr = len(self) - 1
2148 if not self._inline:
2153 if not self._inline:
2149 transaction.add(self.datafile, offset)
2154 transaction.add(self.datafile, offset)
2150 transaction.add(self.indexfile, curr * len(entry))
2155 transaction.add(self.indexfile, curr * len(entry))
2151 if data[0]:
2156 if data[0]:
2152 dfh.write(data[0])
2157 dfh.write(data[0])
2153 dfh.write(data[1])
2158 dfh.write(data[1])
2154 ifh.write(entry)
2159 ifh.write(entry)
2155 else:
2160 else:
2156 offset += curr * self._io.size
2161 offset += curr * self._io.size
2157 transaction.add(self.indexfile, offset, curr)
2162 transaction.add(self.indexfile, offset, curr)
2158 ifh.write(entry)
2163 ifh.write(entry)
2159 ifh.write(data[0])
2164 ifh.write(data[0])
2160 ifh.write(data[1])
2165 ifh.write(data[1])
2161 self.checkinlinesize(transaction, ifh)
2166 self.checkinlinesize(transaction, ifh)
2162
2167
2163 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2168 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2164 """
2169 """
2165 add a delta group
2170 add a delta group
2166
2171
2167 given a set of deltas, add them to the revision log. the
2172 given a set of deltas, add them to the revision log. the
2168 first delta is against its parent, which should be in our
2173 first delta is against its parent, which should be in our
2169 log; the rest are against the previous delta.
2174 log; the rest are against the previous delta.
2170
2175
2171 If ``addrevisioncb`` is defined, it will be called with arguments of
2176 If ``addrevisioncb`` is defined, it will be called with arguments of
2172 this revlog and the node that was added.
2177 this revlog and the node that was added.
2173 """
2178 """
2174
2179
2175 nodes = []
2180 nodes = []
2176
2181
2177 r = len(self)
2182 r = len(self)
2178 end = 0
2183 end = 0
2179 if r:
2184 if r:
2180 end = self.end(r - 1)
2185 end = self.end(r - 1)
2181 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
2186 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
2182 isize = r * self._io.size
2187 isize = r * self._io.size
2183 if self._inline:
2188 if self._inline:
2184 transaction.add(self.indexfile, end + isize, r)
2189 transaction.add(self.indexfile, end + isize, r)
2185 dfh = None
2190 dfh = None
2186 else:
2191 else:
2187 transaction.add(self.indexfile, isize, r)
2192 transaction.add(self.indexfile, isize, r)
2188 transaction.add(self.datafile, end)
2193 transaction.add(self.datafile, end)
2189 dfh = self.opener(self.datafile, "a+")
2194 dfh = self.opener(self.datafile, "a+")
2190 def flush():
2195 def flush():
2191 if dfh:
2196 if dfh:
2192 dfh.flush()
2197 dfh.flush()
2193 ifh.flush()
2198 ifh.flush()
2194 try:
2199 try:
2195 deltacomputer = _deltacomputer(self)
2200 deltacomputer = _deltacomputer(self)
2196 # loop through our set of deltas
2201 # loop through our set of deltas
2197 for data in deltas:
2202 for data in deltas:
2198 node, p1, p2, linknode, deltabase, delta, flags = data
2203 node, p1, p2, linknode, deltabase, delta, flags = data
2199 link = linkmapper(linknode)
2204 link = linkmapper(linknode)
2200 flags = flags or REVIDX_DEFAULT_FLAGS
2205 flags = flags or REVIDX_DEFAULT_FLAGS
2201
2206
2202 nodes.append(node)
2207 nodes.append(node)
2203
2208
2204 if node in self.nodemap:
2209 if node in self.nodemap:
2205 # this can happen if two branches make the same change
2210 # this can happen if two branches make the same change
2206 continue
2211 continue
2207
2212
2208 for p in (p1, p2):
2213 for p in (p1, p2):
2209 if p not in self.nodemap:
2214 if p not in self.nodemap:
2210 raise LookupError(p, self.indexfile,
2215 raise LookupError(p, self.indexfile,
2211 _('unknown parent'))
2216 _('unknown parent'))
2212
2217
2213 if deltabase not in self.nodemap:
2218 if deltabase not in self.nodemap:
2214 raise LookupError(deltabase, self.indexfile,
2219 raise LookupError(deltabase, self.indexfile,
2215 _('unknown delta base'))
2220 _('unknown delta base'))
2216
2221
2217 baserev = self.rev(deltabase)
2222 baserev = self.rev(deltabase)
2218
2223
2219 if baserev != nullrev and self.iscensored(baserev):
2224 if baserev != nullrev and self.iscensored(baserev):
2220 # if base is censored, delta must be full replacement in a
2225 # if base is censored, delta must be full replacement in a
2221 # single patch operation
2226 # single patch operation
2222 hlen = struct.calcsize(">lll")
2227 hlen = struct.calcsize(">lll")
2223 oldlen = self.rawsize(baserev)
2228 oldlen = self.rawsize(baserev)
2224 newlen = len(delta) - hlen
2229 newlen = len(delta) - hlen
2225 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2230 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2226 raise error.CensoredBaseError(self.indexfile,
2231 raise error.CensoredBaseError(self.indexfile,
2227 self.node(baserev))
2232 self.node(baserev))
2228
2233
2229 if not flags and self._peek_iscensored(baserev, delta, flush):
2234 if not flags and self._peek_iscensored(baserev, delta, flush):
2230 flags |= REVIDX_ISCENSORED
2235 flags |= REVIDX_ISCENSORED
2231
2236
2232 # We assume consumers of addrevisioncb will want to retrieve
2237 # We assume consumers of addrevisioncb will want to retrieve
2233 # the added revision, which will require a call to
2238 # the added revision, which will require a call to
2234 # revision(). revision() will fast path if there is a cache
2239 # revision(). revision() will fast path if there is a cache
2235 # hit. So, we tell _addrevision() to always cache in this case.
2240 # hit. So, we tell _addrevision() to always cache in this case.
2236 # We're only using addgroup() in the context of changegroup
2241 # We're only using addgroup() in the context of changegroup
2237 # generation so the revision data can always be handled as raw
2242 # generation so the revision data can always be handled as raw
2238 # by the flagprocessor.
2243 # by the flagprocessor.
2239 self._addrevision(node, None, transaction, link,
2244 self._addrevision(node, None, transaction, link,
2240 p1, p2, flags, (baserev, delta),
2245 p1, p2, flags, (baserev, delta),
2241 ifh, dfh,
2246 ifh, dfh,
2242 alwayscache=bool(addrevisioncb),
2247 alwayscache=bool(addrevisioncb),
2243 deltacomputer=deltacomputer)
2248 deltacomputer=deltacomputer)
2244
2249
2245 if addrevisioncb:
2250 if addrevisioncb:
2246 addrevisioncb(self, node)
2251 addrevisioncb(self, node)
2247
2252
2248 if not dfh and not self._inline:
2253 if not dfh and not self._inline:
2249 # addrevision switched from inline to conventional
2254 # addrevision switched from inline to conventional
2250 # reopen the index
2255 # reopen the index
2251 ifh.close()
2256 ifh.close()
2252 dfh = self.opener(self.datafile, "a+")
2257 dfh = self.opener(self.datafile, "a+")
2253 ifh = self.opener(self.indexfile, "a+",
2258 ifh = self.opener(self.indexfile, "a+",
2254 checkambig=self._checkambig)
2259 checkambig=self._checkambig)
2255 finally:
2260 finally:
2256 if dfh:
2261 if dfh:
2257 dfh.close()
2262 dfh.close()
2258 ifh.close()
2263 ifh.close()
2259
2264
2260 return nodes
2265 return nodes
2261
2266
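A hypothetical usage sketch of the call shape (``applygroup``, ``cl``, and the stream are assumptions; the stream must yield the 7-tuples unpacked in the loop above):

def applygroup(rl, cl, deltastream, tr):
    # rl: this revlog; cl: the changelog used to resolve linkrevs;
    # deltastream yields (node, p1, p2, linknode, deltabase, delta, flags)
    def linkmapper(linknode):
        return cl.rev(linknode)
    def oncb(revlog, node):
        pass                       # e.g. record nodes as they land
    return rl.addgroup(deltastream, linkmapper, tr, addrevisioncb=oncb)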
2262 def iscensored(self, rev):
2267 def iscensored(self, rev):
2263 """Check if a file revision is censored."""
2268 """Check if a file revision is censored."""
2264 return False
2269 return False
2265
2270
2266 def _peek_iscensored(self, baserev, delta, flush):
2271 def _peek_iscensored(self, baserev, delta, flush):
2267 """Quickly check if a delta produces a censored revision."""
2272 """Quickly check if a delta produces a censored revision."""
2268 return False
2273 return False
2269
2274
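Censorship-aware subclasses override these two hooks. A hedged sketch of the usual shape (``censoredfilelog`` is an illustrative name; filelog's real override is along these lines):

class censoredfilelog(revlog):
    def iscensored(self, rev):
        # censored revisions are marked with REVIDX_ISCENSORED in the index
        return bool(self.flags(rev) & REVIDX_ISCENSORED)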
2270 def getstrippoint(self, minlink):
2275 def getstrippoint(self, minlink):
2271 """find the minimum rev that must be stripped to strip the linkrev
2276 """find the minimum rev that must be stripped to strip the linkrev
2272
2277
2273 Returns a tuple containing the minimum rev and a set of all revs that
2278 Returns a tuple containing the minimum rev and a set of all revs that
2274 have linkrevs that will be broken by this strip.
2279 have linkrevs that will be broken by this strip.
2275 """
2280 """
2276 brokenrevs = set()
2281 brokenrevs = set()
2277 strippoint = len(self)
2282 strippoint = len(self)
2278
2283
2279 heads = {}
2284 heads = {}
2280 futurelargelinkrevs = set()
2285 futurelargelinkrevs = set()
2281 for head in self.headrevs():
2286 for head in self.headrevs():
2282 headlinkrev = self.linkrev(head)
2287 headlinkrev = self.linkrev(head)
2283 heads[head] = headlinkrev
2288 heads[head] = headlinkrev
2284 if headlinkrev >= minlink:
2289 if headlinkrev >= minlink:
2285 futurelargelinkrevs.add(headlinkrev)
2290 futurelargelinkrevs.add(headlinkrev)
2286
2291
2287 # This algorithm involves walking down the rev graph, starting at the
2292 # This algorithm involves walking down the rev graph, starting at the
2288 # heads. Since the revs are topologically sorted according to linkrev,
2293 # heads. Since the revs are topologically sorted according to linkrev,
2289 # once all head linkrevs are below the minlink, we know there are
2294 # once all head linkrevs are below the minlink, we know there are
2290 # no more revs that could have a linkrev greater than minlink.
2295 # no more revs that could have a linkrev greater than minlink.
2291 # So we can stop walking.
2296 # So we can stop walking.
2292 while futurelargelinkrevs:
2297 while futurelargelinkrevs:
2293 strippoint -= 1
2298 strippoint -= 1
2294 linkrev = heads.pop(strippoint)
2299 linkrev = heads.pop(strippoint)
2295
2300
2296 if linkrev < minlink:
2301 if linkrev < minlink:
2297 brokenrevs.add(strippoint)
2302 brokenrevs.add(strippoint)
2298 else:
2303 else:
2299 futurelargelinkrevs.remove(linkrev)
2304 futurelargelinkrevs.remove(linkrev)
2300
2305
2301 for p in self.parentrevs(strippoint):
2306 for p in self.parentrevs(strippoint):
2302 if p != nullrev:
2307 if p != nullrev:
2303 plinkrev = self.linkrev(p)
2308 plinkrev = self.linkrev(p)
2304 heads[p] = plinkrev
2309 heads[p] = plinkrev
2305 if plinkrev >= minlink:
2310 if plinkrev >= minlink:
2306 futurelargelinkrevs.add(plinkrev)
2311 futurelargelinkrevs.add(plinkrev)
2307
2312
2308 return strippoint, brokenrevs
2313 return strippoint, brokenrevs
2309
2314
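A small usage sketch of how this pairs with strip() below (``rl`` and ``minlink`` are assumed to exist):

rev, brokenrevs = rl.getstrippoint(minlink)
if rev < len(rl):
    # revisions rev..len(rl)-1 would be truncated; brokenrevs are the
    # revisions above the strip point whose linkrev is already < minlink,
    # which the caller must save and re-add after the strip
    pass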
2310 def strip(self, minlink, transaction):
2315 def strip(self, minlink, transaction):
2311 """truncate the revlog on the first revision with a linkrev >= minlink
2316 """truncate the revlog on the first revision with a linkrev >= minlink
2312
2317
2313 This function is called when we're stripping revision minlink and
2318 This function is called when we're stripping revision minlink and
2314 its descendants from the repository.
2319 its descendants from the repository.
2315
2320
2316 We have to remove all revisions with linkrev >= minlink, because
2321 We have to remove all revisions with linkrev >= minlink, because
2317 the equivalent changelog revisions will be renumbered after the
2322 the equivalent changelog revisions will be renumbered after the
2318 strip.
2323 strip.
2319
2324
2320 So we truncate the revlog on the first of these revisions, and
2325 So we truncate the revlog on the first of these revisions, and
2321 trust that the caller has saved the revisions that shouldn't be
2326 trust that the caller has saved the revisions that shouldn't be
2322 removed and that it'll re-add them after this truncation.
2327 removed and that it'll re-add them after this truncation.
2323 """
2328 """
2324 if len(self) == 0:
2329 if len(self) == 0:
2325 return
2330 return
2326
2331
2327 rev, _ = self.getstrippoint(minlink)
2332 rev, _ = self.getstrippoint(minlink)
2328 if rev == len(self):
2333 if rev == len(self):
2329 return
2334 return
2330
2335
2331 # first truncate the files on disk
2336 # first truncate the files on disk
2332 end = self.start(rev)
2337 end = self.start(rev)
2333 if not self._inline:
2338 if not self._inline:
2334 transaction.add(self.datafile, end)
2339 transaction.add(self.datafile, end)
2335 end = rev * self._io.size
2340 end = rev * self._io.size
2336 else:
2341 else:
2337 end += rev * self._io.size
2342 end += rev * self._io.size
2338
2343
2339 transaction.add(self.indexfile, end)
2344 transaction.add(self.indexfile, end)
2340
2345
2341 # then reset internal state in memory to forget those revisions
2346 # then reset internal state in memory to forget those revisions
2342 self._cache = None
2347 self._cache = None
2343 self._chaininfocache = {}
2348 self._chaininfocache = {}
2344 self._chunkclear()
2349 self._chunkclear()
2345 for x in xrange(rev, len(self)):
2350 for x in xrange(rev, len(self)):
2346 del self.nodemap[self.node(x)]
2351 del self.nodemap[self.node(x)]
2347
2352
2348 del self.index[rev:-1]
2353 del self.index[rev:-1]
2349
2354
2350 def checksize(self):
2355 def checksize(self):
2351 expected = 0
2356 expected = 0
2352 if len(self):
2357 if len(self):
2353 expected = max(0, self.end(len(self) - 1))
2358 expected = max(0, self.end(len(self) - 1))
2354
2359
2355 try:
2360 try:
2356 f = self.opener(self.datafile)
2361 f = self.opener(self.datafile)
2357 f.seek(0, 2)
2362 f.seek(0, 2)
2358 actual = f.tell()
2363 actual = f.tell()
2359 f.close()
2364 f.close()
2360 dd = actual - expected
2365 dd = actual - expected
2361 except IOError as inst:
2366 except IOError as inst:
2362 if inst.errno != errno.ENOENT:
2367 if inst.errno != errno.ENOENT:
2363 raise
2368 raise
2364 dd = 0
2369 dd = 0
2365
2370
2366 try:
2371 try:
2367 f = self.opener(self.indexfile)
2372 f = self.opener(self.indexfile)
2368 f.seek(0, 2)
2373 f.seek(0, 2)
2369 actual = f.tell()
2374 actual = f.tell()
2370 f.close()
2375 f.close()
2371 s = self._io.size
2376 s = self._io.size
2372 i = max(0, actual // s)
2377 i = max(0, actual // s)
2373 di = actual - (i * s)
2378 di = actual - (i * s)
2374 if self._inline:
2379 if self._inline:
2375 databytes = 0
2380 databytes = 0
2376 for r in self:
2381 for r in self:
2377 databytes += max(0, self.length(r))
2382 databytes += max(0, self.length(r))
2378 dd = 0
2383 dd = 0
2379 di = actual - len(self) * s - databytes
2384 di = actual - len(self) * s - databytes
2380 except IOError as inst:
2385 except IOError as inst:
2381 if inst.errno != errno.ENOENT:
2386 if inst.errno != errno.ENOENT:
2382 raise
2387 raise
2383 di = 0
2388 di = 0
2384
2389
2385 return (dd, di)
2390 return (dd, di)
2386
2391
2387 def files(self):
2392 def files(self):
2388 res = [self.indexfile]
2393 res = [self.indexfile]
2389 if not self._inline:
2394 if not self._inline:
2390 res.append(self.datafile)
2395 res.append(self.datafile)
2391 return res
2396 return res
2392
2397
2393 DELTAREUSEALWAYS = 'always'
2398 DELTAREUSEALWAYS = 'always'
2394 DELTAREUSESAMEREVS = 'samerevs'
2399 DELTAREUSESAMEREVS = 'samerevs'
2395 DELTAREUSENEVER = 'never'
2400 DELTAREUSENEVER = 'never'
2396
2401
2397 DELTAREUSEFULLADD = 'fulladd'
2402 DELTAREUSEFULLADD = 'fulladd'
2398
2403
2399 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2404 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2400
2405
2401 def clone(self, tr, destrevlog, addrevisioncb=None,
2406 def clone(self, tr, destrevlog, addrevisioncb=None,
2402 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2407 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2403 """Copy this revlog to another, possibly with format changes.
2408 """Copy this revlog to another, possibly with format changes.
2404
2409
2405 The destination revlog will contain the same revisions and nodes.
2410 The destination revlog will contain the same revisions and nodes.
2406 However, it may not be bit-for-bit identical due to e.g. delta encoding
2411 However, it may not be bit-for-bit identical due to e.g. delta encoding
2407 differences.
2412 differences.
2408
2413
2409 The ``deltareuse`` argument controls how deltas from the existing revlog
2414 The ``deltareuse`` argument controls how deltas from the existing revlog
2410 are preserved in the destination revlog. The argument can have the
2415 are preserved in the destination revlog. The argument can have the
2411 following values:
2416 following values:
2412
2417
2413 DELTAREUSEALWAYS
2418 DELTAREUSEALWAYS
2414 Deltas will always be reused (if possible), even if the destination
2419 Deltas will always be reused (if possible), even if the destination
2415 revlog would not select the same revisions for the delta. This is the
2420 revlog would not select the same revisions for the delta. This is the
2416 fastest mode of operation.
2421 fastest mode of operation.
2417 DELTAREUSESAMEREVS
2422 DELTAREUSESAMEREVS
2418 Deltas will be reused if the destination revlog would pick the same
2423 Deltas will be reused if the destination revlog would pick the same
2419 revisions for the delta. This mode strikes a balance between speed
2424 revisions for the delta. This mode strikes a balance between speed
2420 and optimization.
2425 and optimization.
2421 DELTAREUSENEVER
2426 DELTAREUSENEVER
2422 Deltas will never be reused. This is the slowest mode of execution.
2427 Deltas will never be reused. This is the slowest mode of execution.
2423 This mode can be used to recompute deltas (e.g. if the diff/delta
2428 This mode can be used to recompute deltas (e.g. if the diff/delta
2424 algorithm changes).
2429 algorithm changes).
2425
2430
2426 Delta computation can be slow, so the choice of delta reuse policy can
2431 Delta computation can be slow, so the choice of delta reuse policy can
2427 significantly affect run time.
2432 significantly affect run time.
2428
2433
2429 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2434 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2430 two extremes. Deltas will be reused when appropriate. But if a better
2435 two extremes. Deltas will be reused when appropriate. But if a better
2431 base revision could be chosen for the delta, it will be. This means if you
2436 base revision could be chosen for the delta, it will be. This means if you
2432 are converting a non-generaldelta revlog to a generaldelta revlog,
2437 are converting a non-generaldelta revlog to a generaldelta revlog,
2433 deltas will be recomputed if the delta's parent isn't a parent of the
2438 deltas will be recomputed if the delta's parent isn't a parent of the
2434 revision.
2439 revision.
2435
2440
2436 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2441 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2437 controls whether to compute deltas against both parents for merges.
2442 controls whether to compute deltas against both parents for merges.
2438 If unspecified, the destination revlog's current setting is used.
2443 If unspecified, the destination revlog's current setting is used.
2439 """
2444 """
2440 if deltareuse not in self.DELTAREUSEALL:
2445 if deltareuse not in self.DELTAREUSEALL:
2441 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2446 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2442
2447
2443 if len(destrevlog):
2448 if len(destrevlog):
2444 raise ValueError(_('destination revlog is not empty'))
2449 raise ValueError(_('destination revlog is not empty'))
2445
2450
2446 if getattr(self, 'filteredrevs', None):
2451 if getattr(self, 'filteredrevs', None):
2447 raise ValueError(_('source revlog has filtered revisions'))
2452 raise ValueError(_('source revlog has filtered revisions'))
2448 if getattr(destrevlog, 'filteredrevs', None):
2453 if getattr(destrevlog, 'filteredrevs', None):
2449 raise ValueError(_('destination revlog has filtered revisions'))
2454 raise ValueError(_('destination revlog has filtered revisions'))
2450
2455
2451 # lazydeltabase controls whether to reuse a cached delta, if possible.
2456 # lazydeltabase controls whether to reuse a cached delta, if possible.
2452 oldlazydeltabase = destrevlog._lazydeltabase
2457 oldlazydeltabase = destrevlog._lazydeltabase
2453 oldamd = destrevlog._aggressivemergedeltas
2458 oldamd = destrevlog._aggressivemergedeltas
2454
2459
2455 try:
2460 try:
2456 if deltareuse == self.DELTAREUSEALWAYS:
2461 if deltareuse == self.DELTAREUSEALWAYS:
2457 destrevlog._lazydeltabase = True
2462 destrevlog._lazydeltabase = True
2458 elif deltareuse == self.DELTAREUSESAMEREVS:
2463 elif deltareuse == self.DELTAREUSESAMEREVS:
2459 destrevlog._lazydeltabase = False
2464 destrevlog._lazydeltabase = False
2460
2465
2461 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2466 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2462
2467
2463 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2468 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2464 self.DELTAREUSESAMEREVS)
2469 self.DELTAREUSESAMEREVS)
2465
2470
2466 deltacomputer = _deltacomputer(destrevlog)
2471 deltacomputer = _deltacomputer(destrevlog)
2467 index = self.index
2472 index = self.index
2468 for rev in self:
2473 for rev in self:
2469 entry = index[rev]
2474 entry = index[rev]
2470
2475
2471 # Some classes override linkrev to take filtered revs into
2476 # Some classes override linkrev to take filtered revs into
2472 # account. Use raw entry from index.
2477 # account. Use raw entry from index.
2473 flags = entry[0] & 0xffff
2478 flags = entry[0] & 0xffff
2474 linkrev = entry[4]
2479 linkrev = entry[4]
2475 p1 = index[entry[5]][7]
2480 p1 = index[entry[5]][7]
2476 p2 = index[entry[6]][7]
2481 p2 = index[entry[6]][7]
2477 node = entry[7]
2482 node = entry[7]
2478
2483
2479 # (Possibly) reuse the delta from the revlog if allowed and
2484 # (Possibly) reuse the delta from the revlog if allowed and
2480 # the revlog chunk is a delta.
2485 # the revlog chunk is a delta.
2481 cachedelta = None
2486 cachedelta = None
2482 rawtext = None
2487 rawtext = None
2483 if populatecachedelta:
2488 if populatecachedelta:
2484 dp = self.deltaparent(rev)
2489 dp = self.deltaparent(rev)
2485 if dp != nullrev:
2490 if dp != nullrev:
2486 cachedelta = (dp, str(self._chunk(rev)))
2491 cachedelta = (dp, str(self._chunk(rev)))
2487
2492
2488 if not cachedelta:
2493 if not cachedelta:
2489 rawtext = self.revision(rev, raw=True)
2494 rawtext = self.revision(rev, raw=True)
2490
2495
2491
2496
2492 if deltareuse == self.DELTAREUSEFULLADD:
2497 if deltareuse == self.DELTAREUSEFULLADD:
2493 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2498 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2494 cachedelta=cachedelta,
2499 cachedelta=cachedelta,
2495 node=node, flags=flags,
2500 node=node, flags=flags,
2496 deltacomputer=deltacomputer)
2501 deltacomputer=deltacomputer)
2497 else:
2502 else:
2498 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2503 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2499 checkambig=False)
2504 checkambig=False)
2500 dfh = None
2505 dfh = None
2501 if not destrevlog._inline:
2506 if not destrevlog._inline:
2502 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2507 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2503 try:
2508 try:
2504 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2509 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2505 p2, flags, cachedelta, ifh, dfh,
2510 p2, flags, cachedelta, ifh, dfh,
2506 deltacomputer=deltacomputer)
2511 deltacomputer=deltacomputer)
2507 finally:
2512 finally:
2508 if dfh:
2513 if dfh:
2509 dfh.close()
2514 dfh.close()
2510 ifh.close()
2515 ifh.close()
2511
2516
2512 if addrevisioncb:
2517 if addrevisioncb:
2513 addrevisioncb(self, rev, node)
2518 addrevisioncb(self, rev, node)
2514 finally:
2519 finally:
2515 destrevlog._lazydeltabase = oldlazydeltabase
2520 destrevlog._lazydeltabase = oldlazydeltabase
2516 destrevlog._aggressivemergedeltas = oldamd
2521 destrevlog._aggressivemergedeltas = oldamd
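Before the test changes below, a hypothetical usage sketch of clone(), in the style upgrade-like code might use (``src`` and ``dst`` are revlog instances over suitable openers and ``tr`` an open transaction; all three are assumptions):

src.clone(tr, dst,
          deltareuse=src.DELTAREUSENEVER,   # force fresh delta computation
          aggressivemergedeltas=True)
assert len(dst) == len(src)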
@@ -1,97 +1,97
1 This test exercises LFS bundle application on both LFS and non-LFS
1 This test exercises LFS bundle application on both LFS and non-LFS
2 repos.
2 repos.
3
3
4 To make it more interesting, the file revisions will contain hg filelog
4 To make it more interesting, the file revisions will contain hg filelog
5 metadata ('\1\n'). The bundle will have 1 file revision overlapping with the
5 metadata ('\1\n'). The bundle will have 1 file revision overlapping with the
6 destination repo.
6 destination repo.
7
7
8 # rev      1          2     3
8 # rev      1          2     3
9 # repo:    yes        yes   no
9 # repo:    yes        yes   no
10 # bundle:  no (base)  yes   yes (deltabase: 2 if possible)
10 # bundle:  no (base)  yes   yes (deltabase: 2 if possible)
11
11
12 It is interesting because rev 2 could have been stored as LFS in the repo, and
12 It is interesting because rev 2 could have been stored as LFS in the repo, and
13 non-LFS in the bundle; or vice-versa.
13 non-LFS in the bundle; or vice-versa.
14
14
15 Init
15 Init
16
16
17 $ cat >> $HGRCPATH << EOF
17 $ cat >> $HGRCPATH << EOF
18 > [extensions]
18 > [extensions]
19 > lfs=
19 > lfs=
20 > drawdag=$TESTDIR/drawdag.py
20 > drawdag=$TESTDIR/drawdag.py
21 > [lfs]
21 > [lfs]
22 > url=file:$TESTTMP/lfs-remote
22 > url=file:$TESTTMP/lfs-remote
23 > EOF
23 > EOF
24
24
25 Helper functions
25 Helper functions
26
26
27 $ commitxy() {
27 $ commitxy() {
28 > hg debugdrawdag "$@" <<'EOS'
28 > hg debugdrawdag "$@" <<'EOS'
29 > Y # Y/X=\1\nAAAA\nE\nF
29 > Y # Y/X=\1\nAAAA\nE\nF
30 > | # Y/Y=\1\nAAAA\nG\nH
30 > | # Y/Y=\1\nAAAA\nG\nH
31 > X # X/X=\1\nAAAA\nC\n
31 > X # X/X=\1\nAAAA\nC\n
32 > # X/Y=\1\nAAAA\nD\n
32 > # X/Y=\1\nAAAA\nD\n
33 > EOS
33 > EOS
34 > }
34 > }
35
35
36 $ commitz() {
36 $ commitz() {
37 > hg debugdrawdag "$@" <<'EOS'
37 > hg debugdrawdag "$@" <<'EOS'
38 > Z # Z/X=\1\nAAAA\nI\n
38 > Z # Z/X=\1\nAAAA\nI\n
39 > | # Z/Y=\1\nAAAA\nJ\n
39 > | # Z/Y=\1\nAAAA\nJ\n
40 > | # Z/Z=\1\nZ
40 > | # Z/Z=\1\nZ
41 > Y
41 > Y
42 > EOS
42 > EOS
43 > }
43 > }
44
44
45 $ enablelfs() {
45 $ enablelfs() {
46 > cat >> .hg/hgrc <<EOF
46 > cat >> .hg/hgrc <<EOF
47 > [lfs]
47 > [lfs]
48 > track=all()
48 > track=all()
49 > EOF
49 > EOF
50 > }
50 > }
51
51
52 Generate bundles
52 Generate bundles
53
53
54 $ for i in normal lfs; do
54 $ for i in normal lfs; do
55 > NAME=src-$i
55 > NAME=src-$i
56 > hg init $TESTTMP/$NAME
56 > hg init $TESTTMP/$NAME
57 > cd $TESTTMP/$NAME
57 > cd $TESTTMP/$NAME
58 > [ $i = lfs ] && enablelfs
58 > [ $i = lfs ] && enablelfs
59 > commitxy
59 > commitxy
60 > commitz
60 > commitz
61 > hg bundle -q --base X -r Y+Z $TESTTMP/$NAME.bundle
61 > hg bundle -q --base X -r Y+Z $TESTTMP/$NAME.bundle
62 > SRCNAMES="$SRCNAMES $NAME"
62 > SRCNAMES="$SRCNAMES $NAME"
63 > done
63 > done
64
64
65 Prepare destination repos
65 Prepare destination repos
66
66
67 $ for i in normal lfs; do
67 $ for i in normal lfs; do
68 > NAME=dst-$i
68 > NAME=dst-$i
69 > hg init $TESTTMP/$NAME
69 > hg init $TESTTMP/$NAME
70 > cd $TESTTMP/$NAME
70 > cd $TESTTMP/$NAME
71 > [ $i = lfs ] && enablelfs
71 > [ $i = lfs ] && enablelfs
72 > commitxy
72 > commitxy
73 > DSTNAMES="$DSTNAMES $NAME"
73 > DSTNAMES="$DSTNAMES $NAME"
74 > done
74 > done
75
75
76 Apply bundles
76 Apply bundles
77
77
78 $ for i in $SRCNAMES; do
78 $ for i in $SRCNAMES; do
79 > for j in $DSTNAMES; do
79 > for j in $DSTNAMES; do
80 > echo ---- Applying $i.bundle to $j ----
80 > echo ---- Applying $i.bundle to $j ----
81 > cp -R $TESTTMP/$j $TESTTMP/tmp-$i-$j
81 > cp -R $TESTTMP/$j $TESTTMP/tmp-$i-$j
82 > cd $TESTTMP/tmp-$i-$j
82 > cd $TESTTMP/tmp-$i-$j
83 > if hg unbundle $TESTTMP/$i.bundle -q 2>/dev/null; then
83 > if hg unbundle $TESTTMP/$i.bundle -q 2>/dev/null; then
84 > hg verify -q && echo OK
84 > hg verify -q && echo OK
85 > else
85 > else
86 > echo CRASHED
86 > echo CRASHED
87 > fi
87 > fi
88 > done
88 > done
89 > done
89 > done
90 ---- Applying src-normal.bundle to dst-normal ----
90 ---- Applying src-normal.bundle to dst-normal ----
91 OK
91 OK
92 ---- Applying src-normal.bundle to dst-lfs ----
92 ---- Applying src-normal.bundle to dst-lfs ----
93 CRASHED
93 OK
94 ---- Applying src-lfs.bundle to dst-normal ----
94 ---- Applying src-lfs.bundle to dst-normal ----
95 OK
95 OK
96 ---- Applying src-lfs.bundle to dst-lfs ----
96 ---- Applying src-lfs.bundle to dst-lfs ----
97 OK
97 OK