revlog: move index file opening in a method...
Boris Feld
r35986:91b3b115 default
@@ -1,2495 +1,2502 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import errno
import hashlib
import heapq
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor

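The registration contract is easiest to see with a toy processor. A minimal sketch, assuming a made-up base64 "external storage" codec (illustrative only; not part of revlog.py or any real extension):

# Illustrative sketch: a toy flag processor (not part of revlog.py).
import base64

def _readb64(self, rawtext):
    # stored form -> user-visible form; the bool says whether the result
    # may be used for hash integrity checking
    return base64.b64decode(rawtext), True

def _writeb64(self, text):
    # user-visible form -> stored form; False: hash the user-visible "text"
    return base64.b64encode(text), False

def _rawb64(self, rawtext):
    # same boolean as "write": rawtext is not what the hash covers
    return False

addflagprocessor(REVIDX_EXTSTORED, (_readb64, _writeb64, _rawb64))
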
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

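The first index field packs the data-file offset and the per-revision flags into one integer; a quick round trip shows the split (illustrative):

# Round-tripping the packed offset/flags field (illustrative):
packed = offset_type(1048576, REVIDX_ISCENSORED)
assert getoffset(packed) == 1048576           # upper bits: byte offset
assert gettype(packed) == REVIDX_ISCENSORED   # lower 16 bits: flags
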
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()

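For a parentless revision both parents are nullid, so the node collapses to sha1(nullid + nullid + text). A quick check (illustrative):

# Node id of a parentless revision (illustrative check, not part of revlog.py):
node = hash(b'hello\n', nullid, nullid)
expected = hashlib.sha1(nullid + nullid + b'hello\n').digest()
assert node == expected
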
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # Trim empty revs at the end, but never the very first revision of a chain
    while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
        endidx -= 1

    return revs[startidx:endidx]

def _slicechunk(revlog, revs):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one go.
    Assume that revs are sorted.
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    startbyte = start(revs[0])
    endbyte = start(revs[-1]) + length(revs[-1])
    readdata = deltachainspan = endbyte - startbyte

    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > revlog._srmingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < revlog._srdensitythreshold:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

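Driving _slicechunk with a minimal stub makes the density heuristic concrete. A sketch, assuming made-up offsets and the default density/gap settings seen later in revlog.__init__:

# Illustrative driver for _slicechunk (offsets are made up):
class _fakerevlog(object):
    # rev -> (start offset, compressed length); a 1 MB hole sits between
    # revs 1 and 2
    _layout = {0: (0, 100), 1: (100, 50), 2: (1048726, 75), 3: (1048801, 25)}
    _srdensitythreshold = 0.5
    _srmingapsize = 262144

    def start(self, rev):
        return self._layout[rev][0]

    def length(self, rev):
        return self._layout[rev][1]

for group in _slicechunk(_fakerevlog(), [0, 1, 2, 3]):
    print(group)   # -> [0, 1] then [2, 3]: the hole splits the read
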
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()

class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides candidate revisions worth diffing against, grouped from
        cheapest to most expensive to try.
        """
        revlog = self.revlog
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if revlog._generaldelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not revlog._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                yield parents

            if prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                yield (prev,)

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            basetext = revlog.revision(baserev, _df=fh, raw=True)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            nominateddeltas = []
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo

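The order in which finddeltainfo tries delta bases is easiest to see in isolation. A simplified sketch mirroring _getcandidaterevs under default settings (generaldelta on, non-aggressive merge deltas); illustrative only, not the real code path:

# Candidate groups tried by finddeltainfo, cheapest first (illustrative
# mirror of _getcandidaterevs, not the real implementation):
def _candidateorder(p1r, p2r, prev, cachedbase):
    tested = set()
    if cachedbase is not None:
        yield (cachedbase,)           # 1. base suggested by the changegroup
        tested.add(cachedbase)
    parents = [p for p in (p1r, p2r) if p != nullrev and p not in tested]
    if parents:
        parents = [max(parents)]      # 2. the parent closest to the tip
        tested.update(parents)
        yield parents
    if prev not in tested:
        yield (prev,)                 # 3. last resort: the previous revision

print(list(_candidateorder(p1r=5, p2r=nullrev, prev=7, cachedbase=6)))
# -> [(6,), [5], (7,)]
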
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

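Each v1 ("ng") index record is 64 bytes. A sketch packing an arbitrary entry (illustrative; the field values are made up):

# Packing a made-up v1 index entry (illustrative):
entry = (offset_type(0, 0),   # offset/flags, packed as shown earlier
         11,                  # compressed length
         11,                  # uncompressed length
         0,                   # base rev of the delta chain
         0,                   # link rev
         nullrev, nullrev,    # parent revs
         b'\x12' * 20)        # nodeid (zero-padded to 32 bytes on disk)
record = indexformatng_pack(*entry)
assert len(record) == indexformatng.size == 64
# For rev 0, packentry() overwrites the first 4 bytes with the version
# header, which is why the offset of the first revision is implicitly zero.
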
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._srdensitythreshold = 0.25
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._withsparseread = bool(opts.get('with-sparse-read', False))
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
-            f = self.opener(self.indexfile)
+            f = self._indexfp()
            if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                indexdata = util.buffer(util.mmapread(f))
            else:
                indexdata = f.read()
            f.close()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

+    def _indexfp(self, mode='r'):
+        """file object for the revlog's index file"""
+        args = {r'mode': mode}
+        if mode != 'r':
+            args[r'checkambig'] = self._checkambig
+        if mode == 'w':
+            args[r'atomictemp'] = True
+        return self.opener(self.indexfile, **args)
+
    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

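    # How the new helper maps modes to opener arguments (illustrative
    # summary, not part of the change):
    #   _indexfp()     -> opener(indexfile, mode='r')
    #   _indexfp('r+') -> opener(indexfile, mode='r+', checkambig=...)
    #   _indexfp('w')  -> opener(indexfile, mode='w', checkambig=...,
    #                            atomictemp=True)
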
    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

716
708 @util.propertycache
717 @util.propertycache
709 def nodemap(self):
718 def nodemap(self):
710 self.rev(self.node(0))
719 self.rev(self.node(0))
711 return self._nodecache
720 return self._nodecache
712
721
713 def hasnode(self, node):
722 def hasnode(self, node):
714 try:
723 try:
715 self.rev(node)
724 self.rev(node)
716 return True
725 return True
717 except KeyError:
726 except KeyError:
718 return False
727 return False
719
728
720 def clearcaches(self):
729 def clearcaches(self):
721 self._cache = None
730 self._cache = None
722 self._chainbasecache.clear()
731 self._chainbasecache.clear()
723 self._chunkcache = (0, '')
732 self._chunkcache = (0, '')
724 self._pcache = {}
733 self._pcache = {}
725
734
726 try:
735 try:
727 self._nodecache.clearcaches()
736 self._nodecache.clearcaches()
728 except AttributeError:
737 except AttributeError:
729 self._nodecache = {nullid: nullrev}
738 self._nodecache = {nullid: nullrev}
730 self._nodepos = None
739 self._nodepos = None
731
740
732 def rev(self, node):
741 def rev(self, node):
733 try:
742 try:
734 return self._nodecache[node]
743 return self._nodecache[node]
735 except TypeError:
744 except TypeError:
736 raise
745 raise
737 except RevlogError:
746 except RevlogError:
738 # parsers.c radix tree lookup failed
747 # parsers.c radix tree lookup failed
739 if node == wdirid:
748 if node == wdirid:
740 raise error.WdirUnsupported
749 raise error.WdirUnsupported
741 raise LookupError(node, self.indexfile, _('no node'))
750 raise LookupError(node, self.indexfile, _('no node'))
742 except KeyError:
751 except KeyError:
743 # pure python cache lookup failed
752 # pure python cache lookup failed
744 n = self._nodecache
753 n = self._nodecache
745 i = self.index
754 i = self.index
746 p = self._nodepos
755 p = self._nodepos
747 if p is None:
756 if p is None:
748 p = len(i) - 2
757 p = len(i) - 2
749 for r in xrange(p, -1, -1):
758 for r in xrange(p, -1, -1):
750 v = i[r][7]
759 v = i[r][7]
751 n[v] = r
760 n[v] = r
752 if v == node:
761 if v == node:
753 self._nodepos = r - 1
762 self._nodepos = r - 1
754 return r
763 return r
755 if node == wdirid:
764 if node == wdirid:
756 raise error.WdirUnsupported
765 raise error.WdirUnsupported
757 raise LookupError(node, self.indexfile, _('no node'))
766 raise LookupError(node, self.indexfile, _('no node'))
758
767
759 # Accessors for index entries.
768 # Accessors for index entries.
760
769
761 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
770 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
762 # are flags.
771 # are flags.
763 def start(self, rev):
772 def start(self, rev):
764 return int(self.index[rev][0] >> 16)
773 return int(self.index[rev][0] >> 16)
765
774
766 def flags(self, rev):
775 def flags(self, rev):
767 return self.index[rev][0] & 0xFFFF
776 return self.index[rev][0] & 0xFFFF
768
777
769 def length(self, rev):
778 def length(self, rev):
770 return self.index[rev][1]
779 return self.index[rev][1]
771
780
772 def rawsize(self, rev):
781 def rawsize(self, rev):
773 """return the length of the uncompressed text for a given revision"""
782 """return the length of the uncompressed text for a given revision"""
774 l = self.index[rev][2]
783 l = self.index[rev][2]
775 if l >= 0:
784 if l >= 0:
776 return l
785 return l
777
786
778 t = self.revision(rev, raw=True)
787 t = self.revision(rev, raw=True)
779 return len(t)
788 return len(t)
780
789
781 def size(self, rev):
790 def size(self, rev):
782 """length of non-raw text (processed by a "read" flag processor)"""
791 """length of non-raw text (processed by a "read" flag processor)"""
783 # fast path: if no "read" flag processor could change the content,
792 # fast path: if no "read" flag processor could change the content,
784 # size is rawsize. note: ELLIPSIS is known to not change the content.
793 # size is rawsize. note: ELLIPSIS is known to not change the content.
785 flags = self.flags(rev)
794 flags = self.flags(rev)
786 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
795 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
787 return self.rawsize(rev)
796 return self.rawsize(rev)
788
797
789 return len(self.revision(rev, raw=False))
798 return len(self.revision(rev, raw=False))
790
799
791 def chainbase(self, rev):
800 def chainbase(self, rev):
792 base = self._chainbasecache.get(rev)
801 base = self._chainbasecache.get(rev)
793 if base is not None:
802 if base is not None:
794 return base
803 return base
795
804
796 index = self.index
805 index = self.index
797 base = index[rev][3]
806 base = index[rev][3]
798 while base != rev:
807 while base != rev:
799 rev = base
808 rev = base
800 base = index[rev][3]
809 base = index[rev][3]
801
810
802 self._chainbasecache[rev] = base
811 self._chainbasecache[rev] = base
803 return base
812 return base
804
813
805 def linkrev(self, rev):
814 def linkrev(self, rev):
806 return self.index[rev][4]
815 return self.index[rev][4]
807
816
808 def parentrevs(self, rev):
817 def parentrevs(self, rev):
809 try:
818 try:
810 entry = self.index[rev]
819 entry = self.index[rev]
811 except IndexError:
820 except IndexError:
812 if rev == wdirrev:
821 if rev == wdirrev:
813 raise error.WdirUnsupported
822 raise error.WdirUnsupported
814 raise
823 raise
815
824
816 return entry[5], entry[6]
825 return entry[5], entry[6]
817
826
818 def node(self, rev):
827 def node(self, rev):
819 try:
828 try:
820 return self.index[rev][7]
829 return self.index[rev][7]
821 except IndexError:
830 except IndexError:
822 if rev == wdirrev:
831 if rev == wdirrev:
823 raise error.WdirUnsupported
832 raise error.WdirUnsupported
824 raise
833 raise
825
834
826 # Derived from index values.
835 # Derived from index values.
827
836
828 def end(self, rev):
837 def end(self, rev):
829 return self.start(rev) + self.length(rev)
838 return self.start(rev) + self.length(rev)
830
839
831 def parents(self, node):
840 def parents(self, node):
832 i = self.index
841 i = self.index
833 d = i[self.rev(node)]
842 d = i[self.rev(node)]
834 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
843 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
835
844
836 def chainlen(self, rev):
845 def chainlen(self, rev):
837 return self._chaininfo(rev)[0]
846 return self._chaininfo(rev)[0]
838
847
839 def _chaininfo(self, rev):
848 def _chaininfo(self, rev):
840 chaininfocache = self._chaininfocache
849 chaininfocache = self._chaininfocache
841 if rev in chaininfocache:
850 if rev in chaininfocache:
842 return chaininfocache[rev]
851 return chaininfocache[rev]
843 index = self.index
852 index = self.index
844 generaldelta = self._generaldelta
853 generaldelta = self._generaldelta
845 iterrev = rev
854 iterrev = rev
846 e = index[iterrev]
855 e = index[iterrev]
847 clen = 0
856 clen = 0
848 compresseddeltalen = 0
857 compresseddeltalen = 0
849 while iterrev != e[3]:
858 while iterrev != e[3]:
850 clen += 1
859 clen += 1
851 compresseddeltalen += e[1]
860 compresseddeltalen += e[1]
852 if generaldelta:
861 if generaldelta:
853 iterrev = e[3]
862 iterrev = e[3]
854 else:
863 else:
855 iterrev -= 1
864 iterrev -= 1
856 if iterrev in chaininfocache:
865 if iterrev in chaininfocache:
857 t = chaininfocache[iterrev]
866 t = chaininfocache[iterrev]
858 clen += t[0]
867 clen += t[0]
859 compresseddeltalen += t[1]
868 compresseddeltalen += t[1]
860 break
869 break
861 e = index[iterrev]
870 e = index[iterrev]
862 else:
871 else:
863 # Add text length of base since decompressing that also takes
872 # Add text length of base since decompressing that also takes
864 # work. For cache hits the length is already included.
873 # work. For cache hits the length is already included.
865 compresseddeltalen += e[1]
874 compresseddeltalen += e[1]
866 r = (clen, compresseddeltalen)
875 r = (clen, compresseddeltalen)
867 chaininfocache[rev] = r
876 chaininfocache[rev] = r
868 return r
877 return r
869
878
870 def _deltachain(self, rev, stoprev=None):
879 def _deltachain(self, rev, stoprev=None):
871 """Obtain the delta chain for a revision.
880 """Obtain the delta chain for a revision.
872
881
873 ``stoprev`` specifies a revision to stop at. If not specified, we
882 ``stoprev`` specifies a revision to stop at. If not specified, we
874 stop at the base of the chain.
883 stop at the base of the chain.
875
884
876 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
885 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
877 revs in ascending order and ``stopped`` is a bool indicating whether
886 revs in ascending order and ``stopped`` is a bool indicating whether
878 ``stoprev`` was hit.
887 ``stoprev`` was hit.
879 """
888 """
880 # Try C implementation.
889 # Try C implementation.
881 try:
890 try:
882 return self.index.deltachain(rev, stoprev, self._generaldelta)
891 return self.index.deltachain(rev, stoprev, self._generaldelta)
883 except AttributeError:
892 except AttributeError:
884 pass
893 pass
885
894
886 chain = []
895 chain = []
887
896
888 # Alias to prevent attribute lookup in tight loop.
897 # Alias to prevent attribute lookup in tight loop.
889 index = self.index
898 index = self.index
890 generaldelta = self._generaldelta
899 generaldelta = self._generaldelta
891
900
892 iterrev = rev
901 iterrev = rev
893 e = index[iterrev]
902 e = index[iterrev]
894 while iterrev != e[3] and iterrev != stoprev:
903 while iterrev != e[3] and iterrev != stoprev:
895 chain.append(iterrev)
904 chain.append(iterrev)
896 if generaldelta:
905 if generaldelta:
897 iterrev = e[3]
906 iterrev = e[3]
898 else:
907 else:
899 iterrev -= 1
908 iterrev -= 1
900 e = index[iterrev]
909 e = index[iterrev]
901
910
902 if iterrev == stoprev:
911 if iterrev == stoprev:
903 stopped = True
912 stopped = True
904 else:
913 else:
905 chain.append(iterrev)
914 chain.append(iterrev)
906 stopped = False
915 stopped = False
907
916
908 chain.reverse()
917 chain.reverse()
909 return chain, stopped
918 return chain, stopped
910
919
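    # Illustrative walk (assumed generaldelta index where rev 3 deltas
    # against 2, rev 2 against 0, and rev 0 is a full text, i.e. its own
    # base):
    #   _deltachain(3)            -> ([0, 2, 3], False)
    #   _deltachain(3, stoprev=2) -> ([3], True)
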
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

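    # Usage sketch (illustrative, ``rl`` is a hypothetical revlog instance):
    # note the asymmetric return types. ``has`` is a lazy *rev* container
    # (membership tests stay cheap and do not force full iteration), while
    # the second element is a list of the missing *nodes* sorted by rev:
    #
    #     has, missingnodes = rl.findcommonmissing(common=[c], heads=[h])
    #     if somerev in has:
    #         ...
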
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

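    # Illustrative relation between the two methods above (same query, rev
    # form vs. node form), assuming 'common' and 'heads' describe the same
    # sets; ``rl``, ``c`` and ``h`` are hypothetical:
    #
    #     revs = rl.findmissingrevs(common=[rl.rev(c)], heads=[rl.rev(h)])
    #     nodes = rl.findmissing(common=[c], heads=[h])
    #     # nodes == [rl.node(r) for r in revs]
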
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is; real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

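    # Toy example (illustrative): in a linear revlog n0 <- n1 <- n2,
    #
    #     rl.nodesbetween(roots=[n1])
    #
    # returns ([n1, n2], [n1], [n2]): n1 and n2 lie on the path, n1 is the
    # only reachable root, and n2 is the discovered head.
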
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

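    # Sketch of the marking pass above on a toy history 0 <- 1, 0 <- 2
    # (entries [5] and [6] hold the parent revs; nullrev is -1, which is
    # why the array has count + 1 slots -- the last one absorbs nullrev):
    #
    #     parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}
    #     # after the loop: ishead == [0, 1, 1, 0]  ->  headrevs == [1, 2]
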
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except (TypeError, binascii.Error):
                pass

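    # Resolution sketch (illustrative): a hex prefix either resolves to a
    # single node, to None (no match), or raises on ambiguity; all-'f'
    # prefixes collide with the virtual working-directory id:
    #
    #     rl._partialmatch('1f0d')   # -> node or None
    #     rl._partialmatch('ffff')   # may raise error.WdirUnsupported
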
    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, hexnode, minlength=1):
        """Find the shortest unambiguous prefix that matches hexnode."""
        def isvalid(test):
            try:
                if self._partialmatch(test) is None:
                    return False

                try:
                    i = int(test)
                    # if we are a pure int, then starting with zero will not be
                    # confused as a rev; or, obviously, if the int is larger
                    # than the value of the tip rev
                    if test[0] == '0' or i > len(self):
                        return True
                    return False
                except ValueError:
                    return True
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True

        shortest = hexnode
        startlength = max(6, minlength)
        length = startlength
        while True:
            test = hexnode[:length]
            if isvalid(test):
                shortest = test
                if length == minlength or length > startlength:
                    return shortest
                length -= 1
            else:
                length += 1
                if len(shortest) <= length:
                    return shortest

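    # Search pattern (illustrative): probing starts at max(6, minlength)
    # and shrinks while the prefix stays unambiguous, or grows once it
    # turns out to be ambiguous, e.g.:
    #
    #     rl.shortest(hex(node))       # -> something like '1f0dee'
    #     rl.shortest(hex(node), 12)   # never shorter than 12 digits
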
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self._indexfp()
            else:
                df = self._datafp()
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

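    # Worked example of the window alignment above (illustrative), with a
    # power-of-two cachesize of 65536 and a request of 100 bytes at 70000:
    #
    #     realoffset = 70000 & ~65535                           # == 65536
    #     reallength = ((70100 + 65536) & ~65535) - realoffset  # == 65536
    #
    # so one aligned 64k window [65536, 131072) is read and cached, and a
    # zero-copy util.buffer view of the requested 100 bytes is returned.
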
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

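    # Offset arithmetic sketch (illustrative): entry[0] packs the data
    # offset in its high bits, hence the ``>> 16``. In an inline revlog
    # every revision is preceded by one index entry of self._io.size
    # bytes, so, assuming the 64-byte v1 entry size:
    #
    #     physical_start(rev) = (index[rev][0] >> 16) + (rev + 1) * 64
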
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

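    # Usage sketch (illustrative): one contiguous read, then per-revision
    # zero-copy slices are decompressed individually. With sparse-read
    # enabled, _slicechunk() first splits the revs into dense groups so a
    # single large gap does not force reading the span between them:
    #
    #     texts = rl._chunks([2, 3, 4])  # == [rl._chunk(r) for r in (2, 3, 4)]
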
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

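    # Reconstruction pipeline (illustrative), ignoring the cache and flag
    # processing: fetch the chain, decompress every link, then fold the
    # deltas over the base text, which is what the body above boils down to:
    #
    #     chain, stopped = rl._deltachain(rev)
    #     bins = rl._chunks(chain)
    #     rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
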
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

1748 def _processflags(self, text, flags, operation, raw=False):
1757 def _processflags(self, text, flags, operation, raw=False):
1749 """Inspect revision data flags and applies transforms defined by
1758 """Inspect revision data flags and applies transforms defined by
1750 registered flag processors.
1759 registered flag processors.
1751
1760
1752 ``text`` - the revision data to process
1761 ``text`` - the revision data to process
1753 ``flags`` - the revision flags
1762 ``flags`` - the revision flags
1754 ``operation`` - the operation being performed (read or write)
1763 ``operation`` - the operation being performed (read or write)
1755 ``raw`` - an optional argument describing if the raw transform should be
1764 ``raw`` - an optional argument describing if the raw transform should be
1756 applied.
1765 applied.
1757
1766
1758 This method processes the flags in the order (or reverse order if
1767 This method processes the flags in the order (or reverse order if
1759 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1768 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1760 flag processors registered for present flags. The order of flags defined
1769 flag processors registered for present flags. The order of flags defined
1761 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1770 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1762
1771
1763 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1772 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1764 processed text and ``validatehash`` is a bool indicating whether the
1773 processed text and ``validatehash`` is a bool indicating whether the
1765 returned text should be checked for hash integrity.
1774 returned text should be checked for hash integrity.
1766
1775
1767 Note: If the ``raw`` argument is set, it has precedence over the
1776 Note: If the ``raw`` argument is set, it has precedence over the
1768 operation and will only update the value of ``validatehash``.
1777 operation and will only update the value of ``validatehash``.
1769 """
1778 """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

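For orientation, each registered processor consumed above is a 3-tuple of
(readtransform, writetransform, rawtransform) callables: the read and write
transforms return (newtext, validatehash) while the raw transform returns only
a validatehash bool, matching the three branches in the loop. A minimal,
hypothetical sketch (the flag bit and base64 encoding are illustrative only;
registration via the module-level addflagprocessor() helper is assumed):

    import base64

    REVIDX_BASE64 = (1 << 12)  # assumption: an otherwise-unused flag bit

    def b64read(rl, text):
        # undo the write transform; the result should be hash-checked
        return base64.b64decode(text), True

    def b64write(rl, text):
        # stored form differs from logical form, so skip hash validation
        return base64.b64encode(text), False

    def b64raw(rl, text):
        # raw transform only reports whether the stored text is hash-valid
        return False

    # addflagprocessor(REVIDX_BASE64, (b64read, b64write, b64raw))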
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, pycompat.bytestr(revornode)))

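A subclass hook in the spirit the docstring describes, loosely modeled on how
file logs tolerate censored revisions (a sketch only; the class name and the
censorship policy are illustrative, not part of this change):

    class lenientrevlog(revlog):
        def checkhash(self, text, node, p1=None, p2=None, rev=None):
            try:
                super(lenientrevlog, self).checkhash(text, node, p1, p2, rev)
            except RevlogError:
                # accept the mismatch only for known-censored revisions
                if rev is None:
                    rev = self.rev(node)
                if not self.iscensored(rev):
                    raise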
    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self._datafp('w')
        try:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])
        finally:
            df.close()

-       fp = self.opener(self.indexfile, 'w', atomictemp=True,
-                        checkambig=self._checkambig)
+       fp = self._indexfp('w')
        self.version &= ~FLAG_INLINE_DATA
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

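The _indexfp() helper that these hunks switch to is introduced earlier in this
changeset, outside the excerpt shown here. A sketch consistent with the call
sites above, where 'w' needs atomictemp and any writing mode needs checkambig
(an assumption, not a verbatim quote of the added method):

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)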
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

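A hypothetical call site, to make the positional argument order concrete (the
repository, transaction scaffolding, and nodes are illustrative):

    with repo.transaction('example') as tr:
        # text, transaction, link, p1, p2
        node = rl.addrevision(b'new file content', tr, 5, p1node, nullid)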
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
-       ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
+       ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

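The return value is a (header, payload) pair whose concatenation is what
actually gets stored; the callers below write data[0] then data[1] and sum
their lengths. A quick illustration of that contract (rl is any revlog
instance; the input text is made up):

    header, payload = rl.compress(b'some revision text')
    chunk = header + payload  # the bytes as they land in the .i/.d file
    # header is '' when the payload self-identifies (a compressed stream
    # or a leading '\0'), and 'u' when raw text needs an explicit marker.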
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') % str(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

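To summarize the dispatch on the first byte and tie it back to compress()
above, a hedged round-trip check (rl is any revlog instance; the sample
inputs are illustrative):

    # 'x'  -> zlib stream (zlib output starts with 0x78, i.e. 'x')
    # '\0' -> raw data; the NUL byte belongs to the payload
    # 'u'  -> raw data behind an explicit marker, stripped on read
    # anything else -> resolved via util.compengines.forrevlogheader()
    for text in (b'', b'\0binary blob', b'plain text', b'x' * 4096):
        header, payload = rl.compress(text)
        assert bytes(rl.decompress(header + payload)) == text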
    def _isgooddeltainfo(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if d is None:
            return False

        # - 'd.distance' is the distance from the base revision -- bounding it
        #   limits the amount of I/O we need to do.
        # - 'd.compresseddeltalen' is the sum of the total size of deltas we
        #   need to apply -- bounding it limits the amount of CPU we consume.

        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = d.distance # ensure this conditional passes
        maxdist = max(maxdist, defaultmax)
        if (d.distance > maxdist or d.deltalen > textlen or
            d.compresseddeltalen > textlen * 2 or
            (self._maxchainlen and d.chainlen > self._maxchainlen)):
            return False

        return True

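A worked example of those bounds, with illustrative numbers and no span or
chain-length limits configured:

    textlen = 1000
    maxdist = textlen * 4  # 4000, since _maxdeltachainspan is unset
    # candidate delta: distance=3500, deltalen=400, compresseddeltalen=1200
    good = (3500 <= maxdist          # chain span keeps I/O bounded
            and 400 <= textlen       # delta no larger than the full text
            and 1200 <= textlen * 2) # cumulative deltas keep CPU bounded
    assert good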
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer._buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

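For reference, the in-memory index entry ``e`` assembled above maps, field by
field, as follows (byte-level packing is delegated to self._io.packentry):

    # e[0]  offset_type(offset, flags) -- data offset and flag bits packed
    # e[1]  l        -- size of the stored (possibly compressed) chunk
    # e[2]  textlen  -- size of the full revision text
    # e[3]  base     -- revision number the delta chain builds on
    # e[4]  link     -- linkrev pointing into the changelog
    # e[5]  p1r      -- first parent, as a revision number
    # e[6]  p2r      -- second parent, as a revision number
    # e[7]  node     -- the revision's nodeid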
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
-       ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
+       ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
-                   ifh = self.opener(self.indexfile, "a+",
-                                     checkambig=self._checkambig)
+                   ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

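Each element of ``deltas`` is the 7-tuple unpacked at the top of the loop. A
hypothetical driver, with the changelog supplying linkrevs (names are
illustrative only):

    # each item: (node, p1, p2, linknode, deltabase, delta, flags)
    nodes = rl.addgroup(incomingdeltas,
                        lambda linknode: cl.rev(linknode),
                        tr)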
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

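A small worked case for the walk above, on a hypothetical graph: rev 0
(linkrev 0) with two children, rev 1 (linkrev 2) and rev 2 (linkrev 1).

    # getstrippoint(2) pops rev 2 first: its linkrev 1 is below minlink,
    # so it lands in brokenrevs; popping rev 1 (linkrev 2) then empties
    # futurelargelinkrevs and the walk stops.
    #   getstrippoint(2) -> (1, {2})
    # i.e. truncate from rev 1 onward; rev 2 is stripped despite its
    # small linkrev because it sits above the strip point.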
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self._datafp()
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        If left at None, the destination revlog's existing setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, str(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd
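
A hypothetical driver for clone(), e.g. during a repository format upgrade
(the openers, paths, and transaction scaffolding are illustrative only):

    srclog = revlog(srcopener, 'data/foo.txt.i')
    dstlog = revlog(dstopener, 'data/foo.txt.i')
    with srcrepo.transaction('upgrade') as tr:
        srclog.clone(tr, dstlog, deltareuse=revlog.DELTAREUSESAMEREVS)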