sparse-read: ignore trailing empty revs in each read chunk...
Paul Morelle
r34899:1bde8e8e default
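
This change drops trailing empty revisions from each chunk yielded by the sparse-read slicing code, so a read never ends on revisions that contribute no data. The new `_trimchunk` helper does the trimming, and `_slicechunk` additionally skips empty revisions when measuring gaps, letting adjacent holes merge into larger ones. As a rough illustration of the trimming behaviour, here is a self-contained sketch pairing the `_trimchunk` code from this change with a hypothetical `fakerevlog` stand-in (not part of the change; only the `length` accessor matters for trimming):

# Sketch only. 'fakerevlog' is a hypothetical stand-in for a real revlog;
# _trimchunk needs nothing but its 'length' accessor, which reports the
# on-disk (compressed) size of each revision's delta.
class fakerevlog(object):
    def __init__(self, lengths):
        self._lengths = lengths

    def length(self, rev):
        return self._lengths[rev]

# _trimchunk as introduced by this change.
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs"""
    length = revlog.length
    if endidx is None:
        endidx = len(revs)
    # Trim empty revs at the end, but never the very first revision of a chain
    while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
        endidx -= 1
    return revs[startidx:endidx]

rl = fakerevlog([10, 0, 7, 0, 0])
print(_trimchunk(rl, [0, 1, 2, 3, 4], 0))  # [0, 1, 2]: empty revs 3 and 4 trimmed
print(_trimchunk(rl, [3, 4], 0))           # [3]: the first rev of a chunk is kept

Note how the second call keeps rev 3 even though its delta is empty: trimming never removes the first revision of a chunk, which would otherwise produce an empty read.
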
@@ -1,2350 +1,2375 @@
 # revlog.py - storage back-end for mercurial
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 """Storage back-end for Mercurial.
 
 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """
 
 from __future__ import absolute_import
 
 import binascii
 import collections
 import errno
 import hashlib
 import heapq
 import os
 import struct
 import zlib
 
 # import stuff from node for others to import from revlog
 from .node import (
     bin,
     hex,
     nullid,
     nullrev,
     wdirhex,
     wdirid,
     wdirrev,
 )
 from .i18n import _
 from . import (
     ancestor,
     error,
     mdiff,
     policy,
     pycompat,
     templatefilters,
     util,
 )
 
 parsers = policy.importmod(r'parsers')
 
 # Aliased for performance.
 _zlibdecompress = zlib.decompress
 
 # revlog header flags
 REVLOGV0 = 0
 REVLOGV1 = 1
 # Dummy value until file format is finalized.
 # Reminder: change the bounds check in revlog.__init__ when this is changed.
 REVLOGV2 = 0xDEAD
 FLAG_INLINE_DATA = (1 << 16)
 FLAG_GENERALDELTA = (1 << 17)
 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
 REVLOG_DEFAULT_FORMAT = REVLOGV1
 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
 REVLOGV2_FLAGS = REVLOGV1_FLAGS
 
 # revlog index flags
 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
 REVIDX_DEFAULT_FLAGS = 0
 # stable order in which flags need to be processed and their processors applied
 REVIDX_FLAGS_ORDER = [
     REVIDX_ISCENSORED,
     REVIDX_ELLIPSIS,
     REVIDX_EXTSTORED,
 ]
 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
 
 # max size of revlog with inline data
 _maxinline = 131072
 _chunksize = 1048576
 
 RevlogError = error.RevlogError
 LookupError = error.LookupError
 CensoredNodeError = error.CensoredNodeError
 ProgrammingError = error.ProgrammingError
 
 # Store flag processors (cf. 'addflagprocessor()' to register)
 _flagprocessors = {
     REVIDX_ISCENSORED: None,
 }
 
 def addflagprocessor(flag, processor):
     """Register a flag processor on a revision data flag.
 
     Invariant:
     - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
     - Only one flag processor can be registered on a specific flag.
     - flagprocessors must be 3-tuples of functions (read, write, raw) with the
       following signatures:
           - (read) f(self, rawtext) -> text, bool
           - (write) f(self, text) -> rawtext, bool
           - (raw) f(self, rawtext) -> bool
     "text" is presented to the user. "rawtext" is stored in revlog data, not
     directly visible to the user.
     The boolean returned by these transforms is used to determine whether
     the returned text can be used for hash integrity checking. For example,
     if "write" returns False, then "text" is used to generate hash. If
     "write" returns True, that basically means "rawtext" returned by "write"
     should be used to generate hash. Usually, "write" and "read" return
     different booleans. And "raw" returns a same boolean as "write".
 
     Note: The 'raw' transform is used for changegroup generation and in some
     debug commands. In this case the transform only indicates whether the
     contents can be used for hash integrity checks.
     """
     if not flag & REVIDX_KNOWN_FLAGS:
         msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
         raise ProgrammingError(msg)
     if flag not in REVIDX_FLAGS_ORDER:
         msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
         raise ProgrammingError(msg)
     if flag in _flagprocessors:
         msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
         raise error.Abort(msg)
     _flagprocessors[flag] = processor
 
 def getoffset(q):
     return int(q >> 16)
 
 def gettype(q):
     return int(q & 0xFFFF)
 
 def offset_type(offset, type):
     if (type & ~REVIDX_KNOWN_FLAGS) != 0:
         raise ValueError('unknown revlog index flags')
     return int(int(offset) << 16 | type)
 
 _nullhash = hashlib.sha1(nullid)
 
 def hash(text, p1, p2):
     """generate a hash from the given text and its parent hashes
 
     This hash combines both the current file contents and its history
     in a manner that makes it easy to distinguish nodes with the same
     content in the revision graph.
     """
     # As of now, if one of the parent node is null, p2 is null
     if p2 == nullid:
         # deep copy of a hash is faster than creating one
         s = _nullhash.copy()
         s.update(p1)
     else:
         # none of the parent nodes are nullid
         if p1 < p2:
             a = p1
             b = p2
         else:
             a = p2
             b = p1
         s = hashlib.sha1(a)
         s.update(b)
     s.update(text)
     return s.digest()
 
+def _trimchunk(revlog, revs, startidx, endidx=None):
+    """returns revs[startidx:endidx] without empty trailing revs
+    """
+    length = revlog.length
+
+    if endidx is None:
+        endidx = len(revs)
+
+    # Trim empty revs at the end, but never the very first revision of a chain
+    while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
+        endidx -= 1
+
+    return revs[startidx:endidx]
+
 def _slicechunk(revlog, revs):
     """slice revs to reduce the amount of unrelated data to be read from disk.
 
     ``revs`` is sliced into groups that should be read in one time.
     Assume that revs are sorted.
     """
     start = revlog.start
     length = revlog.length
 
     if len(revs) <= 1:
         yield revs
         return
 
     startbyte = start(revs[0])
     endbyte = start(revs[-1]) + length(revs[-1])
     readdata = deltachainspan = endbyte - startbyte
 
     chainpayload = sum(length(r) for r in revs)
 
     if deltachainspan:
         density = chainpayload / float(deltachainspan)
     else:
         density = 1.0
 
     # Store the gaps in a heap to have them sorted by decreasing size
     gapsheap = []
     heapq.heapify(gapsheap)
     prevend = None
     for i, rev in enumerate(revs):
         revstart = start(rev)
         revlen = length(rev)
 
+        # Skip empty revisions to form larger holes
+        if revlen == 0:
+            continue
+
         if prevend is not None:
             gapsize = revstart - prevend
             # only consider holes that are large enough
             if gapsize > revlog._srmingapsize:
                 heapq.heappush(gapsheap, (-gapsize, i))
 
         prevend = revstart + revlen
 
     # Collect the indices of the largest holes until the density is acceptable
     indicesheap = []
     heapq.heapify(indicesheap)
     while gapsheap and density < revlog._srdensitythreshold:
         oppgapsize, gapidx = heapq.heappop(gapsheap)
 
         heapq.heappush(indicesheap, gapidx)
 
         # the gap sizes are stored as negatives to be sorted decreasingly
         # by the heap
         readdata -= (-oppgapsize)
         if readdata > 0:
             density = chainpayload / float(readdata)
         else:
             density = 1.0
 
     # Cut the revs at collected indices
     previdx = 0
     while indicesheap:
         idx = heapq.heappop(indicesheap)
-        yield revs[previdx:idx]
+
+        chunk = _trimchunk(revlog, revs, previdx, idx)
+        if chunk:
+            yield chunk
+
         previdx = idx
-    yield revs[previdx:]
+
+    chunk = _trimchunk(revlog, revs, previdx)
+    if chunk:
+        yield chunk
 
 # index v0:
 #  4 bytes: offset
 #  4 bytes: compressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 # 20 bytes: parent 1 nodeid
 # 20 bytes: parent 2 nodeid
 # 20 bytes: nodeid
 indexformatv0 = struct.Struct(">4l20s20s20s")
 indexformatv0_pack = indexformatv0.pack
 indexformatv0_unpack = indexformatv0.unpack
 
 class revlogoldio(object):
     def __init__(self):
         self.size = indexformatv0.size
 
     def parseindex(self, data, inline):
         s = self.size
         index = []
         nodemap = {nullid: nullrev}
         n = off = 0
         l = len(data)
         while off + s <= l:
             cur = data[off:off + s]
             off += s
             e = indexformatv0_unpack(cur)
             # transform to revlogv1 format
             e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                   nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
             index.append(e2)
             nodemap[e[6]] = n
             n += 1
 
         # add the magic null revision at -1
         index.append((0, 0, 0, -1, -1, -1, -1, nullid))
 
         return index, nodemap, None
 
     def packentry(self, entry, node, version, rev):
         if gettype(entry[0]):
             raise RevlogError(_('index entry flags need revlog version 1'))
         e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
               node(entry[5]), node(entry[6]), entry[7])
         return indexformatv0_pack(*e2)
 
 # index ng:
 #  6 bytes: offset
 #  2 bytes: flags
 #  4 bytes: compressed length
 #  4 bytes: uncompressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 #  4 bytes: parent 1 rev
 #  4 bytes: parent 2 rev
 # 32 bytes: nodeid
 indexformatng = struct.Struct(">Qiiiiii20s12x")
 indexformatng_pack = indexformatng.pack
 versionformat = struct.Struct(">I")
 versionformat_pack = versionformat.pack
 versionformat_unpack = versionformat.unpack
 
 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
 # signed integer)
 _maxentrysize = 0x7fffffff
 
 class revlogio(object):
     def __init__(self):
         self.size = indexformatng.size
 
     def parseindex(self, data, inline):
         # call the C implementation to parse the index data
         index, cache = parsers.parse_index2(data, inline)
         return index, getattr(index, 'nodemap', None), cache
 
     def packentry(self, entry, node, version, rev):
         p = indexformatng_pack(*entry)
         if rev == 0:
             p = versionformat_pack(version) + p[4:]
         return p
 
 class revlog(object):
     """
     the underlying revision storage object
 
     A revlog consists of two parts, an index and the revision data.
 
     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.
 
     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.
 
     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.
 
     If checkambig, indexfile is opened with checkambig=True at
     writing, to avoid file stat ambiguity.
 
     If mmaplargeindex is True, and an mmapindexthreshold is set, the
     index will be mmapped rather than read if it is larger than the
     configured threshold.
     """
     def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                  mmaplargeindex=False):
         """
         create a revlog object
 
         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
         """
         self.indexfile = indexfile
         self.datafile = datafile or (indexfile[:-2] + ".d")
         self.opener = opener
         # When True, indexfile is opened with checkambig=True at writing, to
         # avoid file stat ambiguity.
         self._checkambig = checkambig
         # 3-tuple of (node, rev, text) for a raw revision.
         self._cache = None
         # Maps rev to chain base rev.
         self._chainbasecache = util.lrucachedict(100)
         # 2-tuple of (offset, data) of raw data from the revlog at an offset.
         self._chunkcache = (0, '')
         # How much data to read and cache into the raw revlog data cache.
         self._chunkcachesize = 65536
         self._maxchainlen = None
         self._aggressivemergedeltas = False
         self.index = []
         # Mapping of partial identifiers to full nodes.
         self._pcache = {}
         # Mapping of revision integer to full node.
         self._nodecache = {nullid: nullrev}
         self._nodepos = None
         self._compengine = 'zlib'
         self._maxdeltachainspan = -1
         self._withsparseread = False
         self._srdensitythreshold = 0.25
         self._srmingapsize = 262144
 
         mmapindexthreshold = None
         v = REVLOG_DEFAULT_VERSION
         opts = getattr(opener, 'options', None)
         if opts is not None:
             if 'revlogv2' in opts:
                 # version 2 revlogs always use generaldelta.
                 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
             elif 'revlogv1' in opts:
                 if 'generaldelta' in opts:
                     v |= FLAG_GENERALDELTA
             else:
                 v = 0
             if 'chunkcachesize' in opts:
                 self._chunkcachesize = opts['chunkcachesize']
             if 'maxchainlen' in opts:
                 self._maxchainlen = opts['maxchainlen']
             if 'aggressivemergedeltas' in opts:
                 self._aggressivemergedeltas = opts['aggressivemergedeltas']
             self._lazydeltabase = bool(opts.get('lazydeltabase', False))
             if 'compengine' in opts:
                 self._compengine = opts['compengine']
             if 'maxdeltachainspan' in opts:
                 self._maxdeltachainspan = opts['maxdeltachainspan']
             if mmaplargeindex and 'mmapindexthreshold' in opts:
                 mmapindexthreshold = opts['mmapindexthreshold']
             self._withsparseread = bool(opts.get('with-sparse-read', False))
             if 'sparse-read-density-threshold' in opts:
                 self._srdensitythreshold = opts['sparse-read-density-threshold']
             if 'sparse-read-min-gap-size' in opts:
                 self._srmingapsize = opts['sparse-read-min-gap-size']
 
         if self._chunkcachesize <= 0:
             raise RevlogError(_('revlog chunk cache size %r is not greater '
                                 'than 0') % self._chunkcachesize)
         elif self._chunkcachesize & (self._chunkcachesize - 1):
             raise RevlogError(_('revlog chunk cache size %r is not a power '
                                 'of 2') % self._chunkcachesize)
 
         indexdata = ''
         self._initempty = True
         try:
             f = self.opener(self.indexfile)
             if (mmapindexthreshold is not None and
                     self.opener.fstat(f).st_size >= mmapindexthreshold):
                 indexdata = util.buffer(util.mmapread(f))
             else:
                 indexdata = f.read()
             f.close()
             if len(indexdata) > 0:
                 v = versionformat_unpack(indexdata[:4])[0]
                 self._initempty = False
         except IOError as inst:
             if inst.errno != errno.ENOENT:
                 raise
 
         self.version = v
         self._inline = v & FLAG_INLINE_DATA
         self._generaldelta = v & FLAG_GENERALDELTA
         flags = v & ~0xFFFF
         fmt = v & 0xFFFF
         if fmt == REVLOGV0:
             if flags:
                 raise RevlogError(_('unknown flags (%#04x) in version %d '
                                     'revlog %s') %
                                   (flags >> 16, fmt, self.indexfile))
         elif fmt == REVLOGV1:
             if flags & ~REVLOGV1_FLAGS:
                 raise RevlogError(_('unknown flags (%#04x) in version %d '
                                     'revlog %s') %
                                   (flags >> 16, fmt, self.indexfile))
         elif fmt == REVLOGV2:
             if flags & ~REVLOGV2_FLAGS:
                 raise RevlogError(_('unknown flags (%#04x) in version %d '
                                     'revlog %s') %
                                   (flags >> 16, fmt, self.indexfile))
         else:
             raise RevlogError(_('unknown version (%d) in revlog %s') %
                               (fmt, self.indexfile))
 
         self.storedeltachains = True
 
         self._io = revlogio()
         if self.version == REVLOGV0:
             self._io = revlogoldio()
         try:
             d = self._io.parseindex(indexdata, self._inline)
         except (ValueError, IndexError):
             raise RevlogError(_("index %s is corrupted") % (self.indexfile))
         self.index, nodemap, self._chunkcache = d
         if nodemap is not None:
             self.nodemap = self._nodecache = nodemap
         if not self._chunkcache:
             self._chunkclear()
         # revnum -> (chain-length, sum-delta-length)
         self._chaininfocache = {}
         # revlog header -> revlog compressor
         self._decompressors = {}
 
     @util.propertycache
     def _compressor(self):
         return util.compengines[self._compengine].revlogcompressor()
 
     def tip(self):
         return self.node(len(self.index) - 2)
     def __contains__(self, rev):
         return 0 <= rev < len(self)
     def __len__(self):
         return len(self.index) - 1
     def __iter__(self):
         return iter(xrange(len(self)))
     def revs(self, start=0, stop=None):
         """iterate over all rev in this revlog (from start to stop)"""
         step = 1
         if stop is not None:
             if start > stop:
                 step = -1
             stop += step
         else:
             stop = len(self)
         return xrange(start, stop, step)
 
     @util.propertycache
     def nodemap(self):
         self.rev(self.node(0))
         return self._nodecache
 
     def hasnode(self, node):
         try:
             self.rev(node)
             return True
         except KeyError:
             return False
 
     def clearcaches(self):
         self._cache = None
         self._chainbasecache.clear()
         self._chunkcache = (0, '')
         self._pcache = {}
 
         try:
             self._nodecache.clearcaches()
         except AttributeError:
             self._nodecache = {nullid: nullrev}
             self._nodepos = None
 
     def rev(self, node):
         try:
             return self._nodecache[node]
         except TypeError:
             raise
         except RevlogError:
             # parsers.c radix tree lookup failed
             if node == wdirid:
                 raise error.WdirUnsupported
             raise LookupError(node, self.indexfile, _('no node'))
         except KeyError:
             # pure python cache lookup failed
             n = self._nodecache
             i = self.index
             p = self._nodepos
             if p is None:
                 p = len(i) - 2
             for r in xrange(p, -1, -1):
                 v = i[r][7]
                 n[v] = r
                 if v == node:
                     self._nodepos = r - 1
                     return r
             if node == wdirid:
                 raise error.WdirUnsupported
             raise LookupError(node, self.indexfile, _('no node'))
 
     # Accessors for index entries.
 
     # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
     # are flags.
     def start(self, rev):
         return int(self.index[rev][0] >> 16)
 
     def flags(self, rev):
         return self.index[rev][0] & 0xFFFF
 
     def length(self, rev):
         return self.index[rev][1]
 
     def rawsize(self, rev):
         """return the length of the uncompressed text for a given revision"""
         l = self.index[rev][2]
         if l >= 0:
             return l
 
         t = self.revision(rev, raw=True)
         return len(t)
 
     def size(self, rev):
         """length of non-raw text (processed by a "read" flag processor)"""
         # fast path: if no "read" flag processor could change the content,
         # size is rawsize. note: ELLIPSIS is known to not change the content.
         flags = self.flags(rev)
         if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
             return self.rawsize(rev)
 
         return len(self.revision(rev, raw=False))
 
     def chainbase(self, rev):
         base = self._chainbasecache.get(rev)
         if base is not None:
             return base
 
         index = self.index
         base = index[rev][3]
         while base != rev:
             rev = base
             base = index[rev][3]
 
         self._chainbasecache[rev] = base
         return base
 
     def linkrev(self, rev):
         return self.index[rev][4]
 
     def parentrevs(self, rev):
         try:
             return self.index[rev][5:7]
         except IndexError:
             if rev == wdirrev:
                 raise error.WdirUnsupported
             raise
 
     def node(self, rev):
         try:
             return self.index[rev][7]
         except IndexError:
             if rev == wdirrev:
                 raise error.WdirUnsupported
             raise
 
     # Derived from index values.
 
     def end(self, rev):
         return self.start(rev) + self.length(rev)
 
     def parents(self, node):
         i = self.index
         d = i[self.rev(node)]
         return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
 
     def chainlen(self, rev):
         return self._chaininfo(rev)[0]
 
     def _chaininfo(self, rev):
         chaininfocache = self._chaininfocache
         if rev in chaininfocache:
             return chaininfocache[rev]
         index = self.index
         generaldelta = self._generaldelta
         iterrev = rev
         e = index[iterrev]
         clen = 0
         compresseddeltalen = 0
         while iterrev != e[3]:
             clen += 1
             compresseddeltalen += e[1]
             if generaldelta:
                 iterrev = e[3]
             else:
                 iterrev -= 1
             if iterrev in chaininfocache:
                 t = chaininfocache[iterrev]
                 clen += t[0]
                 compresseddeltalen += t[1]
                 break
             e = index[iterrev]
         else:
             # Add text length of base since decompressing that also takes
             # work. For cache hits the length is already included.
             compresseddeltalen += e[1]
         r = (clen, compresseddeltalen)
         chaininfocache[rev] = r
         return r
 
     def _deltachain(self, rev, stoprev=None):
         """Obtain the delta chain for a revision.
 
         ``stoprev`` specifies a revision to stop at. If not specified, we
         stop at the base of the chain.
 
         Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
         revs in ascending order and ``stopped`` is a bool indicating whether
         ``stoprev`` was hit.
         """
         # Try C implementation.
         try:
             return self.index.deltachain(rev, stoprev, self._generaldelta)
         except AttributeError:
             pass
 
         chain = []
 
         # Alias to prevent attribute lookup in tight loop.
         index = self.index
         generaldelta = self._generaldelta
 
         iterrev = rev
         e = index[iterrev]
         while iterrev != e[3] and iterrev != stoprev:
             chain.append(iterrev)
             if generaldelta:
                 iterrev = e[3]
             else:
                 iterrev -= 1
             e = index[iterrev]
 
         if iterrev == stoprev:
             stopped = True
         else:
             chain.append(iterrev)
             stopped = False
 
         chain.reverse()
         return chain, stopped
 
     def ancestors(self, revs, stoprev=0, inclusive=False):
         """Generate the ancestors of 'revs' in reverse topological order.
         Does not generate revs lower than stoprev.
 
         See the documentation for ancestor.lazyancestors for more details."""
 
         return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                       inclusive=inclusive)
 
     def descendants(self, revs):
         """Generate the descendants of 'revs' in revision order.
 
         Yield a sequence of revision numbers starting with a child of
         some rev in revs, i.e., each revision is *not* considered a
         descendant of itself. Results are ordered by revision number (a
         topological sort)."""
         first = min(revs)
         if first == nullrev:
             for i in self:
                 yield i
             return
 
         seen = set(revs)
         for i in self.revs(start=first + 1):
             for x in self.parentrevs(i):
                 if x != nullrev and x in seen:
                     seen.add(i)
                     yield i
                     break
 
     def findcommonmissing(self, common=None, heads=None):
         """Return a tuple of the ancestors of common and the ancestors of heads
         that are not ancestors of common. In revset terminology, we return the
         tuple:
 
           ::common, (::heads) - (::common)
 
         The list is sorted by revision number, meaning it is
         topologically sorted.
 
         'heads' and 'common' are both lists of node IDs. If heads is
         not supplied, uses all of the revlog's heads. If common is not
         supplied, uses nullid."""
         if common is None:
             common = [nullid]
         if heads is None:
             heads = self.heads()
 
         common = [self.rev(n) for n in common]
         heads = [self.rev(n) for n in heads]
 
         # we want the ancestors, but inclusive
         class lazyset(object):
             def __init__(self, lazyvalues):
                 self.addedvalues = set()
                 self.lazyvalues = lazyvalues
 
             def __contains__(self, value):
                 return value in self.addedvalues or value in self.lazyvalues
 
             def __iter__(self):
                 added = self.addedvalues
                 for r in added:
                     yield r
                 for r in self.lazyvalues:
                     if not r in added:
                         yield r
 
             def add(self, value):
                 self.addedvalues.add(value)
 
             def update(self, values):
                 self.addedvalues.update(values)
 
         has = lazyset(self.ancestors(common))
         has.add(nullrev)
         has.update(common)
 
         # take all ancestors from heads that aren't in has
         missing = set()
         visit = collections.deque(r for r in heads if r not in has)
         while visit:
             r = visit.popleft()
             if r in missing:
                 continue
             else:
                 missing.add(r)
                 for p in self.parentrevs(r):
                     if p not in has:
                         visit.append(p)
         missing = list(missing)
         missing.sort()
         return has, [self.node(miss) for miss in missing]
 
     def incrementalmissingrevs(self, common=None):
         """Return an object that can be used to incrementally compute the
         revision numbers of the ancestors of arbitrary sets that are not
         ancestors of common. This is an ancestor.incrementalmissingancestors
         object.
 
         'common' is a list of revision numbers. If common is not supplied, uses
         nullrev.
         """
         if common is None:
             common = [nullrev]
 
         return ancestor.incrementalmissingancestors(self.parentrevs, common)
 
     def findmissingrevs(self, common=None, heads=None):
         """Return the revision numbers of the ancestors of heads that
         are not ancestors of common.
 
         More specifically, return a list of revision numbers corresponding to
         nodes N such that every N satisfies the following constraints:
 
           1. N is an ancestor of some node in 'heads'
           2. N is not an ancestor of any node in 'common'
 
         The list is sorted by revision number, meaning it is
         topologically sorted.
 
         'heads' and 'common' are both lists of revision numbers. If heads is
         not supplied, uses all of the revlog's heads. If common is not
         supplied, uses nullid."""
         if common is None:
             common = [nullrev]
         if heads is None:
             heads = self.headrevs()
 
         inc = self.incrementalmissingrevs(common=common)
         return inc.missingancestors(heads)
 
     def findmissing(self, common=None, heads=None):
         """Return the ancestors of heads that are not ancestors of common.
 
         More specifically, return a list of nodes N such that every N
         satisfies the following constraints:
 
           1. N is an ancestor of some node in 'heads'
           2. N is not an ancestor of any node in 'common'
 
         The list is sorted by revision number, meaning it is
         topologically sorted.
 
         'heads' and 'common' are both lists of node IDs. If heads is
         not supplied, uses all of the revlog's heads. If common is not
         supplied, uses nullid."""
         if common is None:
             common = [nullid]
         if heads is None:
             heads = self.heads()
 
         common = [self.rev(n) for n in common]
         heads = [self.rev(n) for n in heads]
 
         inc = self.incrementalmissingrevs(common=common)
         return [self.node(r) for r in inc.missingancestors(heads)]
 
     def nodesbetween(self, roots=None, heads=None):
         """Return a topological path from 'roots' to 'heads'.
 
         Return a tuple (nodes, outroots, outheads) where 'nodes' is a
         topologically sorted list of all nodes N that satisfy both of
         these constraints:
 
           1. N is a descendant of some node in 'roots'
           2. N is an ancestor of some node in 'heads'
 
         Every node is considered to be both a descendant and an ancestor
         of itself, so every reachable node in 'roots' and 'heads' will be
         included in 'nodes'.
 
         'outroots' is the list of reachable nodes in 'roots', i.e., the
         subset of 'roots' that is returned in 'nodes'. Likewise,
         'outheads' is the subset of 'heads' that is also in 'nodes'.
 
         'roots' and 'heads' are both lists of node IDs. If 'roots' is
         unspecified, uses nullid as the only root. If 'heads' is
         unspecified, uses list of all of the revlog's heads."""
         nonodes = ([], [], [])
         if roots is not None:
             roots = list(roots)
             if not roots:
                 return nonodes
             lowestrev = min([self.rev(n) for n in roots])
         else:
             roots = [nullid] # Everybody's a descendant of nullid
             lowestrev = nullrev
         if (lowestrev == nullrev) and (heads is None):
             # We want _all_ the nodes!
             return ([self.node(r) for r in self], [nullid], list(self.heads()))
         if heads is None:
             # All nodes are ancestors, so the latest ancestor is the last
             # node.
             highestrev = len(self) - 1
             # Set ancestors to None to signal that every node is an ancestor.
             ancestors = None
             # Set heads to an empty dictionary for later discovery of heads
             heads = {}
         else:
             heads = list(heads)
             if not heads:
                 return nonodes
             ancestors = set()
             # Turn heads into a dictionary so we can remove 'fake' heads.
             # Also, later we will be using it to filter out the heads we can't
             # find from roots.
             heads = dict.fromkeys(heads, False)
         # Start at the top and keep marking parents until we're done.
         nodestotag = set(heads)
         # Remember where the top was so we can use it as a limit later.
         highestrev = max([self.rev(n) for n in nodestotag])
         while nodestotag:
             # grab a node to tag
             n = nodestotag.pop()
             # Never tag nullid
             if n == nullid:
                 continue
             # A node's revision number represents its place in a
             # topologically sorted list of nodes.
             r = self.rev(n)
             if r >= lowestrev:
                 if n not in ancestors:
                     # If we are possibly a descendant of one of the roots
                     # and we haven't already been marked as an ancestor
                     ancestors.add(n) # Mark as ancestor
                     # Add non-nullid parents to list of nodes to tag.
                     nodestotag.update([p for p in self.parents(n) if
                                        p != nullid])
                 elif n in heads: # We've seen it before, is it a fake head?
                     # So it is, real heads should not be the ancestors of
                     # any other heads.
                     heads.pop(n)
         if not ancestors:
             return nonodes
         # Now that we have our set of ancestors, we want to remove any
         # roots that are not ancestors.
 
         # If one of the roots was nullid, everything is included anyway.
         if lowestrev > nullrev:
             # But, since we weren't, let's recompute the lowest rev to not
             # include roots that aren't ancestors.
 
             # Filter out roots that aren't ancestors of heads
             roots = [root for root in roots if root in ancestors]
             # Recompute the lowest revision
             if roots:
                 lowestrev = min([self.rev(root) for root in roots])
             else:
                 # No more roots? Return empty list
                 return nonodes
         else:
             # We are descending from nullid, and don't need to care about
             # any other roots.
             lowestrev = nullrev
             roots = [nullid]
         # Transform our roots list into a set.
         descendants = set(roots)
957 # Also, keep the original roots so we can filter out roots that aren't
982 # Also, keep the original roots so we can filter out roots that aren't
958 # 'real' roots (i.e. are descended from other roots).
983 # 'real' roots (i.e. are descended from other roots).
959 roots = descendants.copy()
984 roots = descendants.copy()
960 # Our topologically sorted list of output nodes.
985 # Our topologically sorted list of output nodes.
961 orderedout = []
986 orderedout = []
962 # Don't start at nullid since we don't want nullid in our output list,
987 # Don't start at nullid since we don't want nullid in our output list,
963 # and if nullid shows up in descendants, empty parents will look like
988 # and if nullid shows up in descendants, empty parents will look like
964 # they're descendants.
989 # they're descendants.
965 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
990 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
966 n = self.node(r)
991 n = self.node(r)
967 isdescendant = False
992 isdescendant = False
968 if lowestrev == nullrev: # Everybody is a descendant of nullid
993 if lowestrev == nullrev: # Everybody is a descendant of nullid
969 isdescendant = True
994 isdescendant = True
970 elif n in descendants:
995 elif n in descendants:
971 # n is already a descendant
996 # n is already a descendant
972 isdescendant = True
997 isdescendant = True
973 # This check only needs to be done here because all the roots
998 # This check only needs to be done here because all the roots
974 # will start being marked is descendants before the loop.
999 # will start being marked is descendants before the loop.
975 if n in roots:
1000 if n in roots:
976 # If n was a root, check if it's a 'real' root.
1001 # If n was a root, check if it's a 'real' root.
977 p = tuple(self.parents(n))
1002 p = tuple(self.parents(n))
978 # If any of its parents are descendants, it's not a root.
1003 # If any of its parents are descendants, it's not a root.
979 if (p[0] in descendants) or (p[1] in descendants):
1004 if (p[0] in descendants) or (p[1] in descendants):
980 roots.remove(n)
1005 roots.remove(n)
981 else:
1006 else:
982 p = tuple(self.parents(n))
1007 p = tuple(self.parents(n))
983 # A node is a descendant if either of its parents are
1008 # A node is a descendant if either of its parents are
984 # descendants. (We seeded the dependents list with the roots
1009 # descendants. (We seeded the dependents list with the roots
985 # up there, remember?)
1010 # up there, remember?)
986 if (p[0] in descendants) or (p[1] in descendants):
1011 if (p[0] in descendants) or (p[1] in descendants):
987 descendants.add(n)
1012 descendants.add(n)
988 isdescendant = True
1013 isdescendant = True
989 if isdescendant and ((ancestors is None) or (n in ancestors)):
1014 if isdescendant and ((ancestors is None) or (n in ancestors)):
990 # Only include nodes that are both descendants and ancestors.
1015 # Only include nodes that are both descendants and ancestors.
991 orderedout.append(n)
1016 orderedout.append(n)
992 if (ancestors is not None) and (n in heads):
1017 if (ancestors is not None) and (n in heads):
993 # We're trying to figure out which heads are reachable
1018 # We're trying to figure out which heads are reachable
994 # from roots.
1019 # from roots.
995 # Mark this head as having been reached
1020 # Mark this head as having been reached
996 heads[n] = True
1021 heads[n] = True
997 elif ancestors is None:
1022 elif ancestors is None:
998 # Otherwise, we're trying to discover the heads.
1023 # Otherwise, we're trying to discover the heads.
999 # Assume this is a head because if it isn't, the next step
1024 # Assume this is a head because if it isn't, the next step
1000 # will eventually remove it.
1025 # will eventually remove it.
1001 heads[n] = True
1026 heads[n] = True
1002 # But, obviously its parents aren't.
1027 # But, obviously its parents aren't.
1003 for p in self.parents(n):
1028 for p in self.parents(n):
1004 heads.pop(p, None)
1029 heads.pop(p, None)
1005 heads = [head for head, flag in heads.iteritems() if flag]
1030 heads = [head for head, flag in heads.iteritems() if flag]
1006 roots = list(roots)
1031 roots = list(roots)
1007 assert orderedout
1032 assert orderedout
1008 assert roots
1033 assert roots
1009 assert heads
1034 assert heads
1010 return (orderedout, roots, heads)
1035 return (orderedout, roots, heads)
1011
1036
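    # Illustrative sketch of nodesbetween(), not part of the original
    # module: on a linear history n0 <- n1 <- n2 (hypothetical node IDs,
    # 'rl' an assumed revlog instance),
    #
    #   nodes, outroots, outheads = rl.nodesbetween([n1], [n2])
    #   # nodes == [n1, n2]; outroots == [n1]; outheads == [n2]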
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

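    # Hedged example for _headrevs(): in a DAG where revs 2 and 3 both
    # have parent 1, revs 0 and 1 get marked off through their children's
    # index entries, so the scan returns [2, 3] -- the same answer the
    # C index's headrevs() fast path would give.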
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]

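    # Usage sketch for heads() (assumed revlog instance 'rl'):
    #
    #   rl.heads()               # all childless nodes
    #   rl.heads(start=node)     # only heads descending from node
    #   rl.heads(stop=[node])    # revs from node onward act as childless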
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

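    # Cost note for children() (a hedged sketch, not a guarantee): the
    # loop scans every revision after rev(node), so for a revlog with N
    # revisions the call does on the order of N - rev(node) parentrevs()
    # lookups.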
    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

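    # Hedged example tying isancestor() to commonancestorsheads(): a is
    # an ancestor of b exactly when a itself heads the common-ancestor
    # set, e.g.
    #
    #   rl.commonancestorsheads(a, b) == [a]   # implies rl.isancestor(a, b)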
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except (TypeError, binascii.Error):
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

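    # Illustrative forms accepted by lookup() (hypothetical values):
    #
    #   rl.lookup(5)           # revision number
    #   rl.lookup('5')         # str(revision number)
    #   rl.lookup(node)        # 20-byte binary nodeid
    #   rl.lookup('1bde8e8e')  # unambiguous hex prefix
    #
    # An ambiguous prefix raises LookupError('ambiguous identifier').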
    def shortest(self, hexnode, minlength=1):
        """Find the shortest unambiguous prefix that matches hexnode."""
        def isvalid(test):
            try:
                if self._partialmatch(test) is None:
                    return False

                try:
                    i = int(test)
                    # if we are a pure int, then starting with zero will not be
                    # confused as a rev; or, obviously, if the int is larger
                    # than the value of the tip rev
                    if test[0] == '0' or i > len(self):
                        return True
                    return False
                except ValueError:
                    return True
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True

        shortest = hexnode
        startlength = max(6, minlength)
        length = startlength
        while True:
            test = hexnode[:length]
            if isvalid(test):
                shortest = test
                if length == minlength or length > startlength:
                    return shortest
                length -= 1
            else:
                length += 1
                if len(shortest) <= length:
                    return shortest

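    # Sketch of the probing loop in shortest(): start at max(6, minlength),
    # grow the prefix until it becomes unambiguous, otherwise shrink it
    # while it stays valid. For a hypothetical node unique in 4 hex digits
    # and minlength=1, the probes would be lengths 6, 5, 4, 3, returning
    # the 4-digit prefix -- the last one that was still valid.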
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

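    # Hedged sketch of the window arithmetic in _readsegment(), assuming
    # the power-of-two chunk cache size the revlog enforces elsewhere:
    #
    #   cachesize = 65536
    #   offset, length = 70000, 100
    #   realoffset = 70000 & ~65535                             # 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536   # 65536
    #
    # i.e. one aligned read of [65536, 131072) covers [70000, 70100).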
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

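    # Hedged illustration of the trailing-empty-rev skip in _chunks():
    # if a sliced chunk is [5, 6, 7] and revs 6 and 7 have zero-length
    # deltas, the reversed scan stops at 5:
    #
    #   for lastrev in [7, 6, 5]:       # revschunk[::-1]
    #       if length(lastrev) != 0:    # first non-empty rev wins
    #           break
    #
    # so a single _getsegmentforrevs(5, 5) read serves the whole chunk.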
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

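    # Usage sketch for revision() (assumed instance 'rl'): both calls
    # below yield the text of rev 5; raw=True additionally skips the
    # flag-processor 'read' transforms and returns the stored rawtext,
    # which is what changegroup generation wants:
    #
    #   rl.revision(rl.node(5))
    #   rl.revision(5, raw=True)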
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

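    # Hedged sketch of the processor tuple consumed by _processflags();
    # the registration helper named below is an assumption about the
    # module-level API, and REVIDX_SOMEFLAG is hypothetical:
    #
    #   def readxform(rl, text): return text, True    # (text, validatehash)
    #   def writexform(rl, text): return text, True
    #   def rawxform(rl, rawtext): return True        # validatehash only
    #   addflagprocessor(REVIDX_SOMEFLAG,
    #                    (readxform, writexform, rawxform))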
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, pycompat.bytestr(revornode)))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True,
                         checkambig=self._checkambig)
        self.version &= ~FLAG_INLINE_DATA
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta)

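    # Minimal usage sketch for addrevision(), assuming a transaction
    # 'tr', a linkrev 'linkrev' and parent nodes p1/p2 (all hypothetical):
    #
    #   node = rl.addrevision(text, tr, linkrev, p1, p2)
    #
    # With flags set, text is first run through the 'write' transforms
    # and the resulting rawtext is what gets stored and hashed.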
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') % str(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

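    # Dispatch summary for the branches above (informal): 'x' routes to
    # zlib via the aliased _zlibdecompress, '\0' means the chunk is stored
    # plain and returned as-is, 'u' marks uncompressed data with a
    # one-byte marker to strip, and any other header byte resolves an
    # engine once through util.compengines.forrevlogheader() and memoizes
    # it in self._decompressors.
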
    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d

        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = dist # ensure the conditional passes
        maxdist = max(maxdist, defaultmax)
        if (dist > maxdist or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True

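    # A worked example of the bounds above (illustrative numbers): with
    # textlen = 1000 and _maxdeltachainspan unset, maxdist becomes
    # max(dist, 4000) and the span check can never fire, so only the size
    # and chain-length checks apply; a delta with l = 1200 (> textlen) or
    # compresseddeltalen = 2500 (> 2 * textlen) is rejected, as is any
    # chain longer than a configured _maxchainlen.
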
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        btext = [rawtext]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw when
            # they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                if self._inline:
                    fh = ifh
                else:
                    fh = dfh
                basetext = self.revision(baserev, _df=fh, raw=True)
                btext[0] = mdiff.patch(basetext, delta)

            try:
                res = self._processflags(btext[0], flags, 'read', raw=True)
                btext[0], validatehash = res
                if validatehash:
                    self.checkhash(btext[0], node, p1=p1, p2=p2)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

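        # A note on the special case in buildtext() above (a sketch based
        # on mdiff's encoding, stated as an assumption rather than read
        # from this file): replacediffheader(oldlen, newlen) packs a
        # single ">lll" hunk header covering the whole base text, so a
        # delta that starts with it replaces the entire base and the new
        # fulltext is simply the delta with that 12-byte header stripped.
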
        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    if self._inline:
                        fh = ifh
                    else:
                        fh = dfh
                    ptext = self.revision(rev, _df=fh, raw=True)
                    delta = mdiff.textdiff(ptext, t)
            header, data = self.compress(delta)
            deltalen = len(header) + len(data)
            chainbase = self.chainbase(rev)
            dist = deltalen + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += deltalen
            return (dist, deltalen, (header, data), base,
                    chainbase, chainlen, compresseddeltalen)

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        delta = None
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        # should we try to build a delta?
        if prev != nullrev and self.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and self._generaldelta and self._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # builddelta() will reuse the cache.
                candidatedelta = builddelta(cachedelta[0])
                tested.add(cachedelta[0])
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
            if delta is None and self._generaldelta:
                # exclude the already lazily-tested base, if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not self._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                pdeltas = []
                for p in parents:
                    pd = builddelta(p)
                    if self._isgooddelta(pd, textlen):
                        pdeltas.append(pd)
                if pdeltas:
                    delta = min(pdeltas, key=lambda x: x[1])
            if delta is None and prev not in tested:
                # the other approaches failed; try against prev to hopefully
                # save us a fulltext.
                candidatedelta = builddelta(prev)
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
        if delta is not None:
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
        else:
            rawtext = buildtext()
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = buildtext()

        if type(rawtext) == str: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

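    # Recap of the delta selection above (informal): try the cached delta
    # from the incoming changegroup first (when generaldelta and
    # lazydeltabase allow it), then the closest parent -- or both parents
    # with aggressivemergedeltas -- then ``prev``, and finally fall back
    # to storing a fulltext when no candidate satisfies _isgooddelta().
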
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

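    # Layout note for the inline branch above: inline revlogs interleave
    # each index entry with its data chunk in the .i file, hence
    # ``offset += curr * self._io.size``. Illustrative arithmetic,
    # assuming the usual 64-byte v1 entry size: with curr == 3, the new
    # entry lands at 3 * 64 bytes of index entries plus all preceding
    # chunk bytes.
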
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb))

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional;
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+",
                                      checkambig=self._checkambig)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

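    # Usage sketch (``deltaiter``, ``cl`` and ``onrev`` are illustrative
    # names, not part of this module): each element of ``deltas`` is a
    # 7-tuple (node, p1, p2, linknode, deltabase, delta, flags), and
    # ``linkmapper`` turns a linknode into the local linkrev, so applying
    # a changegroup chunk might look like:
    #
    #   added = filelog.addgroup(deltaiter, cl.rev, tr,
    #                            addrevisioncb=onrev)
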
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

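    # Example walk (hypothetical linkrevs): revs 0..3 where rev 0 has
    # linkrev 0, rev 1 linkrev 1, rev 2 linkrev 4, and a second head
    # rev 3 with linkrev 2. With minlink = 3 the loop yields
    # strippoint = 2 and brokenrevs = {3}: truncating at rev 2 also
    # removes rev 3 even though its linkrev 2 would survive the strip,
    # so the caller must re-add it afterwards.
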
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

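    # Illustrative arithmetic for the truncation points registered above:
    # if getstrippoint() returns rev == 5, a non-inline revlog marks the
    # data file at self.start(5) and the index at 5 * self._io.size,
    # while an inline revlog marks the single .i file at
    # self.start(5) + 5 * self._io.size.
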
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEALL = {'always', 'samerevs', 'never'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        the two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means if
        you are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        When ``None``, the destination revlog's current setting is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, str(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                        checkambig=False)
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                try:
                    destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
                                            flags, cachedelta, ifh, dfh)
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd
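
    # A minimal usage sketch for clone() (``repo``, ``oldlog`` and
    # ``newlog`` are illustrative names, not part of this module):
    # copying ``oldlog`` into an empty ``newlog`` while recomputing every
    # delta, inside a transaction:
    #
    #   tr = repo.transaction('upgrade')
    #   try:
    #       oldlog.clone(tr, newlog, deltareuse=oldlog.DELTAREUSENEVER)
    #       tr.close()
    #   finally:
    #       tr.release()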