sparse-read: move from a recursive-based approach to a heap-based one...
Boris Feld
r34881:9e18ab7f default
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1,2338 +1,2349 @@
 # revlog.py - storage back-end for mercurial
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 """Storage back-end for Mercurial.

 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """

 from __future__ import absolute_import

 import binascii
 import collections
 import errno
 import hashlib
+import heapq
 import os
 import struct
 import zlib

 # import stuff from node for others to import from revlog
 from .node import (
     bin,
     hex,
     nullid,
     nullrev,
     wdirhex,
     wdirid,
     wdirrev,
 )
 from .i18n import _
 from . import (
     ancestor,
     error,
     mdiff,
     policy,
     pycompat,
     templatefilters,
     util,
 )

 parsers = policy.importmod(r'parsers')

 # Aliased for performance.
 _zlibdecompress = zlib.decompress

 # revlog header flags
 REVLOGV0 = 0
 REVLOGV1 = 1
 # Dummy value until file format is finalized.
 # Reminder: change the bounds check in revlog.__init__ when this is changed.
 REVLOGV2 = 0xDEAD
 FLAG_INLINE_DATA = (1 << 16)
 FLAG_GENERALDELTA = (1 << 17)
 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
 REVLOG_DEFAULT_FORMAT = REVLOGV1
 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
 REVLOGV2_FLAGS = REVLOGV1_FLAGS

 # revlog index flags
 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
 REVIDX_DEFAULT_FLAGS = 0
 # stable order in which flags need to be processed and their processors applied
 REVIDX_FLAGS_ORDER = [
     REVIDX_ISCENSORED,
     REVIDX_ELLIPSIS,
     REVIDX_EXTSTORED,
 ]
 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)

 # max size of revlog with inline data
 _maxinline = 131072
 _chunksize = 1048576

 RevlogError = error.RevlogError
 LookupError = error.LookupError
 CensoredNodeError = error.CensoredNodeError
 ProgrammingError = error.ProgrammingError

 # Store flag processors (cf. 'addflagprocessor()' to register)
 _flagprocessors = {
     REVIDX_ISCENSORED: None,
 }

 def addflagprocessor(flag, processor):
     """Register a flag processor on a revision data flag.

     Invariant:
     - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
     - Only one flag processor can be registered on a specific flag.
     - flagprocessors must be 3-tuples of functions (read, write, raw) with the
       following signatures:
           - (read)  f(self, rawtext) -> text, bool
           - (write) f(self, text) -> rawtext, bool
           - (raw)   f(self, rawtext) -> bool
     "text" is presented to the user. "rawtext" is stored in revlog data, not
     directly visible to the user.
     The boolean returned by these transforms is used to determine whether
     the returned text can be used for hash integrity checking. For example,
     if "write" returns False, then "text" is used to generate hash. If
     "write" returns True, that basically means "rawtext" returned by "write"
     should be used to generate hash. Usually, "write" and "read" return
     different booleans. And "raw" returns a same boolean as "write".

     Note: The 'raw' transform is used for changegroup generation and in some
     debug commands. In this case the transform only indicates whether the
     contents can be used for hash integrity checks.
     """
     if not flag & REVIDX_KNOWN_FLAGS:
         msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
         raise ProgrammingError(msg)
     if flag not in REVIDX_FLAGS_ORDER:
         msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
         raise ProgrammingError(msg)
     if flag in _flagprocessors:
         msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
         raise error.Abort(msg)
     _flagprocessors[flag] = processor

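As an aside, the three-tuple contract documented above can be made concrete with a small hypothetical processor; everything below (the hex transform, the helper names, the commented-out registration) is illustrative and not part of this change:

import binascii

# Hypothetical flag processor sketch: revision text is stored hex-encoded.
def _readhex(self, rawtext):
    # stored form -> user text; True: this text matches the recorded hash
    return binascii.unhexlify(rawtext), True

def _writehex(self, text):
    # user text -> stored form; False: the hash is computed on "text"
    return binascii.hexlify(text), False

def _rawhex(self, rawtext):
    # like "write": the stored form cannot be hash-checked directly
    return False

# registration mutates module state, so it is shown commented out:
# addflagprocessor(REVIDX_EXTSTORED, (_readhex, _writehex, _rawhex))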
 def getoffset(q):
     return int(q >> 16)

 def gettype(q):
     return int(q & 0xFFFF)

 def offset_type(offset, type):
     if (type & ~REVIDX_KNOWN_FLAGS) != 0:
         raise ValueError('unknown revlog index flags')
     return int(int(offset) << 16 | type)

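A quick round-trip of the packing arithmetic above, assuming the three helpers and the flag constants from this file are in scope: the low 16 bits of the first index field carry the flags, the high bits carry the byte offset.

packed = offset_type(131072, REVIDX_ISCENSORED)
assert getoffset(packed) == 131072           # offset comes back from the high bits
assert gettype(packed) == REVIDX_ISCENSORED  # flags come back from the low 16 bits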
 _nullhash = hashlib.sha1(nullid)

 def hash(text, p1, p2):
     """generate a hash from the given text and its parent hashes

     This hash combines both the current file contents and its history
     in a manner that makes it easy to distinguish nodes with the same
     content in the revision graph.
     """
     # As of now, if one of the parent node is null, p2 is null
     if p2 == nullid:
         # deep copy of a hash is faster than creating one
         s = _nullhash.copy()
         s.update(p1)
     else:
         # none of the parent nodes are nullid
         if p1 < p2:
             a = p1
             b = p2
         else:
             a = p2
             b = p1
         s = hashlib.sha1(a)
         s.update(b)
     s.update(text)
     return s.digest()

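Since the parents are ordered before hashing, the resulting node id is symmetric in p1 and p2. A self-contained restatement of the scheme (the `nodehash` name is ours, not Mercurial's):

import hashlib

def nodehash(text, p1, p2):
    # same scheme as hash() above: sha1(min(p1, p2) + max(p1, p2) + text)
    a, b = sorted((p1, p2))
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()

p1 = hashlib.sha1(b'parent1').digest()
p2 = hashlib.sha1(b'parent2').digest()
# swapping the parents yields the same node id
assert nodehash(b'contents', p1, p2) == nodehash(b'contents', p2, p1)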
 def _slicechunk(revlog, revs):
     """slice revs to reduce the amount of unrelated data to be read from disk.

     ``revs`` is sliced into groups that should be read in one time.
     Assume that revs are sorted.
     """
     start = revlog.start
     length = revlog.length

-    chunkqueue = collections.deque()
-    chunkqueue.append((revs, 0))
-
-    while chunkqueue:
-        revs, depth = chunkqueue.popleft()
-
-        startbyte = start(revs[0])
-        endbyte = start(revs[-1]) + length(revs[-1])
-        deltachainspan = endbyte - startbyte
-
-        if deltachainspan <= revlog._srminblocksize or len(revs) <= 1:
-            yield revs
-            continue
-
-        # Find where is the largest hole (this is where we would split) and
-        # sum up the lengths of useful data to compute the density of the span
-        textlen = 0
-        prevend = None
-        largesthole = 0
-        idxlargesthole = -1
-        for i, rev in enumerate(revs):
-            revstart = start(rev)
-            revlen = length(rev)
-
-            if prevend is not None:
-                hole = revstart - prevend
-                if hole > largesthole:
-                    largesthole = hole
-                    idxlargesthole = i
-
-            textlen += revlen
-            prevend = revstart + revlen
-
-        density = textlen / float(deltachainspan) if deltachainspan > 0 else 1.0
-
-        if density > revlog._srdensitythreshold:
-            yield revs
-            continue
-
-        # Add the left and right parts so that they will be sliced
-        # recursively too
-        chunkqueue.append((revs[:idxlargesthole], depth + 1))
-        chunkqueue.append((revs[idxlargesthole:], depth + 1))
+    if len(revs) <= 1:
+        yield revs
+        return
+
+    startbyte = start(revs[0])
+    endbyte = start(revs[-1]) + length(revs[-1])
+    readdata = deltachainspan = endbyte - startbyte
+
+    chainpayload = sum(length(r) for r in revs)
+
+    if deltachainspan:
+        density = chainpayload / float(deltachainspan)
+    else:
+        density = 1.0
+
+    # Store the gaps in a heap to have them sorted by decreasing size
+    gapsheap = []
+    heapq.heapify(gapsheap)
+    prevend = None
+    for i, rev in enumerate(revs):
+        revstart = start(rev)
+        revlen = length(rev)
+
+        if prevend is not None:
+            gapsize = revstart - prevend
+            if gapsize:
+                heapq.heappush(gapsheap, (-gapsize, i))
+
+        prevend = revstart + revlen
+
+    # Collect the indices of the largest holes until the density is acceptable
+    indicesheap = []
+    heapq.heapify(indicesheap)
+    while gapsheap and density < revlog._srdensitythreshold:
+        oppgapsize, gapidx = heapq.heappop(gapsheap)
+
+        heapq.heappush(indicesheap, gapidx)
+
+        # the gap sizes are stored as negatives to be sorted decreasingly
+        # by the heap
+        readdata -= (-oppgapsize)
+        if readdata > 0:
+            density = chainpayload / float(readdata)
+        else:
+            density = 1.0
+
+    # Cut the revs at collected indices
+    previdx = 0
+    while indicesheap:
+        idx = heapq.heappop(indicesheap)
+        yield revs[previdx:idx]
+        previdx = idx
+    yield revs[previdx:]

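To see the new slicing behave without a repository, here is a self-contained sketch of the same gap-heap logic over made-up (start, length) chunk layouts; `slice_by_density`, the layouts, and the 0.25 threshold are illustrative stand-ins, not Mercurial API:

import heapq

def slice_by_density(blocks, threshold=0.25):
    """Split sorted (start, length) blocks at the largest gaps until
    payload / bytes-read reaches the threshold. Illustrative only."""
    if len(blocks) <= 1:
        yield blocks
        return
    readdata = blocks[-1][0] + blocks[-1][1] - blocks[0][0]
    payload = sum(length for _, length in blocks)
    density = payload / float(readdata) if readdata else 1.0

    # negate gap sizes so the min-heap pops the largest gap first
    gaps = []
    prevend = None
    for i, (start, length) in enumerate(blocks):
        if prevend is not None and start - prevend:
            heapq.heappush(gaps, (prevend - start, i))
        prevend = start + length

    cuts = []
    while gaps and density < threshold:
        negsize, idx = heapq.heappop(gaps)
        heapq.heappush(cuts, idx)   # second heap keeps cut indices sorted
        readdata -= -negsize        # skipping the gap shrinks the read
        density = payload / float(readdata) if readdata > 0 else 1.0

    previdx = 0
    while cuts:
        idx = heapq.heappop(cuts)
        yield blocks[previdx:idx]
        previdx = idx
    yield blocks[previdx:]

# two dense runs separated by a 900-byte hole get split apart:
print(list(slice_by_density([(0, 10), (10, 10), (920, 10), (930, 10)])))

Unlike the recursive version, the gaps are collected in a single pass, and popping cut indices from the second heap returns them in ascending order, so the slices still come out in on-disk order.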
 # index v0:
 #  4 bytes: offset
 #  4 bytes: compressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 # 20 bytes: parent 1 nodeid
 # 20 bytes: parent 2 nodeid
 # 20 bytes: nodeid
 indexformatv0 = struct.Struct(">4l20s20s20s")
 indexformatv0_pack = indexformatv0.pack
 indexformatv0_unpack = indexformatv0.unpack

 class revlogoldio(object):
     def __init__(self):
         self.size = indexformatv0.size

     def parseindex(self, data, inline):
         s = self.size
         index = []
         nodemap = {nullid: nullrev}
         n = off = 0
         l = len(data)
         while off + s <= l:
             cur = data[off:off + s]
             off += s
             e = indexformatv0_unpack(cur)
             # transform to revlogv1 format
             e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                   nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
             index.append(e2)
             nodemap[e[6]] = n
             n += 1

         # add the magic null revision at -1
         index.append((0, 0, 0, -1, -1, -1, -1, nullid))

         return index, nodemap, None

     def packentry(self, entry, node, version, rev):
         if gettype(entry[0]):
             raise RevlogError(_('index entry flags need revlog version 1'))
         e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
               node(entry[5]), node(entry[6]), entry[7])
         return indexformatv0_pack(*e2)

 # index ng:
 #  6 bytes: offset
 #  2 bytes: flags
 #  4 bytes: compressed length
 #  4 bytes: uncompressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 #  4 bytes: parent 1 rev
 #  4 bytes: parent 2 rev
 # 32 bytes: nodeid
 indexformatng = struct.Struct(">Qiiiiii20s12x")
 indexformatng_pack = indexformatng.pack
 versionformat = struct.Struct(">I")
 versionformat_pack = versionformat.pack
 versionformat_unpack = versionformat.unpack

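For a concrete feel of the 64-byte v1 record documented above, a sketch that assumes `indexformatng` and `offset_type` from this file are in scope; the entry values themselves are made up:

import hashlib

node = hashlib.sha1(b'example').digest()   # 20-byte stand-in nodeid
# (offset+flags, compressed len, uncompressed len, base rev, link rev,
#  parent 1 rev, parent 2 rev, nodeid); the 12-byte pad brings it to 64
entry = (offset_type(4096, 0), 120, 300, 0, 7, 5, -1, node)
record = indexformatng_pack(*entry)
assert len(record) == indexformatng.size == 64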
 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
 # signed integer)
 _maxentrysize = 0x7fffffff

 class revlogio(object):
     def __init__(self):
         self.size = indexformatng.size

     def parseindex(self, data, inline):
         # call the C implementation to parse the index data
         index, cache = parsers.parse_index2(data, inline)
         return index, getattr(index, 'nodemap', None), cache

     def packentry(self, entry, node, version, rev):
         p = indexformatng_pack(*entry)
         if rev == 0:
             p = versionformat_pack(version) + p[4:]
         return p

 class revlog(object):
     """
     the underlying revision storage object

     A revlog consists of two parts, an index and the revision data.

     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.

     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.

     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.

     If checkambig, indexfile is opened with checkambig=True at
     writing, to avoid file stat ambiguity.

     If mmaplargeindex is True, and an mmapindexthreshold is set, the
     index will be mmapped rather than read if it is larger than the
     configured threshold.
     """
     def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                  mmaplargeindex=False):
         """
         create a revlog object

         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
         """
         self.indexfile = indexfile
         self.datafile = datafile or (indexfile[:-2] + ".d")
         self.opener = opener
         # When True, indexfile is opened with checkambig=True at writing, to
         # avoid file stat ambiguity.
         self._checkambig = checkambig
         # 3-tuple of (node, rev, text) for a raw revision.
         self._cache = None
         # Maps rev to chain base rev.
         self._chainbasecache = util.lrucachedict(100)
         # 2-tuple of (offset, data) of raw data from the revlog at an offset.
         self._chunkcache = (0, '')
         # How much data to read and cache into the raw revlog data cache.
         self._chunkcachesize = 65536
         self._maxchainlen = None
         self._aggressivemergedeltas = False
         self.index = []
         # Mapping of partial identifiers to full nodes.
         self._pcache = {}
         # Mapping of revision integer to full node.
         self._nodecache = {nullid: nullrev}
         self._nodepos = None
         self._compengine = 'zlib'
         self._maxdeltachainspan = -1
         self._withsparseread = False
         self._srdensitythreshold = 0.25
         self._srminblocksize = 262144

         mmapindexthreshold = None
         v = REVLOG_DEFAULT_VERSION
         opts = getattr(opener, 'options', None)
         if opts is not None:
             if 'revlogv2' in opts:
                 # version 2 revlogs always use generaldelta.
                 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
             elif 'revlogv1' in opts:
                 if 'generaldelta' in opts:
                     v |= FLAG_GENERALDELTA
             else:
                 v = 0
             if 'chunkcachesize' in opts:
                 self._chunkcachesize = opts['chunkcachesize']
             if 'maxchainlen' in opts:
                 self._maxchainlen = opts['maxchainlen']
             if 'aggressivemergedeltas' in opts:
                 self._aggressivemergedeltas = opts['aggressivemergedeltas']
             self._lazydeltabase = bool(opts.get('lazydeltabase', False))
             if 'compengine' in opts:
                 self._compengine = opts['compengine']
             if 'maxdeltachainspan' in opts:
                 self._maxdeltachainspan = opts['maxdeltachainspan']
             if mmaplargeindex and 'mmapindexthreshold' in opts:
                 mmapindexthreshold = opts['mmapindexthreshold']
             self._withsparseread = bool(opts.get('with-sparse-read', False))
             if 'sparse-read-density-threshold' in opts:
                 self._srdensitythreshold = opts['sparse-read-density-threshold']
             if 'sparse-read-min-block-size' in opts:
                 self._srminblocksize = opts['sparse-read-min-block-size']

         if self._chunkcachesize <= 0:
             raise RevlogError(_('revlog chunk cache size %r is not greater '
                                 'than 0') % self._chunkcachesize)
         elif self._chunkcachesize & (self._chunkcachesize - 1):
             raise RevlogError(_('revlog chunk cache size %r is not a power '
                                 'of 2') % self._chunkcachesize)

         indexdata = ''
         self._initempty = True
         try:
             f = self.opener(self.indexfile)
             if (mmapindexthreshold is not None and
                     self.opener.fstat(f).st_size >= mmapindexthreshold):
                 indexdata = util.buffer(util.mmapread(f))
             else:
                 indexdata = f.read()
             f.close()
             if len(indexdata) > 0:
                 v = versionformat_unpack(indexdata[:4])[0]
                 self._initempty = False
         except IOError as inst:
             if inst.errno != errno.ENOENT:
                 raise

         self.version = v
         self._inline = v & FLAG_INLINE_DATA
         self._generaldelta = v & FLAG_GENERALDELTA
         flags = v & ~0xFFFF
         fmt = v & 0xFFFF
         if fmt == REVLOGV0:
             if flags:
                 raise RevlogError(_('unknown flags (%#04x) in version %d '
                                     'revlog %s') %
                                   (flags >> 16, fmt, self.indexfile))
         elif fmt == REVLOGV1:
             if flags & ~REVLOGV1_FLAGS:
                 raise RevlogError(_('unknown flags (%#04x) in version %d '
                                     'revlog %s') %
                                   (flags >> 16, fmt, self.indexfile))
         elif fmt == REVLOGV2:
             if flags & ~REVLOGV2_FLAGS:
                 raise RevlogError(_('unknown flags (%#04x) in version %d '
                                     'revlog %s') %
                                   (flags >> 16, fmt, self.indexfile))
         else:
             raise RevlogError(_('unknown version (%d) in revlog %s') %
                               (fmt, self.indexfile))

         self.storedeltachains = True

         self._io = revlogio()
         if self.version == REVLOGV0:
             self._io = revlogoldio()
         try:
             d = self._io.parseindex(indexdata, self._inline)
         except (ValueError, IndexError):
             raise RevlogError(_("index %s is corrupted") % (self.indexfile))
         self.index, nodemap, self._chunkcache = d
         if nodemap is not None:
             self.nodemap = self._nodecache = nodemap
         if not self._chunkcache:
             self._chunkclear()
         # revnum -> (chain-length, sum-delta-length)
         self._chaininfocache = {}
         # revlog header -> revlog compressor
         self._decompressors = {}

     @util.propertycache
     def _compressor(self):
         return util.compengines[self._compengine].revlogcompressor()

     def tip(self):
         return self.node(len(self.index) - 2)
     def __contains__(self, rev):
         return 0 <= rev < len(self)
     def __len__(self):
         return len(self.index) - 1
     def __iter__(self):
         return iter(xrange(len(self)))
     def revs(self, start=0, stop=None):
         """iterate over all rev in this revlog (from start to stop)"""
         step = 1
         if stop is not None:
             if start > stop:
                 step = -1
             stop += step
         else:
             stop = len(self)
         return xrange(start, stop, step)

     @util.propertycache
     def nodemap(self):
         self.rev(self.node(0))
         return self._nodecache

     def hasnode(self, node):
         try:
             self.rev(node)
             return True
         except KeyError:
             return False

     def clearcaches(self):
         self._cache = None
         self._chainbasecache.clear()
         self._chunkcache = (0, '')
         self._pcache = {}

         try:
             self._nodecache.clearcaches()
         except AttributeError:
             self._nodecache = {nullid: nullrev}
             self._nodepos = None

     def rev(self, node):
         try:
             return self._nodecache[node]
         except TypeError:
             raise
         except RevlogError:
             # parsers.c radix tree lookup failed
             if node == wdirid:
                 raise error.WdirUnsupported
             raise LookupError(node, self.indexfile, _('no node'))
         except KeyError:
             # pure python cache lookup failed
             n = self._nodecache
             i = self.index
             p = self._nodepos
             if p is None:
                 p = len(i) - 2
             for r in xrange(p, -1, -1):
                 v = i[r][7]
                 n[v] = r
                 if v == node:
                     self._nodepos = r - 1
                     return r
             if node == wdirid:
                 raise error.WdirUnsupported
             raise LookupError(node, self.indexfile, _('no node'))

     # Accessors for index entries.

     # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
     # are flags.
     def start(self, rev):
         return int(self.index[rev][0] >> 16)

     def flags(self, rev):
         return self.index[rev][0] & 0xFFFF

     def length(self, rev):
         return self.index[rev][1]

     def rawsize(self, rev):
         """return the length of the uncompressed text for a given revision"""
         l = self.index[rev][2]
         if l >= 0:
             return l

         t = self.revision(rev, raw=True)
         return len(t)

     def size(self, rev):
         """length of non-raw text (processed by a "read" flag processor)"""
         # fast path: if no "read" flag processor could change the content,
         # size is rawsize. note: ELLIPSIS is known to not change the content.
         flags = self.flags(rev)
         if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
             return self.rawsize(rev)

         return len(self.revision(rev, raw=False))

     def chainbase(self, rev):
         base = self._chainbasecache.get(rev)
         if base is not None:
             return base

         index = self.index
         base = index[rev][3]
         while base != rev:
             rev = base
             base = index[rev][3]

         self._chainbasecache[rev] = base
         return base

     def linkrev(self, rev):
         return self.index[rev][4]

     def parentrevs(self, rev):
         try:
             return self.index[rev][5:7]
         except IndexError:
             if rev == wdirrev:
                 raise error.WdirUnsupported
             raise

     def node(self, rev):
         try:
             return self.index[rev][7]
         except IndexError:
             if rev == wdirrev:
                 raise error.WdirUnsupported
             raise

     # Derived from index values.

     def end(self, rev):
         return self.start(rev) + self.length(rev)

     def parents(self, node):
         i = self.index
         d = i[self.rev(node)]
         return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

     def chainlen(self, rev):
         return self._chaininfo(rev)[0]

     def _chaininfo(self, rev):
         chaininfocache = self._chaininfocache
         if rev in chaininfocache:
             return chaininfocache[rev]
         index = self.index
         generaldelta = self._generaldelta
         iterrev = rev
         e = index[iterrev]
         clen = 0
         compresseddeltalen = 0
         while iterrev != e[3]:
             clen += 1
             compresseddeltalen += e[1]
             if generaldelta:
                 iterrev = e[3]
             else:
                 iterrev -= 1
             if iterrev in chaininfocache:
                 t = chaininfocache[iterrev]
                 clen += t[0]
                 compresseddeltalen += t[1]
                 break
             e = index[iterrev]
         else:
             # Add text length of base since decompressing that also takes
             # work. For cache hits the length is already included.
             compresseddeltalen += e[1]
         r = (clen, compresseddeltalen)
         chaininfocache[rev] = r
         return r

     def _deltachain(self, rev, stoprev=None):
         """Obtain the delta chain for a revision.

         ``stoprev`` specifies a revision to stop at. If not specified, we
         stop at the base of the chain.

         Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
         revs in ascending order and ``stopped`` is a bool indicating whether
         ``stoprev`` was hit.
         """
         # Try C implementation.
         try:
             return self.index.deltachain(rev, stoprev, self._generaldelta)
         except AttributeError:
             pass

         chain = []

         # Alias to prevent attribute lookup in tight loop.
         index = self.index
         generaldelta = self._generaldelta

         iterrev = rev
         e = index[iterrev]
         while iterrev != e[3] and iterrev != stoprev:
             chain.append(iterrev)
             if generaldelta:
                 iterrev = e[3]
             else:
                 iterrev -= 1
             e = index[iterrev]

         if iterrev == stoprev:
             stopped = True
         else:
             chain.append(iterrev)
             stopped = False

         chain.reverse()
         return chain, stopped

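The pure-Python fallback above reduces to a pointer chase through the index; a self-contained sketch of the generaldelta case, where a plain dict stands in for column 3 (the delta base) of each index entry:

def deltachain(basemap, rev):
    # follow base pointers until a revision is its own base (a full
    # snapshot), then reverse so the chain reads base-first
    chain = []
    while basemap[rev] != rev:
        chain.append(rev)
        rev = basemap[rev]
    chain.append(rev)
    chain.reverse()
    return chain

# rev 0 is a snapshot; revs 1 and 2 are deltas against their bases
assert deltachain({0: 0, 1: 0, 2: 1}, 2) == [0, 1, 2]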
     def ancestors(self, revs, stoprev=0, inclusive=False):
         """Generate the ancestors of 'revs' in reverse topological order.
         Does not generate revs lower than stoprev.

         See the documentation for ancestor.lazyancestors for more details."""

         return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                       inclusive=inclusive)

     def descendants(self, revs):
         """Generate the descendants of 'revs' in revision order.

         Yield a sequence of revision numbers starting with a child of
         some rev in revs, i.e., each revision is *not* considered a
         descendant of itself. Results are ordered by revision number (a
         topological sort)."""
         first = min(revs)
         if first == nullrev:
             for i in self:
                 yield i
             return

         seen = set(revs)
         for i in self.revs(start=first + 1):
             for x in self.parentrevs(i):
                 if x != nullrev and x in seen:
                     seen.add(i)
                     yield i
                     break

     def findcommonmissing(self, common=None, heads=None):
         """Return a tuple of the ancestors of common and the ancestors of heads
         that are not ancestors of common. In revset terminology, we return the
         tuple:

           ::common, (::heads) - (::common)

         The list is sorted by revision number, meaning it is
         topologically sorted.

         'heads' and 'common' are both lists of node IDs. If heads is
         not supplied, uses all of the revlog's heads. If common is not
         supplied, uses nullid."""
         if common is None:
             common = [nullid]
         if heads is None:
             heads = self.heads()

         common = [self.rev(n) for n in common]
         heads = [self.rev(n) for n in heads]

         # we want the ancestors, but inclusive
         class lazyset(object):
             def __init__(self, lazyvalues):
                 self.addedvalues = set()
                 self.lazyvalues = lazyvalues

             def __contains__(self, value):
                 return value in self.addedvalues or value in self.lazyvalues

             def __iter__(self):
                 added = self.addedvalues
                 for r in added:
                     yield r
                 for r in self.lazyvalues:
                     if not r in added:
                         yield r

             def add(self, value):
                 self.addedvalues.add(value)

             def update(self, values):
                 self.addedvalues.update(values)

         has = lazyset(self.ancestors(common))
         has.add(nullrev)
         has.update(common)

         # take all ancestors from heads that aren't in has
         missing = set()
         visit = collections.deque(r for r in heads if r not in has)
         while visit:
             r = visit.popleft()
             if r in missing:
                 continue
             else:
                 missing.add(r)
                 for p in self.parentrevs(r):
                     if p not in has:
                         visit.append(p)
         missing = list(missing)
         missing.sort()
         return has, [self.node(miss) for miss in missing]

     def incrementalmissingrevs(self, common=None):
         """Return an object that can be used to incrementally compute the
         revision numbers of the ancestors of arbitrary sets that are not
         ancestors of common. This is an ancestor.incrementalmissingancestors
         object.

         'common' is a list of revision numbers. If common is not supplied, uses
         nullrev.
         """
         if common is None:
             common = [nullrev]

         return ancestor.incrementalmissingancestors(self.parentrevs, common)

     def findmissingrevs(self, common=None, heads=None):
         """Return the revision numbers of the ancestors of heads that
         are not ancestors of common.

         More specifically, return a list of revision numbers corresponding to
         nodes N such that every N satisfies the following constraints:

           1. N is an ancestor of some node in 'heads'
           2. N is not an ancestor of any node in 'common'

         The list is sorted by revision number, meaning it is
         topologically sorted.

         'heads' and 'common' are both lists of revision numbers. If heads is
         not supplied, uses all of the revlog's heads. If common is not
         supplied, uses nullid."""
         if common is None:
             common = [nullrev]
         if heads is None:
             heads = self.headrevs()

         inc = self.incrementalmissingrevs(common=common)
         return inc.missingancestors(heads)

     def findmissing(self, common=None, heads=None):
         """Return the ancestors of heads that are not ancestors of common.

         More specifically, return a list of nodes N such that every N
         satisfies the following constraints:

           1. N is an ancestor of some node in 'heads'
           2. N is not an ancestor of any node in 'common'

         The list is sorted by revision number, meaning it is
         topologically sorted.

         'heads' and 'common' are both lists of node IDs. If heads is
         not supplied, uses all of the revlog's heads. If common is not
         supplied, uses nullid."""
         if common is None:
             common = [nullid]
         if heads is None:
             heads = self.heads()

         common = [self.rev(n) for n in common]
         heads = [self.rev(n) for n in heads]

         inc = self.incrementalmissingrevs(common=common)
         return [self.node(r) for r in inc.missingancestors(heads)]

     def nodesbetween(self, roots=None, heads=None):
         """Return a topological path from 'roots' to 'heads'.

         Return a tuple (nodes, outroots, outheads) where 'nodes' is a
         topologically sorted list of all nodes N that satisfy both of
         these constraints:

           1. N is a descendant of some node in 'roots'
           2. N is an ancestor of some node in 'heads'

         Every node is considered to be both a descendant and an ancestor
         of itself, so every reachable node in 'roots' and 'heads' will be
         included in 'nodes'.

         'outroots' is the list of reachable nodes in 'roots', i.e., the
         subset of 'roots' that is returned in 'nodes'. Likewise,
         'outheads' is the subset of 'heads' that is also in 'nodes'.

         'roots' and 'heads' are both lists of node IDs. If 'roots' is
         unspecified, uses nullid as the only root. If 'heads' is
         unspecified, uses list of all of the revlog's heads."""
         nonodes = ([], [], [])
         if roots is not None:
             roots = list(roots)
             if not roots:
                 return nonodes
             lowestrev = min([self.rev(n) for n in roots])
         else:
             roots = [nullid] # Everybody's a descendant of nullid
             lowestrev = nullrev
         if (lowestrev == nullrev) and (heads is None):
             # We want _all_ the nodes!
             return ([self.node(r) for r in self], [nullid], list(self.heads()))
         if heads is None:
             # All nodes are ancestors, so the latest ancestor is the last
             # node.
             highestrev = len(self) - 1
             # Set ancestors to None to signal that every node is an ancestor.
             ancestors = None
             # Set heads to an empty dictionary for later discovery of heads
             heads = {}
         else:
             heads = list(heads)
             if not heads:
                 return nonodes
             ancestors = set()
             # Turn heads into a dictionary so we can remove 'fake' heads.
             # Also, later we will be using it to filter out the heads we can't
             # find from roots.
             heads = dict.fromkeys(heads, False)
             # Start at the top and keep marking parents until we're done.
             nodestotag = set(heads)
             # Remember where the top was so we can use it as a limit later.
             highestrev = max([self.rev(n) for n in nodestotag])
             while nodestotag:
                 # grab a node to tag
                 n = nodestotag.pop()
                 # Never tag nullid
                 if n == nullid:
                     continue
                 # A node's revision number represents its place in a
                 # topologically sorted list of nodes.
                 r = self.rev(n)
                 if r >= lowestrev:
                     if n not in ancestors:
                         # If we are possibly a descendant of one of the roots
                         # and we haven't already been marked as an ancestor
                         ancestors.add(n) # Mark as ancestor
                         # Add non-nullid parents to list of nodes to tag.
                         nodestotag.update([p for p in self.parents(n) if
                                            p != nullid])
                     elif n in heads: # We've seen it before, is it a fake head?
                         # So it is, real heads should not be the ancestors of
                         # any other heads.
                         heads.pop(n)
             if not ancestors:
                 return nonodes
             # Now that we have our set of ancestors, we want to remove any
             # roots that are not ancestors.

             # If one of the roots was nullid, everything is included anyway.
             if lowestrev > nullrev:
                 # But, since we weren't, let's recompute the lowest rev to not
                 # include roots that aren't ancestors.

                 # Filter out roots that aren't ancestors of heads
                 roots = [root for root in roots if root in ancestors]
                 # Recompute the lowest revision
                 if roots:
                     lowestrev = min([self.rev(root) for root in roots])
                 else:
                     # No more roots? Return empty list
                     return nonodes
             else:
                 # We are descending from nullid, and don't need to care about
                 # any other roots.
                 lowestrev = nullrev
                 roots = [nullid]
         # Transform our roots list into a set.
         descendants = set(roots)
         # Also, keep the original roots so we can filter out roots that aren't
         # 'real' roots (i.e. are descended from other roots).
         roots = descendants.copy()
         # Our topologically sorted list of output nodes.
         orderedout = []
         # Don't start at nullid since we don't want nullid in our output list,
         # and if nullid shows up in descendants, empty parents will look like
         # they're descendants.
         for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
954 n = self.node(r)
965 n = self.node(r)
955 isdescendant = False
966 isdescendant = False
956 if lowestrev == nullrev: # Everybody is a descendant of nullid
967 if lowestrev == nullrev: # Everybody is a descendant of nullid
957 isdescendant = True
968 isdescendant = True
958 elif n in descendants:
969 elif n in descendants:
959 # n is already a descendant
970 # n is already a descendant
960 isdescendant = True
971 isdescendant = True
961 # This check only needs to be done here because all the roots
972 # This check only needs to be done here because all the roots
962 # will start being marked is descendants before the loop.
973 # will start being marked is descendants before the loop.
963 if n in roots:
974 if n in roots:
964 # If n was a root, check if it's a 'real' root.
975 # If n was a root, check if it's a 'real' root.
965 p = tuple(self.parents(n))
976 p = tuple(self.parents(n))
966 # If any of its parents are descendants, it's not a root.
977 # If any of its parents are descendants, it's not a root.
967 if (p[0] in descendants) or (p[1] in descendants):
978 if (p[0] in descendants) or (p[1] in descendants):
968 roots.remove(n)
979 roots.remove(n)
969 else:
980 else:
970 p = tuple(self.parents(n))
981 p = tuple(self.parents(n))
971 # A node is a descendant if either of its parents are
982 # A node is a descendant if either of its parents are
972 # descendants. (We seeded the dependents list with the roots
983 # descendants. (We seeded the dependents list with the roots
973 # up there, remember?)
984 # up there, remember?)
974 if (p[0] in descendants) or (p[1] in descendants):
985 if (p[0] in descendants) or (p[1] in descendants):
975 descendants.add(n)
986 descendants.add(n)
976 isdescendant = True
987 isdescendant = True
977 if isdescendant and ((ancestors is None) or (n in ancestors)):
988 if isdescendant and ((ancestors is None) or (n in ancestors)):
978 # Only include nodes that are both descendants and ancestors.
989 # Only include nodes that are both descendants and ancestors.
979 orderedout.append(n)
990 orderedout.append(n)
980 if (ancestors is not None) and (n in heads):
991 if (ancestors is not None) and (n in heads):
981 # We're trying to figure out which heads are reachable
992 # We're trying to figure out which heads are reachable
982 # from roots.
993 # from roots.
983 # Mark this head as having been reached
994 # Mark this head as having been reached
984 heads[n] = True
995 heads[n] = True
985 elif ancestors is None:
996 elif ancestors is None:
986 # Otherwise, we're trying to discover the heads.
997 # Otherwise, we're trying to discover the heads.
987 # Assume this is a head because if it isn't, the next step
998 # Assume this is a head because if it isn't, the next step
988 # will eventually remove it.
999 # will eventually remove it.
989 heads[n] = True
1000 heads[n] = True
990 # But, obviously its parents aren't.
1001 # But, obviously its parents aren't.
991 for p in self.parents(n):
1002 for p in self.parents(n):
992 heads.pop(p, None)
1003 heads.pop(p, None)
993 heads = [head for head, flag in heads.iteritems() if flag]
1004 heads = [head for head, flag in heads.iteritems() if flag]
994 roots = list(roots)
1005 roots = list(roots)
995 assert orderedout
1006 assert orderedout
996 assert roots
1007 assert roots
997 assert heads
1008 assert heads
998 return (orderedout, roots, heads)
1009 return (orderedout, roots, heads)
999
1010
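    # Usage sketch (illustrative, not part of the upstream file):
    # nodesbetween() answers "which nodes lie between these roots and
    # heads?".  For a hypothetical revlog ``rl`` and binary node ids ``a``
    # and ``b``:
    #
    #     nodes, outroots, outheads = rl.nodesbetween([a], [b])
    #
    # ``nodes`` comes back topologically sorted; ``outroots``/``outheads``
    # are the subsets of the inputs that actually bound ``nodes``.
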
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

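    # Worked example (illustrative): for a linear 3-rev log (0 <- 1 <- 2),
    # each rev first marks itself as a candidate head, then clears its
    # parents' slots:
    #
    #     ishead = [0, 0, 0, 0]   # one slot per rev, plus one for nullrev
    #     r=0: ishead[0]=1, parents (-1, -1) clear the trailing slot
    #     r=1: ishead[1]=1, parent 0 clears ishead[0]
    #     r=2: ishead[2]=1, parent 1 clears ishead[1]
    #
    # leaving only rev 2 as a head.
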
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

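    # Usage sketch (illustrative), with ``rl`` a hypothetical revlog:
    #
    #     rl.heads()                # every childless node
    #     rl.heads(start=somenode)  # only heads descended from somenode
    #
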
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

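    # Cost note (informal): children() and descendant() both scan forward
    # from the starting rev, so each call is roughly O(tip - rev); e.g.
    # descendant(2, 5) walks self.descendants([2]) and stops as soon as it
    # reaches or passes rev 5.
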
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

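    # Usage sketch (illustrative): picking a merge base for nodes ``a`` and
    # ``b`` is simply
    #
    #     base = rl.ancestor(a, b)
    #
    # which prefers the C index's ancestors() and falls back to the pure
    # Python ancestor module on AttributeError/OverflowError.
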
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except (TypeError, binascii.Error):
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

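    # Usage sketch (illustrative): lookup() accepts the identifier shapes
    # documented above and funnels them through _match()/_partialmatch():
    #
    #     rl.lookup(0)           # revision number
    #     rl.lookup('0')         # str(revision number)
    #     rl.lookup(binnode)     # 20-byte binary node
    #     rl.lookup('d4e5f6')    # unambiguous hex prefix
    #
    # (``rl`` and the identifiers are hypothetical.)  Ambiguous or unknown
    # identifiers raise LookupError.
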
    def shortest(self, hexnode, minlength=1):
        """Find the shortest unambiguous prefix that matches hexnode."""
        def isvalid(test):
            try:
                if self._partialmatch(test) is None:
                    return False

                try:
                    i = int(test)
                    # if we are a pure int, then starting with zero will not be
                    # confused as a rev; or, obviously, if the int is larger
                    # than the value of the tip rev
                    if test[0] == '0' or i > len(self):
                        return True
                    return False
                except ValueError:
                    return True
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True

        shortest = hexnode
        startlength = max(6, minlength)
        length = startlength
        while True:
            test = hexnode[:length]
            if isvalid(test):
                shortest = test
                if length == minlength or length > startlength:
                    return shortest
                length -= 1
            else:
                length += 1
                if len(shortest) <= length:
                    return shortest

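    # Walk-through (illustrative): shortest() probes at length 6 first.  If
    # that prefix is valid it shrinks the candidate one character at a time,
    # remembering the last valid prefix; otherwise it grows the prefix until
    # one becomes valid.  isvalid() also refuses prefixes that could be
    # mistaken for a plain revision number, since those would be ambiguous.
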
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

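    # Worked example (illustrative) of the window arithmetic above, with a
    # chunk cache size of 65536: a request for offset=70000, length=1000
    # yields
    #
    #     realoffset = 70000 & ~65535                          -> 65536
    #     reallength = ((71000 + 65536) & ~65535) - realoffset -> 65536
    #
    # i.e. the read is widened to whole cache-size-aligned blocks and the
    # caller's slice is carved back out with util.buffer().
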
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

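    # Note (illustrative): in an inline revlog the index entries and the
    # revision data live interleaved in one file, so rev r's data sits
    # (r + 1) index-entry sizes (self._io.size bytes each) further in; e.g.
    # rev 2's chunk starts self.start(2) + 3 * self._io.size bytes into the
    # file.
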
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

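    # Note (illustrative): with sparse-read enabled, _slicechunk() groups the
    # requested revs by physical proximity so each group is served by one
    # reasonably dense contiguous read, instead of a single read spanning
    # mostly unrelated data; each group is then decompressed chunk by chunk
    # out of its shared buffer.
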
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

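    # Example (illustrative): with generaldelta, the index entry records the
    # real delta base, so deltaparent(rev) can be any earlier rev (often a
    # parent); without it, deltas implicitly chain against rev - 1.  A base
    # equal to rev itself means a full snapshot, reported as nullrev.
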
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

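    # Flow sketch (illustrative): a cache-miss read of rev r boils down to
    #
    #     chain, stopped = self._deltachain(r)  # base rev plus delta revs
    #     bins = self._chunks(chain)            # decompress in one pass
    #     text = mdiff.patches(bins[0], bins[1:])
    #
    # followed by _processflags() and, when validatehash is set, checkhash().
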
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation ") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

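    # Ordering example (illustrative): suppose two registered flags A and B
    # are both set.  A 'read' applies their processors in REVIDX_FLAGS_ORDER
    # (A then B) while a 'write' applies them reversed (B then A), so a
    # write-then-read round-trip composes correctly even when the transforms
    # do not commute.
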
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, pycompat.bytestr(revornode)))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True,
                         checkambig=self._checkambig)
        self.version &= ~FLAG_INLINE_DATA
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such
            case)
        flags - the known flags to set on the revision
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') % str(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

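    # Header byte dispatch (illustrative summary of the branches above):
    #
    #     'x'   -> zlib-compressed chunk
    #     '\0'  -> stored raw; the data already starts with the null byte
    #     'u'   -> stored raw; the leading 'u' marker is stripped
    #     other -> resolved through the registered compression engines
    #
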
    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d

        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = dist # ensure the conditional passes
        maxdist = max(maxdist, defaultmax)
        if (dist > maxdist or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True

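    # A worked example of the bounds above, with hypothetical numbers and no
    # explicit limits configured (_maxdeltachainspan = 0, _maxchainlen unset):
    #
    #     textlen = 1000
    #     dist, l, compresseddeltalen = 3500, 400, 1200
    #     maxdist = dist                  # span check disabled when unset...
    #     maxdist = max(maxdist, 4000)    # ...but never below textlen * 4
    #     assert dist <= maxdist          # I/O bound: read span is acceptable
    #     assert l <= textlen             # delta is no larger than the text
    #     assert compresseddeltalen <= 2 * textlen  # CPU bound on the chain
    #
    # A candidate failing any bound is rejected and the caller falls back to
    # another base or to storing a full text.
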
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        btext = [rawtext]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw when
            # they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                if self._inline:
                    fh = ifh
                else:
                    fh = dfh
                basetext = self.revision(baserev, _df=fh, raw=True)
                btext[0] = mdiff.patch(basetext, delta)

            try:
                res = self._processflags(btext[0], flags, 'read', raw=True)
                btext[0], validatehash = res
                if validatehash:
                    self.checkhash(btext[0], node, p1=p1, p2=p2)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    if self._inline:
                        fh = ifh
                    else:
                        fh = dfh
                    ptext = self.revision(rev, _df=fh, raw=True)
                    delta = mdiff.textdiff(ptext, t)
            header, data = self.compress(delta)
            deltalen = len(header) + len(data)
            chainbase = self.chainbase(rev)
            dist = deltalen + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += deltalen
            return (dist, deltalen, (header, data), base,
                    chainbase, chainlen, compresseddeltalen)

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        delta = None
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        # should we try to build a delta?
        if prev != nullrev and self.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and self._generaldelta and self._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # builddelta() will reuse the cache.
                candidatedelta = builddelta(cachedelta[0])
                tested.add(cachedelta[0])
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
            if delta is None and self._generaldelta:
                # exclude the already lazily tested base, if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not self._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                pdeltas = []
                for p in parents:
                    pd = builddelta(p)
                    if self._isgooddelta(pd, textlen):
                        pdeltas.append(pd)
                if pdeltas:
                    delta = min(pdeltas, key=lambda x: x[1])
            if delta is None and prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                candidatedelta = builddelta(prev)
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
        if delta is not None:
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
        else:
            rawtext = buildtext()
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = buildtext()

        if type(rawtext) == str: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

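    # A condensed restatement of the delta selection above (a sketch only,
    # ignoring the ``tested`` bookkeeping that avoids re-testing a base;
    # ``reuse_wire_delta`` is a hypothetical name standing in for the
    # cachedelta/generaldelta/lazydeltabase test): candidate base groups are
    # tried in a fixed order, and within a group the smallest good delta wins.
    #
    #     for group in ([cachedelta[0]] if reuse_wire_delta else [],
    #                   parents, [prev]):
    #         good = [d for d in map(builddelta, group)
    #                 if self._isgooddelta(d, textlen)]
    #         if good:
    #             delta = min(good, key=lambda d: d[1])  # smallest deltalen
    #             break
    #     else:
    #         delta = None                               # store a fulltext
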
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

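    # The seek-before-write workaround above, shown in isolation (a minimal
    # sketch with a hypothetical file name, not code from this module):
    #
    #     import os
    #     fh = open('/tmp/example.d', 'a+')
    #     fh.read()                   # handle is now positioned for reading
    #     fh.seek(0, os.SEEK_END)     # reposition defensively before appending
    #     fh.write('payload')
    #     fh.close()
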
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb))

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+",
                                      checkambig=self._checkambig)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

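    # The shape ``addgroup`` consumes, sketched with hypothetical names: each
    # item of ``deltas`` is the 7-tuple unpacked at the top of the loop, and
    # every delta base must already be known to the revlog (or appear earlier
    # in the group):
    #
    #     entries = [(node, p1node, p2node, linknode, deltabasenode,
    #                 deltabytes, 0)]
    #     added = rlog.addgroup(iter(entries), linkmapper, tr)
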
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

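    # Example semantics, assuming a changelog-like revlog where linkrevs
    # simply equal the rev numbers:
    #
    #     strippoint, broken = rlog.getstrippoint(5)
    #     assert strippoint == 5      # revs 5 and above must be truncated
    #     assert broken == set()      # nothing above the cut links below it
    #
    # ``broken`` becomes non-empty only when linkrevs are interleaved, i.e.
    # when a rev at or above the strip point has a linkrev below minlink and
    # so is lost even though its changeset survives.
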
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

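    # Worked truncation offsets for the arithmetic above, assuming a v1 index
    # (64-byte entries) and a strip point of rev 3:
    #
    #     non-inline: cut the data file at self.start(3) and the index file
    #                 at 3 * 64
    #     inline:     cut the single file at self.start(3) + 3 * 64, since
    #                 index entries and data chunks are interleaved
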
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

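    # In other words: ``dd`` is the difference between the data file's real
    # size and what the index accounts for, and ``di`` is the number of
    # surplus index bytes beyond whole entries (or, for inline revlogs,
    # beyond the interleaved entries and data). A clean revlog reports:
    #
    #     assert rlog.checksize() == (0, 0)
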
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEALL = {'always', 'samerevs', 'never'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        If unspecified, the destination revlog's current setting is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, str(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                        checkambig=False)
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                try:
                    destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
                                            flags, cachedelta, ifh, dfh)
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd
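
    # A typical invocation, sketched with hypothetical names (``tr`` an open
    # transaction, ``dest`` an empty revlog created by the caller):
    #
    #     src.clone(tr, dest, deltareuse=revlog.DELTAREUSESAMEREVS)
    #
    # Passing DELTAREUSENEVER instead forces every delta to be recomputed,
    # which is useful when re-deltaing a repository during a format upgrade.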