revlog: split `rawtext` retrieval out of _revisiondata...
marmoute
r43060:e91411fc default
@@ -1,2680 +1,2676 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
)
from .utils import (
    interfaceutil,
    storageutil,
    stringutil,
)

# Blanket usage of all the names to prevent pyflakes complaints.
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')
rustancestor = policy.importrust(r'ancestor')
rustdagop = policy.importrust(r'dagop')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

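# A quick sketch of the packing the three helpers above implement: the
# offset lives in the high bits and the 16 flag bits in the low bits, so
# packing and unpacking round-trip (REVIDX_ISCENSORED is one known flag):
#
#   >>> v = offset_type(4096, REVIDX_ISCENSORED)
#   >>> getoffset(v)
#   4096
#   >>> gettype(v) == REVIDX_ISCENSORED
#   True
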
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

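# As a size sketch, the struct format above pins a v0 entry at a fixed
# 76 bytes: four 4-byte big-endian longs plus three 20-byte nodeids:
#
#   >>> indexformatv0.size
#   76
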
class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

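# A matching size check for the "ng" (v1) layout: 8 bytes of offset+flags,
# six 4-byte ints, and the 20-byte nodeid padded to 32 bytes by the 12x,
# for 64 bytes per entry:
#
#   >>> indexformatng.size
#   64
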
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

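# The rev == 0 special case above is what puts the version header on disk:
# the first entry's offset is always 0, so its leading four bytes are free
# to carry the version/flags word instead. A sketch (node is unused by this
# io class, so None stands in):
#
#   >>> entry = (0, 10, 10, 0, 0, -1, -1, b'\x11' * 20)
#   >>> p = revlogio().packentry(entry, None, REVLOGV1 | FLAG_INLINE_DATA, 0)
#   >>> versionformat_unpack(p[:4])[0] == REVLOGV1 | FLAG_INLINE_DATA
#   True
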
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False,
                 upperboundcomp=None):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = getattr(self.opener, 'options', {}) or {}

        if 'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif 'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if 'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif getattr(self.opener, 'options', None) is not None:
            # If options provided but no 'revlog*' found, the repository
            # would have no 'requires' file in it, which means we have to
            # stick to the old format.
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if 'chunkcachesize' in opts:
            self._chunkcachesize = opts['chunkcachesize']
        if 'maxchainlen' in opts:
            self._maxchainlen = opts['maxchainlen']
        if 'deltabothparents' in opts:
            self._deltabothparents = opts['deltabothparents']
        self._lazydelta = bool(opts.get('lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
        if 'compengine' in opts:
            self._compengine = opts['compengine']
        if 'zlib.level' in opts:
            self._compengineopts['zlib.level'] = opts['zlib.level']
        if 'zstd.level' in opts:
            self._compengineopts['zstd.level'] = opts['zstd.level']
        if 'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts['maxdeltachainspan']
        if self._mmaplargeindex and 'mmapindexthreshold' in opts:
            mmapindexthreshold = opts['mmapindexthreshold']
        self._sparserevlog = bool(opts.get('sparse-revlog', False))
        withsparseread = bool(opts.get('with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if 'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts['sparse-read-density-threshold']
        if 'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts['sparse-read-min-gap-size']
        if opts.get('enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

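    # To sketch the header split used above: the low 16 bits of the version
    # word select the on-disk format and the high bits carry feature flags:
    #
    #   >>> v = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
    #   >>> v & 0xFFFF == REVLOGV1
    #   True
    #   >>> bool(v & FLAG_INLINE_DATA), bool(v & FLAG_GENERALDELTA)
    #   (True, True)
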
    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

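    # Callers use the context manager above roughly like this (a sketch;
    # `offset` and `length` stand in for a data segment to fetch):
    #
    #   with self._datareadfp() as fp:
    #       fp.seek(offset)
    #       data = fp.read(length)
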
    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7] # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            # If we are using the native C version, you are in a fun case
            # where self.index, self.nodemap and self._nodecache are the same
            # object.
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

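    # Read together, the accessors above spell out the v1 index tuple layout
    # for entry = self.index[rev] (a summary of existing behavior):
    #
    #   entry[0]: offset << 16 | flags    (start(), flags())
    #   entry[1]: compressed length       (length())
    #   entry[2]: uncompressed length     (rawsize())
    #   entry[3]: delta base rev          (chainbase() follows this)
    #   entry[4]: link rev                (linkrev())
    #   entry[5]: parent 1 rev            (parentrevs())
    #   entry[6]: parent 2 rev            (parentrevs())
    #   entry[7]: nodeid                  (node())
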
    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

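    # A worked sketch of the pure-Python walk above: with generaldelta on,
    # each hop follows the stored base pointer (entry[3]); without it, the
    # base is implicitly rev - 1. If rev 3 deltas against rev 1, and rev 1
    # against the full-text base rev 0, then for a hypothetical instance rl:
    #
    #   >>> rl._deltachain(3)
    #   ([0, 1, 3], False)
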
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        elif util.safehasattr(parsers, 'rustlazyancestors'):
            lazyancestors = ancestor.rustlazyancestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

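    # To make the revset identity above concrete: on a linear history
    # 0 -> 1 -> 2 -> 3 -> 4 with common=[node(2)] and heads=[node(4)],
    # 'has' lazily covers ::2 (nullrev, 0, 1, 2) and the missing list
    # comes back as [node(3), node(4)], sorted by revision.
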
867 def incrementalmissingrevs(self, common=None):
867 def incrementalmissingrevs(self, common=None):
868 """Return an object that can be used to incrementally compute the
868 """Return an object that can be used to incrementally compute the
869 revision numbers of the ancestors of arbitrary sets that are not
869 revision numbers of the ancestors of arbitrary sets that are not
870 ancestors of common. This is an ancestor.incrementalmissingancestors
870 ancestors of common. This is an ancestor.incrementalmissingancestors
871 object.
871 object.
872
872
873 'common' is a list of revision numbers. If common is not supplied, uses
873 'common' is a list of revision numbers. If common is not supplied, uses
874 nullrev.
874 nullrev.
875 """
875 """
876 if common is None:
876 if common is None:
877 common = [nullrev]
877 common = [nullrev]
878
878
879 if rustancestor is not None:
879 if rustancestor is not None:
880 return rustancestor.MissingAncestors(self.index, common)
880 return rustancestor.MissingAncestors(self.index, common)
881 return ancestor.incrementalmissingancestors(self.parentrevs, common)
881 return ancestor.incrementalmissingancestors(self.parentrevs, common)
882
882
883 def findmissingrevs(self, common=None, heads=None):
883 def findmissingrevs(self, common=None, heads=None):
884 """Return the revision numbers of the ancestors of heads that
884 """Return the revision numbers of the ancestors of heads that
885 are not ancestors of common.
885 are not ancestors of common.
886
886
887 More specifically, return a list of revision numbers corresponding to
887 More specifically, return a list of revision numbers corresponding to
888 nodes N such that every N satisfies the following constraints:
888 nodes N such that every N satisfies the following constraints:
889
889
890 1. N is an ancestor of some node in 'heads'
890 1. N is an ancestor of some node in 'heads'
891 2. N is not an ancestor of any node in 'common'
891 2. N is not an ancestor of any node in 'common'
892
892
893 The list is sorted by revision number, meaning it is
893 The list is sorted by revision number, meaning it is
894 topologically sorted.
894 topologically sorted.
895
895
896 'heads' and 'common' are both lists of revision numbers. If heads is
896 'heads' and 'common' are both lists of revision numbers. If heads is
897 not supplied, uses all of the revlog's heads. If common is not
897 not supplied, uses all of the revlog's heads. If common is not
898 supplied, uses nullid."""
898 supplied, uses nullid."""
899 if common is None:
899 if common is None:
900 common = [nullrev]
900 common = [nullrev]
901 if heads is None:
901 if heads is None:
902 heads = self.headrevs()
902 heads = self.headrevs()
903
903
904 inc = self.incrementalmissingrevs(common=common)
904 inc = self.incrementalmissingrevs(common=common)
905 return inc.missingancestors(heads)
905 return inc.missingancestors(heads)
906
906
907 def findmissing(self, common=None, heads=None):
907 def findmissing(self, common=None, heads=None):
908 """Return the ancestors of heads that are not ancestors of common.
908 """Return the ancestors of heads that are not ancestors of common.
909
909
910 More specifically, return a list of nodes N such that every N
910 More specifically, return a list of nodes N such that every N
911 satisfies the following constraints:
911 satisfies the following constraints:
912
912
913 1. N is an ancestor of some node in 'heads'
913 1. N is an ancestor of some node in 'heads'
914 2. N is not an ancestor of any node in 'common'
914 2. N is not an ancestor of any node in 'common'
915
915
916 The list is sorted by revision number, meaning it is
916 The list is sorted by revision number, meaning it is
917 topologically sorted.
917 topologically sorted.
918
918
919 'heads' and 'common' are both lists of node IDs. If heads is
919 'heads' and 'common' are both lists of node IDs. If heads is
920 not supplied, uses all of the revlog's heads. If common is not
920 not supplied, uses all of the revlog's heads. If common is not
921 supplied, uses nullid."""
921 supplied, uses nullid."""
922 if common is None:
922 if common is None:
923 common = [nullid]
923 common = [nullid]
924 if heads is None:
924 if heads is None:
925 heads = self.heads()
925 heads = self.heads()
926
926
927 common = [self.rev(n) for n in common]
927 common = [self.rev(n) for n in common]
928 heads = [self.rev(n) for n in heads]
928 heads = [self.rev(n) for n in heads]
929
929
930 inc = self.incrementalmissingrevs(common=common)
930 inc = self.incrementalmissingrevs(common=common)
931 return [self.node(r) for r in inc.missingancestors(heads)]
931 return [self.node(r) for r in inc.missingancestors(heads)]
932
932
933     def nodesbetween(self, roots=None, heads=None):
934         """Return a topological path from 'roots' to 'heads'.
935
936         Return a tuple (nodes, outroots, outheads) where 'nodes' is a
937         topologically sorted list of all nodes N that satisfy both of
938         these constraints:
939
940           1. N is a descendant of some node in 'roots'
941           2. N is an ancestor of some node in 'heads'
942
943         Every node is considered to be both a descendant and an ancestor
944         of itself, so every reachable node in 'roots' and 'heads' will be
945         included in 'nodes'.
946
947         'outroots' is the list of reachable nodes in 'roots', i.e., the
948         subset of 'roots' that is returned in 'nodes'. Likewise,
949         'outheads' is the subset of 'heads' that is also in 'nodes'.
950
951         'roots' and 'heads' are both lists of node IDs. If 'roots' is
952         unspecified, uses nullid as the only root. If 'heads' is
953         unspecified, uses the list of all of the revlog's heads."""
954         nonodes = ([], [], [])
955         if roots is not None:
956             roots = list(roots)
957             if not roots:
958                 return nonodes
959             lowestrev = min([self.rev(n) for n in roots])
960         else:
961             roots = [nullid] # Everybody's a descendant of nullid
962             lowestrev = nullrev
963         if (lowestrev == nullrev) and (heads is None):
964             # We want _all_ the nodes!
965             return ([self.node(r) for r in self], [nullid], list(self.heads()))
966         if heads is None:
967             # All nodes are ancestors, so the latest ancestor is the last
968             # node.
969             highestrev = len(self) - 1
970             # Set ancestors to None to signal that every node is an ancestor.
971             ancestors = None
972             # Set heads to an empty dictionary for later discovery of heads
973             heads = {}
974         else:
975             heads = list(heads)
976             if not heads:
977                 return nonodes
978             ancestors = set()
979             # Turn heads into a dictionary so we can remove 'fake' heads.
980             # Also, later we will be using it to filter out the heads we can't
981             # find from roots.
982             heads = dict.fromkeys(heads, False)
983             # Start at the top and keep marking parents until we're done.
984             nodestotag = set(heads)
985             # Remember where the top was so we can use it as a limit later.
986             highestrev = max([self.rev(n) for n in nodestotag])
987             while nodestotag:
988                 # grab a node to tag
989                 n = nodestotag.pop()
990                 # Never tag nullid
991                 if n == nullid:
992                     continue
993                 # A node's revision number represents its place in a
994                 # topologically sorted list of nodes.
995                 r = self.rev(n)
996                 if r >= lowestrev:
997                     if n not in ancestors:
998                         # If we are possibly a descendant of one of the roots
999                         # and we haven't already been marked as an ancestor
1000                        ancestors.add(n) # Mark as ancestor
1001                        # Add non-nullid parents to list of nodes to tag.
1002                        nodestotag.update([p for p in self.parents(n) if
1003                                           p != nullid])
1004                    elif n in heads: # We've seen it before, is it a fake head?
1005                        # So it is, real heads should not be the ancestors of
1006                        # any other heads.
1007                        heads.pop(n)
1008        if not ancestors:
1009            return nonodes
1010        # Now that we have our set of ancestors, we want to remove any
1011        # roots that are not ancestors.
1012
1013        # If one of the roots was nullid, everything is included anyway.
1014        if lowestrev > nullrev:
1015            # But, since we weren't, let's recompute the lowest rev to not
1016            # include roots that aren't ancestors.
1017
1018            # Filter out roots that aren't ancestors of heads
1019            roots = [root for root in roots if root in ancestors]
1020            # Recompute the lowest revision
1021            if roots:
1022                lowestrev = min([self.rev(root) for root in roots])
1023            else:
1024                # No more roots? Return empty list
1025                return nonodes
1026        else:
1027            # We are descending from nullid, and don't need to care about
1028            # any other roots.
1029            lowestrev = nullrev
1030            roots = [nullid]
1031        # Transform our roots list into a set.
1032        descendants = set(roots)
1033        # Also, keep the original roots so we can filter out roots that aren't
1034        # 'real' roots (i.e. are descended from other roots).
1035        roots = descendants.copy()
1036        # Our topologically sorted list of output nodes.
1037        orderedout = []
1038        # Don't start at nullid since we don't want nullid in our output list,
1039        # and if nullid shows up in descendants, empty parents will look like
1040        # they're descendants.
1041        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1042            n = self.node(r)
1043            isdescendant = False
1044            if lowestrev == nullrev: # Everybody is a descendant of nullid
1045                isdescendant = True
1046            elif n in descendants:
1047                # n is already a descendant
1048                isdescendant = True
1049                # This check only needs to be done here because all the roots
1050                # will start being marked as descendants before the loop.
1051                if n in roots:
1052                    # If n was a root, check if it's a 'real' root.
1053                    p = tuple(self.parents(n))
1054                    # If any of its parents are descendants, it's not a root.
1055                    if (p[0] in descendants) or (p[1] in descendants):
1056                        roots.remove(n)
1057            else:
1058                p = tuple(self.parents(n))
1059                # A node is a descendant if either of its parents are
1060                # descendants. (We seeded the descendants list with the roots
1061                # up there, remember?)
1062                if (p[0] in descendants) or (p[1] in descendants):
1063                    descendants.add(n)
1064                    isdescendant = True
1065            if isdescendant and ((ancestors is None) or (n in ancestors)):
1066                # Only include nodes that are both descendants and ancestors.
1067                orderedout.append(n)
1068                if (ancestors is not None) and (n in heads):
1069                    # We're trying to figure out which heads are reachable
1070                    # from roots.
1071                    # Mark this head as having been reached
1072                    heads[n] = True
1073                elif ancestors is None:
1074                    # Otherwise, we're trying to discover the heads.
1075                    # Assume this is a head because if it isn't, the next step
1076                    # will eventually remove it.
1077                    heads[n] = True
1078                    # But, obviously its parents aren't.
1079                    for p in self.parents(n):
1080                        heads.pop(p, None)
1081        heads = [head for head, flag in heads.iteritems() if flag]
1082        roots = list(roots)
1083        assert orderedout
1084        assert roots
1085        assert heads
1086        return (orderedout, roots, heads)
1087
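The returned triple is easiest to see on a toy graph. This standalone sketch mimics only the documented contract, not the real implementation (sorted() stands in for topological order; all names are made up):

    TOYDAG = {'a': [], 'b': ['a'], 'c': ['b']}   # node -> parents

    def toy_closure(nodes, step):
        seen, stack = set(), list(nodes)
        while stack:
            n = stack.pop()
            if n not in seen:
                seen.add(n)
                stack.extend(step(n))
        return seen

    def toy_nodesbetween(roots, heads):
        ancs = toy_closure(heads, lambda n: TOYDAG[n])
        descs = toy_closure(roots,
                            lambda n: [c for c, ps in TOYDAG.items() if n in ps])
        nodes = sorted(ancs & descs)        # stand-in for topological order
        outroots = [r for r in roots if r in nodes]
        outheads = [h for h in heads if h in nodes]
        return nodes, outroots, outheads

    assert toy_nodesbetween(['b'], ['c']) == (['b', 'c'], ['b'], ['c'])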
1088     def headrevs(self, revs=None):
1089         if revs is None:
1090             try:
1091                 return self.index.headrevs()
1092             except AttributeError:
1093                 return self._headrevs()
1094         if rustdagop is not None:
1095             return rustdagop.headrevs(self.index, revs)
1096         return dagop.headrevs(revs, self._uncheckedparentrevs)
1097
1098     def computephases(self, roots):
1099         return self.index.computephasesmapsets(roots)
1100
1101     def _headrevs(self):
1102         count = len(self)
1103         if not count:
1104             return [nullrev]
1105         # we won't iterate over filtered revs, so nobody is a head at the start
1106         ishead = [0] * (count + 1)
1107         index = self.index
1108         for r in self:
1109             ishead[r] = 1 # I may be a head
1110             e = index[r]
1111             ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1112         return [r for r, val in enumerate(ishead) if val]
1113
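The fallback above amounts to "assume every revision is a head, then clear the flag for every parent". A standalone restatement with a made-up parent table:

    # rev -> (p1, p2); -1 plays the role of nullrev (toy data).
    parentrevs = {0: (-1, -1), 1: (0, -1), 2: (0, -1), 3: (1, 2)}

    ishead = [1] * len(parentrevs)
    for r, ps in parentrevs.items():
        for p in ps:
            if p != -1:
                ishead[p] = 0      # a revision with a child is not a head

    assert [r for r, v in enumerate(ishead) if v] == [3]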
1114     def heads(self, start=None, stop=None):
1115         """return the list of all nodes that have no children
1116
1117         if start is specified, only heads that are descendants of
1118         start will be returned
1119         if stop is specified, it will consider all the revs from stop
1120         as if they had no children
1121         """
1122         if start is None and stop is None:
1123             if not len(self):
1124                 return [nullid]
1125             return [self.node(r) for r in self.headrevs()]
1126
1127         if start is None:
1128             start = nullrev
1129         else:
1130             start = self.rev(start)
1131
1132         stoprevs = set(self.rev(n) for n in stop or [])
1133
1134         revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1135                                     stoprevs=stoprevs)
1136
1137         return [self.node(rev) for rev in revs]
1138
1139     def children(self, node):
1140         """find the children of a given node"""
1141         c = []
1142         p = self.rev(node)
1143         for r in self.revs(start=p + 1):
1144             prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1145             if prevs:
1146                 for pr in prevs:
1147                     if pr == p:
1148                         c.append(self.node(r))
1149             elif p == nullrev:
1150                 c.append(self.node(r))
1151         return c
1152
1153     def commonancestorsheads(self, a, b):
1154         """calculate all the heads of the common ancestors of nodes a and b"""
1155         a, b = self.rev(a), self.rev(b)
1156         ancs = self._commonancestorsheads(a, b)
1157         return pycompat.maplist(self.node, ancs)
1158
1159     def _commonancestorsheads(self, *revs):
1160         """calculate all the heads of the common ancestors of revs"""
1161         try:
1162             ancs = self.index.commonancestorsheads(*revs)
1163         except (AttributeError, OverflowError): # C implementation failed
1164             ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1165         return ancs
1166
1167     def isancestor(self, a, b):
1168         """return True if node a is an ancestor of node b
1169
1170         A revision is considered an ancestor of itself."""
1171         a, b = self.rev(a), self.rev(b)
1172         return self.isancestorrev(a, b)
1173
1174     def isancestorrev(self, a, b):
1175         """return True if revision a is an ancestor of revision b
1176
1177         A revision is considered an ancestor of itself.
1178
1179         The implementation of this is trivial but the use of
1180         reachableroots is not."""
1181         if a == nullrev:
1182             return True
1183         elif a == b:
1184             return True
1185         elif a > b:
1186             return False
1187         return bool(self.reachableroots(a, [b], [a], includepath=False))
1188
1189     def reachableroots(self, minroot, heads, roots, includepath=False):
1190         """return (heads(::<roots> and <roots>::<heads>))
1191
1192         If includepath is True, return (<roots>::<heads>)."""
1193         try:
1194             return self.index.reachableroots2(minroot, heads, roots,
1195                                               includepath)
1196         except AttributeError:
1197             return dagop._reachablerootspure(self.parentrevs,
1198                                              minroot, roots, heads, includepath)
1199
1200     def ancestor(self, a, b):
1201         """calculate the "best" common ancestor of nodes a and b"""
1202
1203         a, b = self.rev(a), self.rev(b)
1204         try:
1205             ancs = self.index.ancestors(a, b)
1206         except (AttributeError, OverflowError):
1207             ancs = ancestor.ancestors(self.parentrevs, a, b)
1208         if ancs:
1209             # choose a consistent winner when there's a tie
1210             return min(map(self.node, ancs))
1211         return nullid
1212
1213     def _match(self, id):
1214         if isinstance(id, int):
1215             # rev
1216             return self.node(id)
1217         if len(id) == 20:
1218             # possibly a binary node
1219             # odds of a binary node being all hex in ASCII are 1 in 10**25
1220             try:
1221                 node = id
1222                 self.rev(node) # quick search the index
1223                 return node
1224             except error.LookupError:
1225                 pass # may be partial hex id
1226         try:
1227             # str(rev)
1228             rev = int(id)
1229             if "%d" % rev != id:
1230                 raise ValueError
1231             if rev < 0:
1232                 rev = len(self) + rev
1233             if rev < 0 or rev >= len(self):
1234                 raise ValueError
1235             return self.node(rev)
1236         except (ValueError, OverflowError):
1237             pass
1238         if len(id) == 40:
1239             try:
1240                 # a full hex nodeid?
1241                 node = bin(id)
1242                 self.rev(node)
1243                 return node
1244             except (TypeError, error.LookupError):
1245                 pass
1246
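One subtlety in the str(rev) branch of _match() above: only strings that round-trip through int() are accepted, so a zero-padded '07' falls through to the node-prefix lookup instead of resolving as revision 7. A tiny standalone check of that rule:

    candidate = "07"                  # hypothetical user-supplied identifier
    rev = int(candidate)
    assert "%d" % rev != candidate    # "7" != "07": reject, try prefix lookup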
1247     def _partialmatch(self, id):
1248         # we don't care about wdirfilenodeids as they should always be full hashes
1249         maybewdir = wdirhex.startswith(id)
1250         try:
1251             partial = self.index.partialmatch(id)
1252             if partial and self.hasnode(partial):
1253                 if maybewdir:
1254                     # single 'ff...' match in radix tree, ambiguous with wdir
1255                     raise error.RevlogError
1256                 return partial
1257             if maybewdir:
1258                 # no 'ff...' match in radix tree, wdir identified
1259                 raise error.WdirUnsupported
1260             return None
1261         except error.RevlogError:
1262             # parsers.c radix tree lookup gave multiple matches
1263             # fast path: for unfiltered changelog, radix tree is accurate
1264             if not getattr(self, 'filteredrevs', None):
1265                 raise error.AmbiguousPrefixLookupError(
1266                     id, self.indexfile, _('ambiguous identifier'))
1267             # fall through to slow path that filters hidden revisions
1268         except (AttributeError, ValueError):
1269             # we are pure python, or key was too short to search radix tree
1270             pass
1271
1272         if id in self._pcache:
1273             return self._pcache[id]
1274
1275         if len(id) <= 40:
1276             try:
1277                 # hex(node)[:...]
1278                 l = len(id) // 2 # grab an even number of digits
1279                 prefix = bin(id[:l * 2])
1280                 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1281                 nl = [n for n in nl if hex(n).startswith(id) and
1282                       self.hasnode(n)]
1283                 if nullhex.startswith(id):
1284                     nl.append(nullid)
1285                 if len(nl) > 0:
1286                     if len(nl) == 1 and not maybewdir:
1287                         self._pcache[id] = nl[0]
1288                         return nl[0]
1289                     raise error.AmbiguousPrefixLookupError(
1290                         id, self.indexfile, _('ambiguous identifier'))
1291                 if maybewdir:
1292                     raise error.WdirUnsupported
1293                 return None
1294             except TypeError:
1295                 pass
1296
1297     def lookup(self, id):
1298         """locate a node based on:
1299             - revision number or str(revision number)
1300             - nodeid or subset of hex nodeid
1301         """
1302         n = self._match(id)
1303         if n is not None:
1304             return n
1305         n = self._partialmatch(id)
1306         if n:
1307             return n
1308
1309         raise error.LookupError(id, self.indexfile, _('no match found'))
1310
1311     def shortest(self, node, minlength=1):
1312         """Find the shortest unambiguous prefix that matches node."""
1313         def isvalid(prefix):
1314             try:
1315                 matchednode = self._partialmatch(prefix)
1316             except error.AmbiguousPrefixLookupError:
1317                 return False
1318             except error.WdirUnsupported:
1319                 # single 'ff...' match
1320                 return True
1321             if matchednode is None:
1322                 raise error.LookupError(node, self.indexfile, _('no node'))
1323             return True
1324
1325         def maybewdir(prefix):
1326             return all(c == 'f' for c in pycompat.iterbytestr(prefix))
1327
1328         hexnode = hex(node)
1329
1330         def disambiguate(hexnode, minlength):
1331             """Disambiguate against wdirid."""
1332             for length in range(minlength, 41):
1333                 prefix = hexnode[:length]
1334                 if not maybewdir(prefix):
1335                     return prefix
1336
1337         if not getattr(self, 'filteredrevs', None):
1338             try:
1339                 length = max(self.index.shortest(node), minlength)
1340                 return disambiguate(hexnode, length)
1341             except error.RevlogError:
1342                 if node != wdirid:
1343                     raise error.LookupError(node, self.indexfile, _('no node'))
1344             except AttributeError:
1345                 # Fall through to pure code
1346                 pass
1347
1348         if node == wdirid:
1349             for length in range(minlength, 41):
1350                 prefix = hexnode[:length]
1351                 if isvalid(prefix):
1352                     return prefix
1353
1354         for length in range(minlength, 41):
1355             prefix = hexnode[:length]
1356             if isvalid(prefix):
1357                 return disambiguate(hexnode, length)
1358
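A rough standalone model of what shortest() promises: the returned prefix matches exactly one known node. The real code additionally guards against the all-'f' wdir id and filtered revisions; all values below are made up:

    def toy_shortest(hexnode, allhexnodes, minlength=1):
        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if sum(h.startswith(prefix) for h in allhexnodes) == 1:
                return prefix

    nodes = ['abc123', 'abd456', 'e99001']
    assert toy_shortest('abc123', nodes) == 'abc'
    assert toy_shortest('e99001', nodes) == 'e'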
1359     def cmp(self, node, text):
1360         """compare text with a given file revision
1361
1362         returns True if text is different from what is stored.
1363         """
1364         p1, p2 = self.parents(node)
1365         return storageutil.hashrevisionsha1(text, p1, p2) != node
1366
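cmp() never has to read the stored revision: hashing the candidate text with the stored parents and comparing against the node is enough. As far as I can tell, storageutil.hashrevisionsha1 boils down to the following sketch (SHA-1 over the two parents in sorted order, then the text):

    import hashlib

    def toy_hashrevision(text, p1, p2):
        # SHA-1 over the two parent nodes in sorted order, then the text.
        s = hashlib.sha1(min(p1, p2))
        s.update(max(p1, p2))
        s.update(text)
        return s.digest()

    nullid20 = b'\x00' * 20
    assert len(toy_hashrevision(b'file content', nullid20, nullid20)) == 20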
1367     def _cachesegment(self, offset, data):
1368         """Add a segment to the revlog cache.
1369
1370         Accepts an absolute offset and the data that is at that location.
1371         """
1372         o, d = self._chunkcache
1373         # try to add to existing cache
1374         if o + len(d) == offset and len(d) + len(data) < _chunksize:
1375             self._chunkcache = o, d + data
1376         else:
1377             self._chunkcache = offset, data
1378
1379     def _readsegment(self, offset, length, df=None):
1380         """Load a segment of raw data from the revlog.
1381
1382         Accepts an absolute offset, length to read, and an optional existing
1383         file handle to read from.
1384
1385         If an existing file handle is passed, its position will be moved and
1386         the original seek position will NOT be restored.
1387
1388         Returns a str or buffer of raw byte data.
1389
1390         Raises if the requested number of bytes could not be read.
1391         """
1392         # Cache data both forward and backward around the requested
1393         # data, in a fixed size window. This helps speed up operations
1394         # involving reading the revlog backwards.
1395         cachesize = self._chunkcachesize
1396         realoffset = offset & ~(cachesize - 1)
1397         reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1398                       - realoffset)
1399         with self._datareadfp(df) as df:
1400             df.seek(realoffset)
1401             d = df.read(reallength)
1402
1403         self._cachesegment(realoffset, d)
1404         if offset != realoffset or reallength != length:
1405             startoffset = offset - realoffset
1406             if len(d) - startoffset < length:
1407                 raise error.RevlogError(
1408                     _('partial read of revlog %s; expected %d bytes from '
1409                       'offset %d, got %d') %
1410                     (self.indexfile if self._inline else self.datafile,
1411                      length, realoffset, len(d) - startoffset))
1412
1413             return util.buffer(d, startoffset, length)
1414
1415         if len(d) < length:
1416             raise error.RevlogError(
1417                 _('partial read of revlog %s; expected %d bytes from offset '
1418                   '%d, got %d') %
1419                 (self.indexfile if self._inline else self.datafile,
1420                  length, offset, len(d)))
1421
1422         return d
1423
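The bit-masking above rounds the requested range out to a cache-size-aligned window; the chunk cache size is validated elsewhere to be a power of two. The arithmetic with toy numbers:

    cachesize = 65536                 # must be a power of two
    offset, length = 70000, 100       # hypothetical request
    realoffset = offset & ~(cachesize - 1)
    reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
    assert realoffset == 65536                          # rounded down
    assert realoffset + reallength >= offset + length   # window covers request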
1424     def _getsegment(self, offset, length, df=None):
1425         """Obtain a segment of raw data from the revlog.
1426
1427         Accepts an absolute offset, length of bytes to obtain, and an
1428         optional file handle to the already-opened revlog. If the file
1429         handle is used, its original seek position will not be preserved.
1430
1431         Requests for data may be returned from a cache.
1432
1433         Returns a str or a buffer instance of raw byte data.
1434         """
1435         o, d = self._chunkcache
1436         l = len(d)
1437
1438         # is it in the cache?
1439         cachestart = offset - o
1440         cacheend = cachestart + length
1441         if cachestart >= 0 and cacheend <= l:
1442             if cachestart == 0 and cacheend == l:
1443                 return d # avoid a copy
1444             return util.buffer(d, cachestart, cacheend - cachestart)
1445
1446         return self._readsegment(offset, length, df=df)
1447
1448     def _getsegmentforrevs(self, startrev, endrev, df=None):
1449         """Obtain a segment of raw data corresponding to a range of revisions.
1450
1451         Accepts the start and end revisions and an optional already-open
1452         file handle to be used for reading. If the file handle is read, its
1453         seek position will not be preserved.
1454
1455         Requests for data may be satisfied by a cache.
1456
1457         Returns a 2-tuple of (offset, data) for the requested range of
1458         revisions. Offset is the integer offset from the beginning of the
1459         revlog and data is a str or buffer of the raw byte data.
1460
1461         Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1462         to determine where each revision's data begins and ends.
1463         """
1464         # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1465         # (functions are expensive).
1466         index = self.index
1467         istart = index[startrev]
1468         start = int(istart[0] >> 16)
1469         if startrev == endrev:
1470             end = start + istart[1]
1471         else:
1472             iend = index[endrev]
1473             end = int(iend[0] >> 16) + iend[1]
1474
1475         if self._inline:
1476             start += (startrev + 1) * self._io.size
1477             end += (endrev + 1) * self._io.size
1478         length = end - start
1479
1480         return start, self._getsegment(start, length, df=df)
1481
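The (rev + 1) * iosize shift exists because, in an inline revlog, index entries and data chunks interleave in one file, so revision r's data is preceded by r + 1 fixed-size index entries. A toy check (64 bytes is, to my knowledge, the v1 index entry size; the data offset is made up):

    iosize = 64                      # v1 index entry size, for illustration
    rev = 2
    datastart = 120                  # hypothetical start(rev) in the data stream
    physical = datastart + (rev + 1) * iosize
    assert physical == 120 + 3 * 64  # three index entries precede rev 2's data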
1482     def _chunk(self, rev, df=None):
1483         """Obtain a single decompressed chunk for a revision.
1484
1485         Accepts an integer revision and an optional already-open file handle
1486         to be used for reading. If used, the seek position of the file will not
1487         be preserved.
1488
1489         Returns a str holding uncompressed data for the requested revision.
1490         """
1491         return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1492
1493     def _chunks(self, revs, df=None, targetsize=None):
1494         """Obtain decompressed chunks for the specified revisions.
1495
1496         Accepts an iterable of numeric revisions that are assumed to be in
1497         ascending order. Also accepts an optional already-open file handle
1498         to be used for reading. If used, the seek position of the file will
1499         not be preserved.
1500
1501         This function is similar to calling ``self._chunk()`` multiple times,
1502         but is faster.
1503
1504         Returns a list with decompressed data for each requested revision.
1505         """
1506         if not revs:
1507             return []
1508         start = self.start
1509         length = self.length
1510         inline = self._inline
1511         iosize = self._io.size
1512         buffer = util.buffer
1513
1514         l = []
1515         ladd = l.append
1516
1517         if not self._withsparseread:
1518             slicedchunks = (revs,)
1519         else:
1520             slicedchunks = deltautil.slicechunk(self, revs,
1521                                                 targetsize=targetsize)
1522
1523         for revschunk in slicedchunks:
1524             firstrev = revschunk[0]
1525             # Skip trailing revisions with empty diff
1526             for lastrev in revschunk[::-1]:
1527                 if length(lastrev) != 0:
1528                     break
1529
1530             try:
1531                 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1532             except OverflowError:
1533                 # issue4215 - we can't cache a run of chunks greater than
1534                 # 2G on Windows
1535                 return [self._chunk(rev, df=df) for rev in revschunk]
1536
1537             decomp = self.decompress
1538             for rev in revschunk:
1539                 chunkstart = start(rev)
1540                 if inline:
1541                     chunkstart += (rev + 1) * iosize
1542                 chunklength = length(rev)
1543                 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1544
1545         return l
1546
1547     def _chunkclear(self):
1548         """Clear the raw chunk cache."""
1549         self._chunkcache = (0, '')
1550
1551     def deltaparent(self, rev):
1552         """return deltaparent of the given revision"""
1553         base = self.index[rev][3]
1554         if base == rev:
1555             return nullrev
1556         elif self._generaldelta:
1557             return base
1558         else:
1559             return rev - 1
1560
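The base field (slot 3 of an index entry) is read two ways: with generaldelta it names the actual delta parent, without it a delta always applies against rev - 1, and base == rev marks a full snapshot. A toy restatement with made-up index values:

    base = {5: 5, 6: 2, 7: 6}        # rev -> base field (toy data)
    nullrev = -1

    def toy_deltaparent(rev, generaldelta):
        if base[rev] == rev:
            return nullrev                       # stored as a full snapshot
        return base[rev] if generaldelta else rev - 1

    assert toy_deltaparent(5, True) == nullrev
    assert toy_deltaparent(6, True) == 2         # delta against an older rev
    assert toy_deltaparent(6, False) == 5        # pre-generaldelta: rev - 1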
1561     def issnapshot(self, rev):
1562         """tells whether rev is a snapshot
1563         """
1564         if not self._sparserevlog:
1565             return self.deltaparent(rev) == nullrev
1566         elif util.safehasattr(self.index, 'issnapshot'):
1567             # directly assign the method to cache the testing and access
1568             self.issnapshot = self.index.issnapshot
1569             return self.issnapshot(rev)
1570         if rev == nullrev:
1571             return True
1572         entry = self.index[rev]
1573         base = entry[3]
1574         if base == rev:
1575             return True
1576         if base == nullrev:
1577             return True
1578         p1 = entry[5]
1579         p2 = entry[6]
1580         if base == p1 or base == p2:
1581             return False
1582         return self.issnapshot(base)
1583
1584     def snapshotdepth(self, rev):
1585         """number of snapshots in the chain before this one"""
1586         if not self.issnapshot(rev):
1587             raise error.ProgrammingError('revision %d not a snapshot' % rev)
1588         return len(self._deltachain(rev)[0]) - 1
1589
1590     def revdiff(self, rev1, rev2):
1591         """return or calculate a delta between two revisions
1592
1593         The delta calculated is in binary form and is intended to be written to
1594         revlog data directly. So this function needs raw revision data.
1595         """
1596         if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1597             return bytes(self._chunk(rev2))
1598
1599         return mdiff.textdiff(self.rawdata(rev1),
1600                               self.rawdata(rev2))
1601
1602     def revision(self, nodeorrev, _df=None, raw=False):
1603         """return an uncompressed revision of a given node or revision
1604         number.
1605
1606         _df - an existing file handle to read from. (internal-only)
1607         raw - an optional argument specifying if the revision data is to be
1608         treated as raw data when applying flag transforms. 'raw' should be set
1609         to True when generating changegroups or in debug commands.
1610         """
1611         return self._revisiondata(nodeorrev, _df, raw=raw)
1612
1613     def _revisiondata(self, nodeorrev, _df=None, raw=False):
1614         # deal with <nodeorrev> argument type
1615         if isinstance(nodeorrev, int):
1616             rev = nodeorrev
1617             node = self.node(rev)
1618         else:
1619             node = nodeorrev
1620             rev = None
1621
1622         # fast path the special `nullid` rev
1623         if node == nullid:
1624             return ""
1625
-1626         # revision in the cache (could be useful to apply delta)
-1627         cachedrev = None
-1628         # the revlog's flag for this revision
-1629         # (usually alter its state or content)
-1630         flags = None
 1626         # The text as stored inside the revlog. Might be the revision or might
 1627         # need to be processed to retrieve the revision.
 1628         rawtext = None
+1629
+1630         rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
+1631
+1632         if raw and validated:
+1633             # if we don't want to process the raw text and that raw
+1634             # text is cached, we can exit early.
+1635             return rawtext
+1636         if rev is None:
+1637             rev = self.rev(node)
+1638         # the revlog's flag for this revision
+1639         # (usually alter its state or content)
+1640         flags = self.flags(rev)
+1641
+1642         if validated and flags == REVIDX_DEFAULT_FLAGS:
+1643             # no extra flags set, no flag processor runs, text = rawtext
+1644             return rawtext
+1645
+1646         text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
+1647         if validatehash:
+1648             self.checkhash(text, node, rev=rev)
+1649         if not validated:
+1650             self._revisioncache = (node, rev, rawtext)
+1651
+1652         return text
+1653
+1654     def _rawtext(self, node, rev, _df=None):
+1655         """return the possibly unvalidated rawtext for a revision
+1656
+1657         returns (rev, rawtext, validated)
+1658         """
+1659
+1660         # revision in the cache (could be useful to apply delta)
+1661         cachedrev = None
 1662         # An intermediate text to apply deltas to
 1663         basetext = None
-1636         # Do we need to update the rawtext cache once it is validated ?
-1637         needcaching = True
 1664
 1665         # Check if we have the entry in cache
 1666         # The cache entry looks like (node, rev, rawtext)
 1667         if self._revisioncache:
 1668             if self._revisioncache[0] == node:
-1643                 needcaching = False
-1644                 # _cache only stores rawtext
-1645                 # rawtext is reusable. but we might need to run flag processors
-1646                 rawtext = self._revisioncache[2]
-1647                 if raw:
-1648                     # if we don't want to process the raw text and that raw
-1649                     # text is cached, we can exit early.
-1650                     return rawtext
-1651                 # duplicated, but good for perf
-1652                 if rev is None:
-1653                     rev = self.rev(node)
-1654                 if flags is None:
-1655                     flags = self.flags(rev)
-1656                 # no extra flags set, no flag processor runs, text = rawtext
-1657                 if flags == REVIDX_DEFAULT_FLAGS:
-1658                     return rawtext
-1659
+1669                 return (rev, self._revisioncache[2], True)
 1670             cachedrev = self._revisioncache[1]
 1671
-1662         # look up what we need to read
-1663         if rawtext is None:
-1664             if rev is None:
-1665                 rev = self.rev(node)
+1672         if rev is None:
+1673             rev = self.rev(node)
 1674
-1667             chain, stopped = self._deltachain(rev, stoprev=cachedrev)
-1668             if stopped:
-1669                 basetext = self._revisioncache[2]
-1670
-1671             # drop cache to save memory
-1672             self._revisioncache = None
-1673
-1674             targetsize = None
-1675             rawsize = self.index[rev][2]
-1676             if 0 <= rawsize:
-1677                 targetsize = 4 * rawsize
-1678
-1679             bins = self._chunks(chain, df=_df, targetsize=targetsize)
-1680             if basetext is None:
-1681                 basetext = bytes(bins[0])
-1682                 bins = bins[1:]
-1683
-1684             rawtext = mdiff.patches(basetext, bins)
-1685             del basetext # let us have a chance to free memory early
-1686
-1687         if flags is None:
-1688             if rev is None:
-1689                 rev = self.rev(node)
-1690             flags = self.flags(rev)
-1691
-1692         text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
-1693         if validatehash:
-1694             self.checkhash(text, node, rev=rev)
-1695
-1696         if needcaching:
-1697             self._revisioncache = (node, rev, rawtext)
-1698
-1699         return text
+1675         chain, stopped = self._deltachain(rev, stoprev=cachedrev)
+1676         if stopped:
+1677             basetext = self._revisioncache[2]
+1678
+1679         # drop cache to save memory, the caller is expected to
+1680         # update self._revisioncache after validating the text
+1681         self._revisioncache = None
+1682
+1683         targetsize = None
+1684         rawsize = self.index[rev][2]
+1685         if 0 <= rawsize:
+1686             targetsize = 4 * rawsize
+1687
+1688         bins = self._chunks(chain, df=_df, targetsize=targetsize)
+1689         if basetext is None:
+1690             basetext = bytes(bins[0])
+1691             bins = bins[1:]
+1692
+1693         rawtext = mdiff.patches(basetext, bins)
+1694         del basetext # let us have a chance to free memory early
+1695         return (rev, rawtext, False)
 1696
1697     def rawdata(self, nodeorrev, _df=None):
1698         """return an uncompressed raw data of a given node or revision number.
1699
1700         _df - an existing file handle to read from. (internal-only)
1701         """
1702         return self._revisiondata(nodeorrev, _df, raw=True)
1703
1704     def hash(self, text, p1, p2):
1705         """Compute a node hash.
1706
1707         Available as a function so that subclasses can replace the hash
1708         as needed.
1709         """
1710         return storageutil.hashrevisionsha1(text, p1, p2)
1711
1712     def _processflags(self, text, flags, operation, raw=False):
1713         """Inspect revision data flags and apply transforms defined by
1714         registered flag processors.
1715
1716         ``text`` - the revision data to process
1717         ``flags`` - the revision flags
1718         ``operation`` - the operation being performed (read or write)
1719         ``raw`` - an optional argument describing if the raw transform should be
1720         applied.
1721
1722         This method processes the flags in the order (or reverse order if
1723         ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1724         flag processors registered for present flags. The order of flags defined
1725         in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1726
1727         Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1728         processed text and ``validatehash`` is a bool indicating whether the
1729         returned text should be checked for hash integrity.
1730
1731         Note: If the ``raw`` argument is set, it has precedence over the
1732         operation and will only update the value of ``validatehash``.
1733         """
1734         # fast path: no flag processors will run
1735         if flags == 0:
1736             return text, True
1737         if not operation in ('read', 'write'):
1738             raise error.ProgrammingError(_("invalid '%s' operation") %
1739                                          operation)
1740         # Check all flags are known.
1741         if flags & ~flagutil.REVIDX_KNOWN_FLAGS:
1742             raise error.RevlogError(_("incompatible revision flag '%#x'") %
1743                                     (flags & ~flagutil.REVIDX_KNOWN_FLAGS))
1744         validatehash = True
1745         # Depending on the operation (read or write), the order might be
1746         # reversed due to non-commutative transforms.
1747         orderedflags = REVIDX_FLAGS_ORDER
1748         if operation == 'write':
1749             orderedflags = reversed(orderedflags)
1750
1751         for flag in orderedflags:
1752             # If a flagprocessor has been registered for a known flag, apply the
1753             # related operation transform and update result tuple.
1754             if flag & flags:
1755                 vhash = True
1756
1757                 if flag not in self._flagprocessors:
1758                     message = _("missing processor for flag '%#x'") % (flag)
1759                     raise error.RevlogError(message)
1760
1761                 processor = self._flagprocessors[flag]
1762                 if processor is not None:
1763                     readtransform, writetransform, rawtransform = processor
1764
1765                     if raw:
1766                         vhash = rawtransform(self, text)
1767                     elif operation == 'read':
1768                         text, vhash = readtransform(self, text)
1769                     else: # write operation
1770                         text, vhash = writetransform(self, text)
1771                 validatehash = validatehash and vhash
1772
1773         return text, validatehash
1774
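For reference, each registered processor unpacked above is a (readtransform, writetransform, rawtransform) triple. A hedged sketch of the shape such a processor could take; the XOR transform and any flag it would be registered under are hypothetical, and real processors go through the flagutil registration machinery:

    def _toyread(rl, text):
        # decode on read; the bool asks the caller to hash-check the result
        return bytes(bytearray(b ^ 0xff for b in bytearray(text))), True

    def _toywrite(rl, text):
        # encode on write
        return bytes(bytearray(b ^ 0xff for b in bytearray(text))), True

    def _toyraw(rl, rawtext):
        # rawtext is stored as-is; only report whether it can be hash-checked
        return False

    TOY_PROCESSOR = (_toyread, _toywrite, _toyraw)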
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

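    # Note: self.hash() used above is the classic revlog hash: the SHA-1 of
    # the two parent nodes (in sorted order) followed by the revision text,
    # so a hash check validates content and parentage at the same time.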
    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover
            # data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

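    # Background for the conversion above: an inline revlog keeps each
    # revision's data chunk next to its index entry inside the .i file;
    # going non-inline rewrites the entries into a data-less .i plus a
    # separate .d file holding the concatenated chunks.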
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses
            might use a different hashing method (and override checkhash()
            in such a case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any
        # provided cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

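    # One-byte chunk headers produced above and routed on in decompress():
    #   'x'  - zlib-compressed data (zlib streams start with 0x78)
    #   'u'  - literal data stored verbatim; the header must be stripped
    #   '\0' - literal data whose first byte is already '\0'; stored as-is
    # Any other value is resolved through util.compengines (e.g. zstd).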
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

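    # Layout of the index entry appended above: (offset_type(offset, flags),
    # compressed length, raw length, delta base rev, linkrev, p1 rev, p2 rev,
    # node). offset_type() packs offset and flags into a single integer
    # (offset << 16 | flags), which is why revision flags fit in 16 bits.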
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

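    # Each item of ``deltas`` consumed above is a 7-tuple:
    # (node, p1, p2, linknode, deltabase, delta, flags), as produced when
    # unpacking a changegroup.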
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        if (not self._storedeltachains and
                deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

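    # Typical use of the constants above (illustrative sketch): force every
    # delta to be recomputed while cloning into a fresh revlog:
    #
    #   srcrevlog.clone(tr, destrevlog, deltareuse=revlog.DELTAREUSENEVER)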
    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. When unset, the destination revlog's current
        setting is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if (deltareuse != self.DELTAREUSEFULLADD
                        and destrevlog._lazydelta):
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

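    # The tombstone written above is an ordinary filelog metadata block,
    # roughly b'\x01\ncensored: <tombstone>\n\x01\n', which is what lets
    # storageutil.iscensoredtext() recognize the revision as censored.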
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
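
    # Illustrative result of storageinfo() (hypothetical values) for a
    # non-inline revlog when every flag is passed as True:
    #
    #   {'exclusivefiles': [(opener, b'data/foo.i'), (opener, b'data/foo.d')],
    #    'sharedfiles': [],
    #    'revisionscount': 42,
    #    'trackedsize': 12345,
    #    'storedsize': 6789}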