upgrade: document DELTAREUSEFULLADD in revlog.clone...
marmoute
r43267:6510c783 default
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')
rustancestor = policy.importrust(r'ancestor')
rustdagop = policy.importrust(r'dagop')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False, {}

def ellipsiswriteprocessor(rl, text, sidedata):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
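
# Illustrative example (values made up; not part of the original module):
# the offset lives in the high bits and the 16 flag bits in the low ones,
# so getoffset() and gettype() above exactly invert offset_type():
#
#     >>> v = offset_type(1024, 0)
#     >>> v == 1024 << 16
#     True
#     >>> getoffset(v), gettype(v)
#     (1024, 0)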

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
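
# Illustrative round-trip through the v0 layout above (values made up;
# not part of the original module). The struct is 4 signed 32-bit ints
# followed by three 20-byte nodeids:
#
#     >>> raw = indexformatv0_pack(0, 11, 0, 0, b'\x11' * 20, b'\x22' * 20,
#     ...                          b'\x33' * 20)
#     >>> len(raw) == indexformatv0.size == 4 * 4 + 3 * 20
#     True
#     >>> indexformatv0_unpack(raw)[:4]
#     (0, 11, 0, 0)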

class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p
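
# Illustrative sketch of the header trick in packentry() above (made-up
# entry; not part of the original module). The first index entry's 6-byte
# offset is always zero, so its leading 4 bytes are free to carry the
# version/flags word without losing information:
#
#     >>> entry = (offset_type(0, 0), 10, 10, 0, 0, -1, -1, b'\x11' * 20)
#     >>> p = indexformatng_pack(*entry)
#     >>> p = versionformat_pack(REVLOGV1 | FLAG_INLINE_DATA) + p[4:]
#     >>> versionformat_unpack(p[:4])[0] == REVLOGV1 | FLAG_INLINE_DATA
#     True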

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.
    """
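
    # Illustrative usage sketch (not part of the original module). Revlogs
    # are normally reached through higher-level storage objects rather than
    # constructed directly; assuming an existing repository object ``repo``:
    #
    #     >>> fl = repo.file(b'README')   # a filelog, which builds on revlog
    #     >>> fl.revision(fl.node(0))     # fulltext of its first revision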

    _flagserrorclass = error.RevlogError

    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False,
                 upperboundcomp=None):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = getattr(self.opener, 'options', {}) or {}

        if 'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif 'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if 'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif getattr(self.opener, 'options', None) is not None:
            # If options provided but no 'revlog*' found, the repository
            # would have no 'requires' file in it, which means we have to
            # stick to the old format.
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if 'chunkcachesize' in opts:
            self._chunkcachesize = opts['chunkcachesize']
        if 'maxchainlen' in opts:
            self._maxchainlen = opts['maxchainlen']
        if 'deltabothparents' in opts:
            self._deltabothparents = opts['deltabothparents']
        self._lazydelta = bool(opts.get('lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
        if 'compengine' in opts:
            self._compengine = opts['compengine']
        if 'zlib.level' in opts:
            self._compengineopts['zlib.level'] = opts['zlib.level']
        if 'zstd.level' in opts:
            self._compengineopts['zstd.level'] = opts['zstd.level']
        if 'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts['maxdeltachainspan']
        if self._mmaplargeindex and 'mmapindexthreshold' in opts:
            mmapindexthreshold = opts['mmapindexthreshold']
        self._sparserevlog = bool(opts.get('sparse-revlog', False))
        withsparseread = bool(opts.get('with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if 'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts['sparse-read-density-threshold']
        if 'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts['sparse-read-min-gap-size']
        if opts.get('enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)
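
        # The check above relies on a standard bit trick (illustrative): for
        # x > 0, ``x & (x - 1)`` clears the lowest set bit, so the result is
        # zero exactly when x has a single bit set, i.e. is a power of two:
        #
        #     >>> [x for x in (1, 2, 3, 4, 6, 8) if not (x & (x - 1))]
        #     [1, 2, 4, 8]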

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF
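
        # Illustrative decomposition (made-up header word; not part of the
        # original module): the low 16 bits carry the format version and the
        # bits above carry feature flags:
        #
        #     >>> v = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
        #     >>> v & 0xFFFF == REVLOGV1
        #     True
        #     >>> bool((v & ~0xFFFF) & FLAG_GENERALDELTA)
        #     True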

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7] # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            # If we are using the native C version, you are in a fun case
            # where self.index, self.nodemap and self._nodecache are the same
            # object.
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
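
    # Illustrative pure-Python walk over toy data (not part of the original
    # module). Index slot 3 holds the delta-base rev, so with generaldelta
    # the loop above follows e[3] until an entry is its own base:
    #
    #     >>> base = {0: 0, 1: 0, 2: 1}   # rev -> delta base rev
    #     >>> rev, chain = 2, []
    #     >>> while rev != base[rev]:
    #     ...     chain.append(rev)
    #     ...     rev = base[rev]
    #     >>> chain.append(rev)
    #     >>> list(reversed(chain))
    #     [0, 1, 2]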

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        elif util.safehasattr(parsers, 'rustlazyancestors'):
            lazyancestors = ancestor.rustlazyancestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
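
    # Illustrative use (assumes an existing revlog ``rl`` with at least one
    # revision; not part of the original module). Ancestors of ``common``
    # are reported as "had", inclusively:
    #
    #     >>> has, missing = rl.findcommonmissing(common=[rl.node(0)])
    #     >>> 0 in has
    #     True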

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
904 if common is None:
904 if common is None:
905 common = [nullrev]
905 common = [nullrev]
906 if heads is None:
906 if heads is None:
907 heads = self.headrevs()
907 heads = self.headrevs()
908
908
909 inc = self.incrementalmissingrevs(common=common)
909 inc = self.incrementalmissingrevs(common=common)
910 return inc.missingancestors(heads)
910 return inc.missingancestors(heads)
911
911
912 def findmissing(self, common=None, heads=None):
912 def findmissing(self, common=None, heads=None):
913 """Return the ancestors of heads that are not ancestors of common.
913 """Return the ancestors of heads that are not ancestors of common.
914
914
915 More specifically, return a list of nodes N such that every N
915 More specifically, return a list of nodes N such that every N
916 satisfies the following constraints:
916 satisfies the following constraints:
917
917
918 1. N is an ancestor of some node in 'heads'
918 1. N is an ancestor of some node in 'heads'
919 2. N is not an ancestor of any node in 'common'
919 2. N is not an ancestor of any node in 'common'
920
920
921 The list is sorted by revision number, meaning it is
921 The list is sorted by revision number, meaning it is
922 topologically sorted.
922 topologically sorted.
923
923
924 'heads' and 'common' are both lists of node IDs. If heads is
924 'heads' and 'common' are both lists of node IDs. If heads is
925 not supplied, uses all of the revlog's heads. If common is not
925 not supplied, uses all of the revlog's heads. If common is not
926 supplied, uses nullid."""
926 supplied, uses nullid."""
927 if common is None:
927 if common is None:
928 common = [nullid]
928 common = [nullid]
929 if heads is None:
929 if heads is None:
930 heads = self.heads()
930 heads = self.heads()
931
931
932 common = [self.rev(n) for n in common]
932 common = [self.rev(n) for n in common]
933 heads = [self.rev(n) for n in heads]
933 heads = [self.rev(n) for n in heads]
934
934
935 inc = self.incrementalmissingrevs(common=common)
935 inc = self.incrementalmissingrevs(common=common)
936 return [self.node(r) for r in inc.missingancestors(heads)]
936 return [self.node(r) for r in inc.missingancestors(heads)]
937
937
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

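    # Illustrative sketch (hypothetical nodes): with a linear history
    # n0 -> n1 -> n2, nodesbetween(roots=[n1], heads=[n2]) would return
    # ([n1, n2], [n1], [n2]): the topological path, the roots actually
    # reached, and the heads actually reachable.
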
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

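    # Illustrative sketch (hypothetical names): on a branchy revlog `rl`,
    #
    #     rl.heads()         # every childless node
    #     rl.heads(start=n)  # only the heads descending from node n
    #
    # Passing `stop` treats those nodes as if they were childless, which
    # limits the head computation to a subgraph.
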
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

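    # Illustrative note: because revision numbers are a topological order,
    # `a > b` can never mean "a is an ancestor of b", which is why the
    # method can return False before paying for a reachableroots walk.
    # Sketch with hypothetical revs: isancestorrev(2, 5) walks the graph,
    # while isancestorrev(5, 2) returns False immediately.
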
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::<roots> and <roots>::<heads>))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(minroot, heads, roots,
                                              includepath)
        except AttributeError:
            return dagop._reachablerootspure(self.parentrevs,
                                             minroot, roots, heads, includepath)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

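    # Illustrative note: when two nodes have several equally good common
    # ancestors (a "criss-cross" merge), the candidates are tie-broken with
    # min() over the binary node ids. The winner has no special graph
    # meaning; it merely makes repeated calls deterministic. Sketch with
    # hypothetical nodes: if ancs maps to {n1, n2}, min(n1, n2) is always
    # the one returned.
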
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids here, as they should always be
        # full hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

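    # Illustrative sketch (hypothetical value): for a node whose hex form
    # starts with 'a3f9...', shortest(node) effectively probes 'a', 'a3',
    # 'a3f', ... until the prefix stops being ambiguous, then lengthens it
    # further only if it could still be confused with the all-'f'
    # working-directory pseudo-id.
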
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

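    # Illustrative note: the comparison never decompresses the stored
    # revision; it recomputes the SHA-1 that committing `text` with the
    # same parents would produce and compares it to the stored node id.
    # Sketch (hypothetical names):
    #
    #     if not rl.cmp(node, candidate_text):
    #         pass # candidate_text is byte-identical to the stored revision
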
    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d

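    # Worked example of the windowing arithmetic above (illustrative
    # values): with a chunk cache size of 65536 bytes, a request for
    # offset=70000, length=100 computes
    #
    #     realoffset = 70000 & ~65535                             # -> 65536
    #     reallength = ((70000 + 100 + 65536) & ~65535) - 65536   # -> 65536
    #
    # so one aligned 64 KiB block covering [65536, 131072) is read and
    # cached, and a 100-byte buffer at startoffset 4464 is returned.
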
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

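    # Illustrative note on the inline adjustment above: in an inline revlog
    # the index entries and the data chunks are interleaved in one file, so
    # the data for revision r sits after (r + 1) index records. Sketch with
    # hypothetical numbers: with a 64-byte index entry size, revision 2's
    # data offset is shifted by (2 + 1) * 64 = 192 bytes relative to the
    # offset recorded in the index.
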
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

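    # Illustrative note: the index stores a base revision in entry slot 3.
    # Without generaldelta, deltas always apply against the previous
    # revision, so the stored base only marks where the chain starts; with
    # generaldelta, the stored base *is* the delta parent. Sketch with
    # hypothetical values: for rev=7 with index base 5, deltaparent(7) is
    # 5 under generaldelta and 6 otherwise.
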
    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1),
                              self.rawdata(rev2))

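    # Illustrative note: when rev2 already happens to be stored as a delta
    # against rev1, the stored chunk is returned verbatim and no diffing
    # work is done; otherwise a binary delta is computed from both raw
    # texts. Sketch (hypothetical revs):
    #
    #     patch = rl.revdiff(6, 7) # binary delta from rev 6's raw text to 7's
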
    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = ('_processflag(...) use the specialized variant')
        util.nouideprecwarn(msg, '5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == 'read':
            return flagutil.processflagsread(self, text, flags)
        else: # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = ('revlog.revision(..., raw=True) is deprecated, '
                   'use revlog.rawdata(...)')
            util.nouideprecwarn(msg, '5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more
        advanced mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return "", {}

        # The text as stored inside the revlog. Might be the revision or might
        # need to be processed to retrieve the revision.
        rawtext = None

        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw text is
            # already cached, we can exit early.
            return rawtext, {}
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, {}

        sidedata = {}
        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext # let us have a chance to free memory early
        return (rev, rawtext, False)

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

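    # Illustrative note on revision() vs rawdata(): rawdata() returns the
    # bytes exactly as reconstructed from the delta chain, while revision()
    # additionally runs the read flag processors, so the two only differ
    # when extra revision flags are set. Sketch (hypothetical name):
    #
    #     assert rl.revision(node) == rl.rawdata(node) # holds when no
    #                                                  # flags are set
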
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

1776 def _enforceinlinesize(self, tr, fp=None):
1776 def _enforceinlinesize(self, tr, fp=None):
1777 """Check if the revlog is too big for inline and convert if so.
1777 """Check if the revlog is too big for inline and convert if so.
1778
1778
1779 This should be called after revisions are added to the revlog. If the
1779 This should be called after revisions are added to the revlog. If the
1780 revlog has grown too large to be an inline revlog, it will convert it
1780 revlog has grown too large to be an inline revlog, it will convert it
1781 to use multiple index and data files.
1781 to use multiple index and data files.
1782 """
1782 """
1783 tiprev = len(self) - 1
1783 tiprev = len(self) - 1
1784 if (not self._inline or
1784 if (not self._inline or
1785 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1785 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1786 return
1786 return
1787
1787
1788 trinfo = tr.find(self.indexfile)
1788 trinfo = tr.find(self.indexfile)
1789 if trinfo is None:
1789 if trinfo is None:
1790 raise error.RevlogError(_("%s not found in the transaction")
1790 raise error.RevlogError(_("%s not found in the transaction")
1791 % self.indexfile)
1791 % self.indexfile)
1792
1792
1793 trindex = trinfo[2]
1793 trindex = trinfo[2]
1794 if trindex is not None:
1794 if trindex is not None:
1795 dataoff = self.start(trindex)
1795 dataoff = self.start(trindex)
1796 else:
1796 else:
1797 # revlog was stripped at start of transaction, use all leftover data
1797 # revlog was stripped at start of transaction, use all leftover data
1798 trindex = len(self) - 1
1798 trindex = len(self) - 1
1799 dataoff = self.end(tiprev)
1799 dataoff = self.end(tiprev)
1800
1800
1801 tr.add(self.datafile, dataoff)
1801 tr.add(self.datafile, dataoff)
1802
1802
1803 if fp:
1803 if fp:
1804 fp.flush()
1804 fp.flush()
1805 fp.close()
1805 fp.close()
1806 # We can't use the cached file handle after close(). So prevent
1806 # We can't use the cached file handle after close(). So prevent
1807 # its usage.
1807 # its usage.
1808 self._writinghandles = None
1808 self._writinghandles = None
1809
1809
1810 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1810 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1811 for r in self:
1811 for r in self:
1812 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1812 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1813
1813
1814 with self._indexfp('w') as fp:
1814 with self._indexfp('w') as fp:
1815 self.version &= ~FLAG_INLINE_DATA
1815 self.version &= ~FLAG_INLINE_DATA
1816 self._inline = False
1816 self._inline = False
1817 io = self._io
1817 io = self._io
1818 for i in self:
1818 for i in self:
1819 e = io.packentry(self.index[i], self.node, self.version, i)
1819 e = io.packentry(self.index[i], self.node, self.version, i)
1820 fp.write(e)
1820 fp.write(e)
1821
1821
1822 # the temp file replaces the real index when we exit the context
1822 # the temp file replaces the real index when we exit the context
1823 # manager
1823 # manager
1824
1824
1825 tr.replace(self.indexfile, trindex * self._io.size)
1825 tr.replace(self.indexfile, trindex * self._io.size)
1826 self._chunkclear()
1826 self._chunkclear()
1827
1827
1828 def _nodeduplicatecallback(self, transaction, node):
1828 def _nodeduplicatecallback(self, transaction, node):
1829 """called when trying to add a node already stored.
1829 """called when trying to add a node already stored.
1830 """
1830 """
1831
1831
1832 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1832 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1833 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None,
1833 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None,
1834 sidedata=None):
1834 sidedata=None):
1835 """add a revision to the log
1835 """add a revision to the log
1836
1836
1837 text - the revision data to add
1837 text - the revision data to add
1838 transaction - the transaction object used for rollback
1838 transaction - the transaction object used for rollback
1839 link - the linkrev data to add
1839 link - the linkrev data to add
1840 p1, p2 - the parent nodeids of the revision
1840 p1, p2 - the parent nodeids of the revision
1841 cachedelta - an optional precomputed delta
1841 cachedelta - an optional precomputed delta
1842 node - nodeid of revision; typically node is not specified, and it is
1842 node - nodeid of revision; typically node is not specified, and it is
1843 computed by default as hash(text, p1, p2); however, subclasses might
1843 computed by default as hash(text, p1, p2); however, subclasses might
1844 use a different hashing method (and override checkhash() in that case)
1844 use a different hashing method (and override checkhash() in that case)
1845 flags - the known flags to set on the revision
1845 flags - the known flags to set on the revision
1846 deltacomputer - an optional deltacomputer instance shared between
1846 deltacomputer - an optional deltacomputer instance shared between
1847 multiple calls
1847 multiple calls
1848 """
1848 """
1849 if link == nullrev:
1849 if link == nullrev:
1850 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1850 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1851 % self.indexfile)
1851 % self.indexfile)
1852
1852
1853 if sidedata is None:
1853 if sidedata is None:
1854 sidedata = {}
1854 sidedata = {}
1855
1855
1856 if flags:
1856 if flags:
1857 node = node or self.hash(text, p1, p2)
1857 node = node or self.hash(text, p1, p2)
1858
1858
1859 rawtext, validatehash = flagutil.processflagswrite(self, text, flags,
1859 rawtext, validatehash = flagutil.processflagswrite(self, text, flags,
1860 sidedata=sidedata)
1860 sidedata=sidedata)
1861
1861
1862 # If the flag processor modifies the revision data, ignore any provided
1862 # If the flag processor modifies the revision data, ignore any provided
1863 # cachedelta.
1863 # cachedelta.
1864 if rawtext != text:
1864 if rawtext != text:
1865 cachedelta = None
1865 cachedelta = None
1866
1866
1867 if len(rawtext) > _maxentrysize:
1867 if len(rawtext) > _maxentrysize:
1868 raise error.RevlogError(
1868 raise error.RevlogError(
1869 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1869 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1870 % (self.indexfile, len(rawtext)))
1870 % (self.indexfile, len(rawtext)))
1871
1871
1872 node = node or self.hash(rawtext, p1, p2)
1872 node = node or self.hash(rawtext, p1, p2)
1873 if node in self.nodemap:
1873 if node in self.nodemap:
1874 return node
1874 return node
1875
1875
1876 if validatehash:
1876 if validatehash:
1877 self.checkhash(rawtext, node, p1=p1, p2=p2)
1877 self.checkhash(rawtext, node, p1=p1, p2=p2)
1878
1878
1879 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1879 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1880 flags, cachedelta=cachedelta,
1880 flags, cachedelta=cachedelta,
1881 deltacomputer=deltacomputer)
1881 deltacomputer=deltacomputer)
1882
1882
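A hedged usage sketch: when ``node`` is omitted and no flags are set, the
stored node is the default content hash, so a round-trip looks like the
following (``rl``, ``tr``, ``linkrev``, ``p1`` and ``p2`` are assumed to
exist in the caller).

    text = b'contents of the new revision'
    node = rl.addrevision(text, tr, linkrev, p1, p2)
    assert node == rl.hash(text, p1, p2)  # default node is hash(text, p1, p2)
    assert rl.revision(node) == text      # and the text round-trips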
1883 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1883 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1884 cachedelta=None, deltacomputer=None):
1884 cachedelta=None, deltacomputer=None):
1885 """add a raw revision with known flags, node and parents
1885 """add a raw revision with known flags, node and parents
1886 useful when reusing a revision not stored in this revlog (e.g. received
1886 useful when reusing a revision not stored in this revlog (e.g. received
1887 over the wire, or read from an external bundle).
1887 over the wire, or read from an external bundle).
1888 """
1888 """
1889 dfh = None
1889 dfh = None
1890 if not self._inline:
1890 if not self._inline:
1891 dfh = self._datafp("a+")
1891 dfh = self._datafp("a+")
1892 ifh = self._indexfp("a+")
1892 ifh = self._indexfp("a+")
1893 try:
1893 try:
1894 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1894 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1895 flags, cachedelta, ifh, dfh,
1895 flags, cachedelta, ifh, dfh,
1896 deltacomputer=deltacomputer)
1896 deltacomputer=deltacomputer)
1897 finally:
1897 finally:
1898 if dfh:
1898 if dfh:
1899 dfh.close()
1899 dfh.close()
1900 ifh.close()
1900 ifh.close()
1901
1901
1902 def compress(self, data):
1902 def compress(self, data):
1903 """Generate a possibly-compressed representation of data."""
1903 """Generate a possibly-compressed representation of data."""
1904 if not data:
1904 if not data:
1905 return '', data
1905 return '', data
1906
1906
1907 compressed = self._compressor.compress(data)
1907 compressed = self._compressor.compress(data)
1908
1908
1909 if compressed:
1909 if compressed:
1910 # The revlog compressor added the header in the returned data.
1910 # The revlog compressor added the header in the returned data.
1911 return '', compressed
1911 return '', compressed
1912
1912
1913 if data[0:1] == '\0':
1913 if data[0:1] == '\0':
1914 return '', data
1914 return '', data
1915 return 'u', data
1915 return 'u', data
1916
1916
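An illustration of the two-element return convention, assuming ``rl`` is a
revlog using the default zlib compressor: joining the tuple yields exactly
the bytes written to the revlog, and the first byte tells decompress() how
to route the chunk.

    header, rest = rl.compress(b'x' * 1000)
    # compressible chunk: header is b'' and ``rest`` begins with the
    # compressor's own header byte (b'x' for zlib)
    chunk = header + rest
    header, rest = rl.compress(b'a')
    # a chunk too small to benefit is typically stored raw as b'u' + data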
1917 def decompress(self, data):
1917 def decompress(self, data):
1918 """Decompress a revlog chunk.
1918 """Decompress a revlog chunk.
1919
1919
1920 The chunk is expected to begin with a header identifying the
1920 The chunk is expected to begin with a header identifying the
1921 format type so it can be routed to an appropriate decompressor.
1921 format type so it can be routed to an appropriate decompressor.
1922 """
1922 """
1923 if not data:
1923 if not data:
1924 return data
1924 return data
1925
1925
1926 # Revlogs are read much more frequently than they are written and many
1926 # Revlogs are read much more frequently than they are written and many
1927 # chunks only take microseconds to decompress, so performance is
1927 # chunks only take microseconds to decompress, so performance is
1928 # important here.
1928 # important here.
1929 #
1929 #
1930 # We can make a few assumptions about revlogs:
1930 # We can make a few assumptions about revlogs:
1931 #
1931 #
1932 # 1) the majority of chunks will be compressed (as opposed to inline
1932 # 1) the majority of chunks will be compressed (as opposed to inline
1933 # raw data).
1933 # raw data).
1934 # 2) decompressing *any* data will likely be at least 10x slower than
1934 # 2) decompressing *any* data will likely be at least 10x slower than
1935 # returning raw inline data.
1935 # returning raw inline data.
1936 # 3) we want to prioritize common and officially supported compression
1936 # 3) we want to prioritize common and officially supported compression
1937 # engines
1937 # engines
1938 #
1938 #
1939 # It follows that we want to optimize for "decompress compressed data
1939 # It follows that we want to optimize for "decompress compressed data
1940 # when encoded with common and officially supported compression engines"
1940 # when encoded with common and officially supported compression engines"
1941 # case over "raw data" and "data encoded by less common or non-official
1941 # case over "raw data" and "data encoded by less common or non-official
1942 # compression engines." That is why we have the inline lookup first
1942 # compression engines." That is why we have the inline lookup first
1943 # followed by the compengines lookup.
1943 # followed by the compengines lookup.
1944 #
1944 #
1945 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1945 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1946 # compressed chunks. And this matters for changelog and manifest reads.
1946 # compressed chunks. And this matters for changelog and manifest reads.
1947 t = data[0:1]
1947 t = data[0:1]
1948
1948
1949 if t == 'x':
1949 if t == 'x':
1950 try:
1950 try:
1951 return _zlibdecompress(data)
1951 return _zlibdecompress(data)
1952 except zlib.error as e:
1952 except zlib.error as e:
1953 raise error.RevlogError(_('revlog decompress error: %s') %
1953 raise error.RevlogError(_('revlog decompress error: %s') %
1954 stringutil.forcebytestr(e))
1954 stringutil.forcebytestr(e))
1955 # '\0' is more common than 'u' so it goes first.
1955 # '\0' is more common than 'u' so it goes first.
1956 elif t == '\0':
1956 elif t == '\0':
1957 return data
1957 return data
1958 elif t == 'u':
1958 elif t == 'u':
1959 return util.buffer(data, 1)
1959 return util.buffer(data, 1)
1960
1960
1961 try:
1961 try:
1962 compressor = self._decompressors[t]
1962 compressor = self._decompressors[t]
1963 except KeyError:
1963 except KeyError:
1964 try:
1964 try:
1965 engine = util.compengines.forrevlogheader(t)
1965 engine = util.compengines.forrevlogheader(t)
1966 compressor = engine.revlogcompressor(self._compengineopts)
1966 compressor = engine.revlogcompressor(self._compengineopts)
1967 self._decompressors[t] = compressor
1967 self._decompressors[t] = compressor
1968 except KeyError:
1968 except KeyError:
1969 raise error.RevlogError(_('unknown compression type %r') % t)
1969 raise error.RevlogError(_('unknown compression type %r') % t)
1970
1970
1971 return compressor.decompress(data)
1971 return compressor.decompress(data)
1972
1972
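The routing above, reduced to its core as a sketch. Only the built-in
headers are handled here; real revlogs may also meet third-party engine
headers, which the code resolves through util.compengines.

    import zlib

    def route_chunk(data):
        t = data[0:1]
        if t == b'x':     # zlib-compressed chunk
            return zlib.decompress(data)
        elif t == b'\0':  # raw data; the NUL byte is part of the payload
            return data
        elif t == b'u':   # raw data behind an explicit marker byte
            return data[1:]
        raise ValueError('unknown compression type %r' % t)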
1973 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1973 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1974 cachedelta, ifh, dfh, alwayscache=False,
1974 cachedelta, ifh, dfh, alwayscache=False,
1975 deltacomputer=None):
1975 deltacomputer=None):
1976 """internal function to add revisions to the log
1976 """internal function to add revisions to the log
1977
1977
1978 see addrevision for argument descriptions.
1978 see addrevision for argument descriptions.
1979
1979
1980 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1980 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1981
1981
1982 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1982 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1983 be used.
1983 be used.
1984
1984
1985 invariants:
1985 invariants:
1986 - rawtext is optional (can be None); if not set, cachedelta must be set.
1986 - rawtext is optional (can be None); if not set, cachedelta must be set.
1987 if both are set, they must correspond to each other.
1987 if both are set, they must correspond to each other.
1988 """
1988 """
1989 if node == nullid:
1989 if node == nullid:
1990 raise error.RevlogError(_("%s: attempt to add null revision") %
1990 raise error.RevlogError(_("%s: attempt to add null revision") %
1991 self.indexfile)
1991 self.indexfile)
1992 if node == wdirid or node in wdirfilenodeids:
1992 if node == wdirid or node in wdirfilenodeids:
1993 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1993 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1994 self.indexfile)
1994 self.indexfile)
1995
1995
1996 if self._inline:
1996 if self._inline:
1997 fh = ifh
1997 fh = ifh
1998 else:
1998 else:
1999 fh = dfh
1999 fh = dfh
2000
2000
2001 btext = [rawtext]
2001 btext = [rawtext]
2002
2002
2003 curr = len(self)
2003 curr = len(self)
2004 prev = curr - 1
2004 prev = curr - 1
2005 offset = self.end(prev)
2005 offset = self.end(prev)
2006 p1r, p2r = self.rev(p1), self.rev(p2)
2006 p1r, p2r = self.rev(p1), self.rev(p2)
2007
2007
2008 # full versions are inserted when the needed deltas
2008 # full versions are inserted when the needed deltas
2009 # become comparable to the uncompressed text
2009 # become comparable to the uncompressed text
2010 if rawtext is None:
2010 if rawtext is None:
2011 # need rawtext size, before changed by flag processors, which is
2011 # need rawtext size, before changed by flag processors, which is
2012 # the non-raw size. use revlog explicitly to avoid filelog's extra
2012 # the non-raw size. use revlog explicitly to avoid filelog's extra
2013 # logic that might remove metadata size.
2013 # logic that might remove metadata size.
2014 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2014 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2015 cachedelta[1])
2015 cachedelta[1])
2016 else:
2016 else:
2017 textlen = len(rawtext)
2017 textlen = len(rawtext)
2018
2018
2019 if deltacomputer is None:
2019 if deltacomputer is None:
2020 deltacomputer = deltautil.deltacomputer(self)
2020 deltacomputer = deltautil.deltacomputer(self)
2021
2021
2022 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2022 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2023
2023
2024 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2024 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2025
2025
2026 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2026 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2027 deltainfo.base, link, p1r, p2r, node)
2027 deltainfo.base, link, p1r, p2r, node)
2028 self.index.append(e)
2028 self.index.append(e)
2029 self.nodemap[node] = curr
2029 self.nodemap[node] = curr
2030
2030
2031 # Reset the pure node cache start lookup offset to account for new
2031 # Reset the pure node cache start lookup offset to account for new
2032 # revision.
2032 # revision.
2033 if self._nodepos is not None:
2033 if self._nodepos is not None:
2034 self._nodepos = curr
2034 self._nodepos = curr
2035
2035
2036 entry = self._io.packentry(e, self.node, self.version, curr)
2036 entry = self._io.packentry(e, self.node, self.version, curr)
2037 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2037 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2038 link, offset)
2038 link, offset)
2039
2039
2040 rawtext = btext[0]
2040 rawtext = btext[0]
2041
2041
2042 if alwayscache and rawtext is None:
2042 if alwayscache and rawtext is None:
2043 rawtext = deltacomputer.buildtext(revinfo, fh)
2043 rawtext = deltacomputer.buildtext(revinfo, fh)
2044
2044
2045 if type(rawtext) == bytes: # only accept immutable objects
2045 if type(rawtext) == bytes: # only accept immutable objects
2046 self._revisioncache = (node, curr, rawtext)
2046 self._revisioncache = (node, curr, rawtext)
2047 self._chainbasecache[curr] = deltainfo.chainbase
2047 self._chainbasecache[curr] = deltainfo.chainbase
2048 return node
2048 return node
2049
2049
2050 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2050 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2051 # Files opened in a+ mode have inconsistent behavior on various
2051 # Files opened in a+ mode have inconsistent behavior on various
2052 # platforms. Windows requires that a file positioning call be made
2052 # platforms. Windows requires that a file positioning call be made
2053 # when the file handle transitions between reads and writes. See
2053 # when the file handle transitions between reads and writes. See
2054 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2054 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2055 # platforms, Python or the platform itself can be buggy. Some versions
2055 # platforms, Python or the platform itself can be buggy. Some versions
2056 # of Solaris have been observed to not append at the end of the file
2056 # of Solaris have been observed to not append at the end of the file
2057 # if the file was seeked to before the end. See issue4943 for more.
2057 # if the file was seeked to before the end. See issue4943 for more.
2058 #
2058 #
2059 # We work around this issue by inserting a seek() before writing.
2059 # We work around this issue by inserting a seek() before writing.
2060 # Note: This is likely not necessary on Python 3. However, because
2060 # Note: This is likely not necessary on Python 3. However, because
2061 # the file handle is reused for reads and may be seeked there, we need
2061 # the file handle is reused for reads and may be seeked there, we need
2062 # to be careful before changing this.
2062 # to be careful before changing this.
2063 ifh.seek(0, os.SEEK_END)
2063 ifh.seek(0, os.SEEK_END)
2064 if dfh:
2064 if dfh:
2065 dfh.seek(0, os.SEEK_END)
2065 dfh.seek(0, os.SEEK_END)
2066
2066
2067 curr = len(self) - 1
2067 curr = len(self) - 1
2068 if not self._inline:
2068 if not self._inline:
2069 transaction.add(self.datafile, offset)
2069 transaction.add(self.datafile, offset)
2070 transaction.add(self.indexfile, curr * len(entry))
2070 transaction.add(self.indexfile, curr * len(entry))
2071 if data[0]:
2071 if data[0]:
2072 dfh.write(data[0])
2072 dfh.write(data[0])
2073 dfh.write(data[1])
2073 dfh.write(data[1])
2074 ifh.write(entry)
2074 ifh.write(entry)
2075 else:
2075 else:
2076 offset += curr * self._io.size
2076 offset += curr * self._io.size
2077 transaction.add(self.indexfile, offset, curr)
2077 transaction.add(self.indexfile, offset, curr)
2078 ifh.write(entry)
2078 ifh.write(entry)
2079 ifh.write(data[0])
2079 ifh.write(data[0])
2080 ifh.write(data[1])
2080 ifh.write(data[1])
2081 self._enforceinlinesize(transaction, ifh)
2081 self._enforceinlinesize(transaction, ifh)
2082
2082
2083 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2083 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2084 """
2084 """
2085 add a delta group
2085 add a delta group
2086
2086
2087 Given a set of deltas, add them to the revision log. The
2087 Given a set of deltas, add them to the revision log. The
2088 first delta is against its parent, which should be in our
2088 first delta is against its parent, which should be in our
2089 log; the rest are against the previous delta.
2089 log; the rest are against the previous delta.
2090
2090
2091 If ``addrevisioncb`` is defined, it will be called with arguments of
2091 If ``addrevisioncb`` is defined, it will be called with arguments of
2092 this revlog and the node that was added.
2092 this revlog and the node that was added.
2093 """
2093 """
2094
2094
2095 if self._writinghandles:
2095 if self._writinghandles:
2096 raise error.ProgrammingError('cannot nest addgroup() calls')
2096 raise error.ProgrammingError('cannot nest addgroup() calls')
2097
2097
2098 nodes = []
2098 nodes = []
2099
2099
2100 r = len(self)
2100 r = len(self)
2101 end = 0
2101 end = 0
2102 if r:
2102 if r:
2103 end = self.end(r - 1)
2103 end = self.end(r - 1)
2104 ifh = self._indexfp("a+")
2104 ifh = self._indexfp("a+")
2105 isize = r * self._io.size
2105 isize = r * self._io.size
2106 if self._inline:
2106 if self._inline:
2107 transaction.add(self.indexfile, end + isize, r)
2107 transaction.add(self.indexfile, end + isize, r)
2108 dfh = None
2108 dfh = None
2109 else:
2109 else:
2110 transaction.add(self.indexfile, isize, r)
2110 transaction.add(self.indexfile, isize, r)
2111 transaction.add(self.datafile, end)
2111 transaction.add(self.datafile, end)
2112 dfh = self._datafp("a+")
2112 dfh = self._datafp("a+")
2113 def flush():
2113 def flush():
2114 if dfh:
2114 if dfh:
2115 dfh.flush()
2115 dfh.flush()
2116 ifh.flush()
2116 ifh.flush()
2117
2117
2118 self._writinghandles = (ifh, dfh)
2118 self._writinghandles = (ifh, dfh)
2119
2119
2120 try:
2120 try:
2121 deltacomputer = deltautil.deltacomputer(self)
2121 deltacomputer = deltautil.deltacomputer(self)
2122 # loop through our set of deltas
2122 # loop through our set of deltas
2123 for data in deltas:
2123 for data in deltas:
2124 node, p1, p2, linknode, deltabase, delta, flags = data
2124 node, p1, p2, linknode, deltabase, delta, flags = data
2125 link = linkmapper(linknode)
2125 link = linkmapper(linknode)
2126 flags = flags or REVIDX_DEFAULT_FLAGS
2126 flags = flags or REVIDX_DEFAULT_FLAGS
2127
2127
2128 nodes.append(node)
2128 nodes.append(node)
2129
2129
2130 if node in self.nodemap:
2130 if node in self.nodemap:
2131 self._nodeduplicatecallback(transaction, node)
2131 self._nodeduplicatecallback(transaction, node)
2132 # this can happen if two branches make the same change
2132 # this can happen if two branches make the same change
2133 continue
2133 continue
2134
2134
2135 for p in (p1, p2):
2135 for p in (p1, p2):
2136 if p not in self.nodemap:
2136 if p not in self.nodemap:
2137 raise error.LookupError(p, self.indexfile,
2137 raise error.LookupError(p, self.indexfile,
2138 _('unknown parent'))
2138 _('unknown parent'))
2139
2139
2140 if deltabase not in self.nodemap:
2140 if deltabase not in self.nodemap:
2141 raise error.LookupError(deltabase, self.indexfile,
2141 raise error.LookupError(deltabase, self.indexfile,
2142 _('unknown delta base'))
2142 _('unknown delta base'))
2143
2143
2144 baserev = self.rev(deltabase)
2144 baserev = self.rev(deltabase)
2145
2145
2146 if baserev != nullrev and self.iscensored(baserev):
2146 if baserev != nullrev and self.iscensored(baserev):
2147 # if base is censored, delta must be full replacement in a
2147 # if base is censored, delta must be full replacement in a
2148 # single patch operation
2148 # single patch operation
2149 hlen = struct.calcsize(">lll")
2149 hlen = struct.calcsize(">lll")
2150 oldlen = self.rawsize(baserev)
2150 oldlen = self.rawsize(baserev)
2151 newlen = len(delta) - hlen
2151 newlen = len(delta) - hlen
2152 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2152 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2153 raise error.CensoredBaseError(self.indexfile,
2153 raise error.CensoredBaseError(self.indexfile,
2154 self.node(baserev))
2154 self.node(baserev))
2155
2155
2156 if not flags and self._peek_iscensored(baserev, delta, flush):
2156 if not flags and self._peek_iscensored(baserev, delta, flush):
2157 flags |= REVIDX_ISCENSORED
2157 flags |= REVIDX_ISCENSORED
2158
2158
2159 # We assume consumers of addrevisioncb will want to retrieve
2159 # We assume consumers of addrevisioncb will want to retrieve
2160 # the added revision, which will require a call to
2160 # the added revision, which will require a call to
2161 # revision(). revision() will fast path if there is a cache
2161 # revision(). revision() will fast path if there is a cache
2162 # hit. So, we tell _addrevision() to always cache in this case.
2162 # hit. So, we tell _addrevision() to always cache in this case.
2163 # We're only using addgroup() in the context of changegroup
2163 # We're only using addgroup() in the context of changegroup
2164 # generation so the revision data can always be handled as raw
2164 # generation so the revision data can always be handled as raw
2165 # by the flagprocessor.
2165 # by the flagprocessor.
2166 self._addrevision(node, None, transaction, link,
2166 self._addrevision(node, None, transaction, link,
2167 p1, p2, flags, (baserev, delta),
2167 p1, p2, flags, (baserev, delta),
2168 ifh, dfh,
2168 ifh, dfh,
2169 alwayscache=bool(addrevisioncb),
2169 alwayscache=bool(addrevisioncb),
2170 deltacomputer=deltacomputer)
2170 deltacomputer=deltacomputer)
2171
2171
2172 if addrevisioncb:
2172 if addrevisioncb:
2173 addrevisioncb(self, node)
2173 addrevisioncb(self, node)
2174
2174
2175 if not dfh and not self._inline:
2175 if not dfh and not self._inline:
2176 # addrevision switched from inline to conventional
2176 # addrevision switched from inline to conventional
2177 # reopen the index
2177 # reopen the index
2178 ifh.close()
2178 ifh.close()
2179 dfh = self._datafp("a+")
2179 dfh = self._datafp("a+")
2180 ifh = self._indexfp("a+")
2180 ifh = self._indexfp("a+")
2181 self._writinghandles = (ifh, dfh)
2181 self._writinghandles = (ifh, dfh)
2182 finally:
2182 finally:
2183 self._writinghandles = None
2183 self._writinghandles = None
2184
2184
2185 if dfh:
2185 if dfh:
2186 dfh.close()
2186 dfh.close()
2187 ifh.close()
2187 ifh.close()
2188
2188
2189 return nodes
2189 return nodes
2190
2190
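For illustration, each item the ``deltas`` iterable yields is a 7-tuple and
``linkmapper`` translates a link node into a local link revision. A hedged
sketch of a caller (``rl``, ``cl``, ``tr`` and the tuple fields are assumed
to exist):

    nodes_seen = []
    deltas = iter([
        # (node, p1, p2, linknode, deltabase, delta, flags)
        (node, p1, p2, linknode, deltabase, deltabytes, 0),
    ])
    added = rl.addgroup(deltas, cl.rev, tr,
                        addrevisioncb=lambda log, n: nodes_seen.append(n))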
2191 def iscensored(self, rev):
2191 def iscensored(self, rev):
2192 """Check if a file revision is censored."""
2192 """Check if a file revision is censored."""
2193 if not self._censorable:
2193 if not self._censorable:
2194 return False
2194 return False
2195
2195
2196 return self.flags(rev) & REVIDX_ISCENSORED
2196 return self.flags(rev) & REVIDX_ISCENSORED
2197
2197
2198 def _peek_iscensored(self, baserev, delta, flush):
2198 def _peek_iscensored(self, baserev, delta, flush):
2199 """Quickly check if a delta produces a censored revision."""
2199 """Quickly check if a delta produces a censored revision."""
2200 if not self._censorable:
2200 if not self._censorable:
2201 return False
2201 return False
2202
2202
2203 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2203 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2204
2204
2205 def getstrippoint(self, minlink):
2205 def getstrippoint(self, minlink):
2206 """find the minimum rev that must be stripped to strip the linkrev
2206 """find the minimum rev that must be stripped to strip the linkrev
2207
2207
2208 Returns a tuple containing the minimum rev and a set of all revs that
2208 Returns a tuple containing the minimum rev and a set of all revs that
2209 have linkrevs that will be broken by this strip.
2209 have linkrevs that will be broken by this strip.
2210 """
2210 """
2211 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2211 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2212 self.headrevs(),
2212 self.headrevs(),
2213 self.linkrev, self.parentrevs)
2213 self.linkrev, self.parentrevs)
2214
2214
2215 def strip(self, minlink, transaction):
2215 def strip(self, minlink, transaction):
2216 """truncate the revlog on the first revision with a linkrev >= minlink
2216 """truncate the revlog on the first revision with a linkrev >= minlink
2217
2217
2218 This function is called when we're stripping revision minlink and
2218 This function is called when we're stripping revision minlink and
2219 its descendants from the repository.
2219 its descendants from the repository.
2220
2220
2221 We have to remove all revisions with linkrev >= minlink, because
2221 We have to remove all revisions with linkrev >= minlink, because
2222 the equivalent changelog revisions will be renumbered after the
2222 the equivalent changelog revisions will be renumbered after the
2223 strip.
2223 strip.
2224
2224
2225 So we truncate the revlog on the first of these revisions, and
2225 So we truncate the revlog on the first of these revisions, and
2226 trust that the caller has saved the revisions that shouldn't be
2226 trust that the caller has saved the revisions that shouldn't be
2227 removed and that it'll re-add them after this truncation.
2227 removed and that it'll re-add them after this truncation.
2228 """
2228 """
2229 if len(self) == 0:
2229 if len(self) == 0:
2230 return
2230 return
2231
2231
2232 rev, _ = self.getstrippoint(minlink)
2232 rev, _ = self.getstrippoint(minlink)
2233 if rev == len(self):
2233 if rev == len(self):
2234 return
2234 return
2235
2235
2236 # first truncate the files on disk
2236 # first truncate the files on disk
2237 end = self.start(rev)
2237 end = self.start(rev)
2238 if not self._inline:
2238 if not self._inline:
2239 transaction.add(self.datafile, end)
2239 transaction.add(self.datafile, end)
2240 end = rev * self._io.size
2240 end = rev * self._io.size
2241 else:
2241 else:
2242 end += rev * self._io.size
2242 end += rev * self._io.size
2243
2243
2244 transaction.add(self.indexfile, end)
2244 transaction.add(self.indexfile, end)
2245
2245
2246 # then reset internal state in memory to forget those revisions
2246 # then reset internal state in memory to forget those revisions
2247 self._revisioncache = None
2247 self._revisioncache = None
2248 self._chaininfocache = {}
2248 self._chaininfocache = {}
2249 self._chunkclear()
2249 self._chunkclear()
2250 for x in pycompat.xrange(rev, len(self)):
2250 for x in pycompat.xrange(rev, len(self)):
2251 del self.nodemap[self.node(x)]
2251 del self.nodemap[self.node(x)]
2252
2252
2253 del self.index[rev:-1]
2253 del self.index[rev:-1]
2254 self._nodepos = None
2254 self._nodepos = None
2255
2255
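A hedged sketch of how the two methods above cooperate during a strip
(``rl``, ``tr`` and ``minlink`` assumed):

    rev, broken = rl.getstrippoint(minlink)
    # ``broken`` holds revs whose linkrevs would dangle; callers save those
    # revisions (e.g. into a bundle) before truncating
    if rev < len(rl):
        rl.strip(minlink, tr)  # truncates everything from ``rev`` onward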
2256 def checksize(self):
2256 def checksize(self):
2257 """Check size of index and data files
2257 """Check size of index and data files
2258
2258
2259 return a (dd, di) tuple.
2259 return a (dd, di) tuple.
2260 - dd: extra bytes for the "data" file
2260 - dd: extra bytes for the "data" file
2261 - di: extra bytes for the "index" file
2261 - di: extra bytes for the "index" file
2262
2262
2263 A healthy revlog will return (0, 0).
2263 A healthy revlog will return (0, 0).
2264 """
2264 """
2265 expected = 0
2265 expected = 0
2266 if len(self):
2266 if len(self):
2267 expected = max(0, self.end(len(self) - 1))
2267 expected = max(0, self.end(len(self) - 1))
2268
2268
2269 try:
2269 try:
2270 with self._datafp() as f:
2270 with self._datafp() as f:
2271 f.seek(0, io.SEEK_END)
2271 f.seek(0, io.SEEK_END)
2272 actual = f.tell()
2272 actual = f.tell()
2273 dd = actual - expected
2273 dd = actual - expected
2274 except IOError as inst:
2274 except IOError as inst:
2275 if inst.errno != errno.ENOENT:
2275 if inst.errno != errno.ENOENT:
2276 raise
2276 raise
2277 dd = 0
2277 dd = 0
2278
2278
2279 try:
2279 try:
2280 f = self.opener(self.indexfile)
2280 f = self.opener(self.indexfile)
2281 f.seek(0, io.SEEK_END)
2281 f.seek(0, io.SEEK_END)
2282 actual = f.tell()
2282 actual = f.tell()
2283 f.close()
2283 f.close()
2284 s = self._io.size
2284 s = self._io.size
2285 i = max(0, actual // s)
2285 i = max(0, actual // s)
2286 di = actual - (i * s)
2286 di = actual - (i * s)
2287 if self._inline:
2287 if self._inline:
2288 databytes = 0
2288 databytes = 0
2289 for r in self:
2289 for r in self:
2290 databytes += max(0, self.length(r))
2290 databytes += max(0, self.length(r))
2291 dd = 0
2291 dd = 0
2292 di = actual - len(self) * s - databytes
2292 di = actual - len(self) * s - databytes
2293 except IOError as inst:
2293 except IOError as inst:
2294 if inst.errno != errno.ENOENT:
2294 if inst.errno != errno.ENOENT:
2295 raise
2295 raise
2296 di = 0
2296 di = 0
2297
2297
2298 return (dd, di)
2298 return (dd, di)
2299
2299
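Interpreting the return value, as a short sketch (``rl`` assumed):

    dd, di = rl.checksize()
    if (dd, di) == (0, 0):
        print('revlog is consistent')
    else:
        print('%d stray data bytes, %d stray index bytes' % (dd, di))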
2300 def files(self):
2300 def files(self):
2301 res = [self.indexfile]
2301 res = [self.indexfile]
2302 if not self._inline:
2302 if not self._inline:
2303 res.append(self.datafile)
2303 res.append(self.datafile)
2304 return res
2304 return res
2305
2305
2306 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2306 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2307 assumehaveparentrevisions=False,
2307 assumehaveparentrevisions=False,
2308 deltamode=repository.CG_DELTAMODE_STD):
2308 deltamode=repository.CG_DELTAMODE_STD):
2309 if nodesorder not in ('nodes', 'storage', 'linear', None):
2309 if nodesorder not in ('nodes', 'storage', 'linear', None):
2310 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2310 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2311 nodesorder)
2311 nodesorder)
2312
2312
2313 if nodesorder is None and not self._generaldelta:
2313 if nodesorder is None and not self._generaldelta:
2314 nodesorder = 'storage'
2314 nodesorder = 'storage'
2315
2315
2316 if (not self._storedeltachains and
2316 if (not self._storedeltachains and
2317 deltamode != repository.CG_DELTAMODE_PREV):
2317 deltamode != repository.CG_DELTAMODE_PREV):
2318 deltamode = repository.CG_DELTAMODE_FULL
2318 deltamode = repository.CG_DELTAMODE_FULL
2319
2319
2320 return storageutil.emitrevisions(
2320 return storageutil.emitrevisions(
2321 self, nodes, nodesorder, revlogrevisiondelta,
2321 self, nodes, nodesorder, revlogrevisiondelta,
2322 deltaparentfn=self.deltaparent,
2322 deltaparentfn=self.deltaparent,
2323 candeltafn=self.candelta,
2323 candeltafn=self.candelta,
2324 rawsizefn=self.rawsize,
2324 rawsizefn=self.rawsize,
2325 revdifffn=self.revdiff,
2325 revdifffn=self.revdiff,
2326 flagsfn=self.flags,
2326 flagsfn=self.flags,
2327 deltamode=deltamode,
2327 deltamode=deltamode,
2328 revisiondata=revisiondata,
2328 revisiondata=revisiondata,
2329 assumehaveparentrevisions=assumehaveparentrevisions)
2329 assumehaveparentrevisions=assumehaveparentrevisions)
2330
2330
2331 DELTAREUSEALWAYS = 'always'
2331 DELTAREUSEALWAYS = 'always'
2332 DELTAREUSESAMEREVS = 'samerevs'
2332 DELTAREUSESAMEREVS = 'samerevs'
2333 DELTAREUSENEVER = 'never'
2333 DELTAREUSENEVER = 'never'
2334
2334
2335 DELTAREUSEFULLADD = 'fulladd'
2335 DELTAREUSEFULLADD = 'fulladd'
2336
2336
2337 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2337 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2338
2338
2339 def clone(self, tr, destrevlog, addrevisioncb=None,
2339 def clone(self, tr, destrevlog, addrevisioncb=None,
2340 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2340 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2341 """Copy this revlog to another, possibly with format changes.
2341 """Copy this revlog to another, possibly with format changes.
2342
2342
2343 The destination revlog will contain the same revisions and nodes.
2343 The destination revlog will contain the same revisions and nodes.
2344 However, it may not be bit-for-bit identical due to e.g. delta encoding
2344 However, it may not be bit-for-bit identical due to e.g. delta encoding
2345 differences.
2345 differences.
2346
2346
2347 The ``deltareuse`` argument controls how deltas from the existing revlog
2347 The ``deltareuse`` argument controls how deltas from the existing revlog
2348 are preserved in the destination revlog. The argument can have the
2348 are preserved in the destination revlog. The argument can have the
2349 following values:
2349 following values:
2350
2350
2351 DELTAREUSEALWAYS
2351 DELTAREUSEALWAYS
2352 Deltas will always be reused (if possible), even if the destination
2352 Deltas will always be reused (if possible), even if the destination
2353 revlog would not select the same revisions for the delta. This is the
2353 revlog would not select the same revisions for the delta. This is the
2354 fastest mode of operation.
2354 fastest mode of operation.
2355 DELTAREUSESAMEREVS
2355 DELTAREUSESAMEREVS
2356 Deltas will be reused if the destination revlog would pick the same
2356 Deltas will be reused if the destination revlog would pick the same
2357 revisions for the delta. This mode strikes a balance between speed
2357 revisions for the delta. This mode strikes a balance between speed
2358 and optimization.
2358 and optimization.
2359 DELTAREUSENEVER
2359 DELTAREUSENEVER
2360 Deltas will never be reused. This is the slowest mode of execution.
2360 Deltas will never be reused. This is the slowest mode of execution.
2361 This mode can be used to recompute deltas (e.g. if the diff/delta
2361 This mode can be used to recompute deltas (e.g. if the diff/delta
2362 algorithm changes).
2362 algorithm changes).
2363 DELTAREUSEFULLADD
2364 Revisions will be re-added as if they were new content. This is
2365 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2366 e.g. large file detection and handling.
2363
2367
2364 Delta computation can be slow, so the choice of delta reuse policy can
2368 Delta computation can be slow, so the choice of delta reuse policy can
2365 significantly affect run time.
2369 significantly affect run time.
2366
2370
2367 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2371 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2368 two extremes. Deltas will be reused if they are appropriate. But if the
2372 two extremes. Deltas will be reused if they are appropriate. But if the
2369 delta could choose a better revision, it will do so. This means if you
2373 delta could choose a better revision, it will do so. This means if you
2370 are converting a non-generaldelta revlog to a generaldelta revlog,
2374 are converting a non-generaldelta revlog to a generaldelta revlog,
2371 deltas will be recomputed if the delta's parent isn't a parent of the
2375 deltas will be recomputed if the delta's parent isn't a parent of the
2372 revision.
2376 revision.
2373
2377
2374 In addition to the delta policy, the ``forcedeltabothparents``
2378 In addition to the delta policy, the ``forcedeltabothparents``
2375 argument controls whether to force computing deltas against both parents
2379 argument controls whether to force computing deltas against both parents
2376 for merges. If not set, the destination revlog's current setting is kept.
2380 for merges. If not set, the destination revlog's current setting is kept.
2377 """
2381 """
2378 if deltareuse not in self.DELTAREUSEALL:
2382 if deltareuse not in self.DELTAREUSEALL:
2379 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2383 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2380
2384
2381 if len(destrevlog):
2385 if len(destrevlog):
2382 raise ValueError(_('destination revlog is not empty'))
2386 raise ValueError(_('destination revlog is not empty'))
2383
2387
2384 if getattr(self, 'filteredrevs', None):
2388 if getattr(self, 'filteredrevs', None):
2385 raise ValueError(_('source revlog has filtered revisions'))
2389 raise ValueError(_('source revlog has filtered revisions'))
2386 if getattr(destrevlog, 'filteredrevs', None):
2390 if getattr(destrevlog, 'filteredrevs', None):
2387 raise ValueError(_('destination revlog has filtered revisions'))
2391 raise ValueError(_('destination revlog has filtered revisions'))
2388
2392
2389 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2393 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2390 # if possible.
2394 # if possible.
2391 oldlazydelta = destrevlog._lazydelta
2395 oldlazydelta = destrevlog._lazydelta
2392 oldlazydeltabase = destrevlog._lazydeltabase
2396 oldlazydeltabase = destrevlog._lazydeltabase
2393 oldamd = destrevlog._deltabothparents
2397 oldamd = destrevlog._deltabothparents
2394
2398
2395 try:
2399 try:
2396 if deltareuse == self.DELTAREUSEALWAYS:
2400 if deltareuse == self.DELTAREUSEALWAYS:
2397 destrevlog._lazydeltabase = True
2401 destrevlog._lazydeltabase = True
2398 destrevlog._lazydelta = True
2402 destrevlog._lazydelta = True
2399 elif deltareuse == self.DELTAREUSESAMEREVS:
2403 elif deltareuse == self.DELTAREUSESAMEREVS:
2400 destrevlog._lazydeltabase = False
2404 destrevlog._lazydeltabase = False
2401 destrevlog._lazydelta = True
2405 destrevlog._lazydelta = True
2402 elif deltareuse == self.DELTAREUSENEVER:
2406 elif deltareuse == self.DELTAREUSENEVER:
2403 destrevlog._lazydeltabase = False
2407 destrevlog._lazydeltabase = False
2404 destrevlog._lazydelta = False
2408 destrevlog._lazydelta = False
2405
2409
2406 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2410 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2407
2411
2408 self._clone(tr, destrevlog, addrevisioncb, deltareuse,
2412 self._clone(tr, destrevlog, addrevisioncb, deltareuse,
2409 forcedeltabothparents)
2413 forcedeltabothparents)
2410
2414
2411 finally:
2415 finally:
2412 destrevlog._lazydelta = oldlazydelta
2416 destrevlog._lazydelta = oldlazydelta
2413 destrevlog._lazydeltabase = oldlazydeltabase
2417 destrevlog._lazydeltabase = oldlazydeltabase
2414 destrevlog._deltabothparents = oldamd
2418 destrevlog._deltabothparents = oldamd
2415
2419
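A hedged example of driving a clone with the newly documented policy; the
``srcrevlog``, ``destrevlog`` and ``tr`` objects are assumed to be prepared
by the caller (e.g. by the repository upgrade code).

    srcrevlog.clone(tr, destrevlog,
                    deltareuse=srcrevlog.DELTAREUSEFULLADD,
                    forcedeltabothparents=None)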
2416 def _clone(self, tr, destrevlog, addrevisioncb, deltareuse,
2420 def _clone(self, tr, destrevlog, addrevisioncb, deltareuse,
2417 forcedeltabothparents):
2421 forcedeltabothparents):
2418 """perform the core duty of `revlog.clone` after parameter processing"""
2422 """perform the core duty of `revlog.clone` after parameter processing"""
2419 deltacomputer = deltautil.deltacomputer(destrevlog)
2423 deltacomputer = deltautil.deltacomputer(destrevlog)
2420 index = self.index
2424 index = self.index
2421 for rev in self:
2425 for rev in self:
2422 entry = index[rev]
2426 entry = index[rev]
2423
2427
2424 # Some classes override linkrev to take filtered revs into
2428 # Some classes override linkrev to take filtered revs into
2425 # account. Use raw entry from index.
2429 # account. Use raw entry from index.
2426 flags = entry[0] & 0xffff
2430 flags = entry[0] & 0xffff
2427 linkrev = entry[4]
2431 linkrev = entry[4]
2428 p1 = index[entry[5]][7]
2432 p1 = index[entry[5]][7]
2429 p2 = index[entry[6]][7]
2433 p2 = index[entry[6]][7]
2430 node = entry[7]
2434 node = entry[7]
2431
2435
2432 # (Possibly) reuse the delta from the revlog if allowed and
2436 # (Possibly) reuse the delta from the revlog if allowed and
2433 # the revlog chunk is a delta.
2437 # the revlog chunk is a delta.
2434 cachedelta = None
2438 cachedelta = None
2435 rawtext = None
2439 rawtext = None
2436 if (deltareuse != self.DELTAREUSEFULLADD and destrevlog._lazydelta):
2440 if (deltareuse != self.DELTAREUSEFULLADD and destrevlog._lazydelta):
2437 dp = self.deltaparent(rev)
2441 dp = self.deltaparent(rev)
2438 if dp != nullrev:
2442 if dp != nullrev:
2439 cachedelta = (dp, bytes(self._chunk(rev)))
2443 cachedelta = (dp, bytes(self._chunk(rev)))
2440
2444
2441 if not cachedelta:
2445 if not cachedelta:
2442 rawtext = self.rawdata(rev)
2446 rawtext = self.rawdata(rev)
2443
2447
2444
2448
2445 if deltareuse == self.DELTAREUSEFULLADD:
2449 if deltareuse == self.DELTAREUSEFULLADD:
2446 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2450 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2447 cachedelta=cachedelta,
2451 cachedelta=cachedelta,
2448 node=node, flags=flags,
2452 node=node, flags=flags,
2449 deltacomputer=deltacomputer)
2453 deltacomputer=deltacomputer)
2450 else:
2454 else:
2451 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2455 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2452 checkambig=False)
2456 checkambig=False)
2453 dfh = None
2457 dfh = None
2454 if not destrevlog._inline:
2458 if not destrevlog._inline:
2455 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2459 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2456 try:
2460 try:
2457 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2461 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2458 p2, flags, cachedelta, ifh, dfh,
2462 p2, flags, cachedelta, ifh, dfh,
2459 deltacomputer=deltacomputer)
2463 deltacomputer=deltacomputer)
2460 finally:
2464 finally:
2461 if dfh:
2465 if dfh:
2462 dfh.close()
2466 dfh.close()
2463 ifh.close()
2467 ifh.close()
2464
2468
2465 if addrevisioncb:
2469 if addrevisioncb:
2466 addrevisioncb(self, rev, node)
2470 addrevisioncb(self, rev, node)
2467
2471
2468 def censorrevision(self, tr, censornode, tombstone=b''):
2472 def censorrevision(self, tr, censornode, tombstone=b''):
2469 if (self.version & 0xFFFF) == REVLOGV0:
2473 if (self.version & 0xFFFF) == REVLOGV0:
2470 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2474 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2471 self.version)
2475 self.version)
2472
2476
2473 censorrev = self.rev(censornode)
2477 censorrev = self.rev(censornode)
2474 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2478 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2475
2479
2476 if len(tombstone) > self.rawsize(censorrev):
2480 if len(tombstone) > self.rawsize(censorrev):
2477 raise error.Abort(_('censor tombstone must be no longer than '
2481 raise error.Abort(_('censor tombstone must be no longer than '
2478 'censored data'))
2482 'censored data'))
2479
2483
2480 # Rewriting the revlog in place is hard. Our strategy for censoring is
2484 # Rewriting the revlog in place is hard. Our strategy for censoring is
2481 # to create a new revlog, copy all revisions to it, then replace the
2485 # to create a new revlog, copy all revisions to it, then replace the
2482 # revlogs on transaction close.
2486 # revlogs on transaction close.
2483
2487
2484 newindexfile = self.indexfile + b'.tmpcensored'
2488 newindexfile = self.indexfile + b'.tmpcensored'
2485 newdatafile = self.datafile + b'.tmpcensored'
2489 newdatafile = self.datafile + b'.tmpcensored'
2486
2490
2487 # This is a bit dangerous. We could easily have a mismatch of state.
2491 # This is a bit dangerous. We could easily have a mismatch of state.
2488 newrl = revlog(self.opener, newindexfile, newdatafile,
2492 newrl = revlog(self.opener, newindexfile, newdatafile,
2489 censorable=True)
2493 censorable=True)
2490 newrl.version = self.version
2494 newrl.version = self.version
2491 newrl._generaldelta = self._generaldelta
2495 newrl._generaldelta = self._generaldelta
2492 newrl._io = self._io
2496 newrl._io = self._io
2493
2497
2494 for rev in self.revs():
2498 for rev in self.revs():
2495 node = self.node(rev)
2499 node = self.node(rev)
2496 p1, p2 = self.parents(node)
2500 p1, p2 = self.parents(node)
2497
2501
2498 if rev == censorrev:
2502 if rev == censorrev:
2499 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2503 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2500 p1, p2, censornode, REVIDX_ISCENSORED)
2504 p1, p2, censornode, REVIDX_ISCENSORED)
2501
2505
2502 if newrl.deltaparent(rev) != nullrev:
2506 if newrl.deltaparent(rev) != nullrev:
2503 raise error.Abort(_('censored revision stored as delta; '
2507 raise error.Abort(_('censored revision stored as delta; '
2504 'cannot censor'),
2508 'cannot censor'),
2505 hint=_('censoring of revlogs is not '
2509 hint=_('censoring of revlogs is not '
2506 'fully implemented; please report '
2510 'fully implemented; please report '
2507 'this bug'))
2511 'this bug'))
2508 continue
2512 continue
2509
2513
2510 if self.iscensored(rev):
2514 if self.iscensored(rev):
2511 if self.deltaparent(rev) != nullrev:
2515 if self.deltaparent(rev) != nullrev:
2512 raise error.Abort(_('cannot censor due to censored '
2516 raise error.Abort(_('cannot censor due to censored '
2513 'revision having delta stored'))
2517 'revision having delta stored'))
2514 rawtext = self._chunk(rev)
2518 rawtext = self._chunk(rev)
2515 else:
2519 else:
2516 rawtext = self.rawdata(rev)
2520 rawtext = self.rawdata(rev)
2517
2521
2518 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2522 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2519 self.flags(rev))
2523 self.flags(rev))
2520
2524
2521 tr.addbackup(self.indexfile, location='store')
2525 tr.addbackup(self.indexfile, location='store')
2522 if not self._inline:
2526 if not self._inline:
2523 tr.addbackup(self.datafile, location='store')
2527 tr.addbackup(self.datafile, location='store')
2524
2528
2525 self.opener.rename(newrl.indexfile, self.indexfile)
2529 self.opener.rename(newrl.indexfile, self.indexfile)
2526 if not self._inline:
2530 if not self._inline:
2527 self.opener.rename(newrl.datafile, self.datafile)
2531 self.opener.rename(newrl.datafile, self.datafile)
2528
2532
2529 self.clearcaches()
2533 self.clearcaches()
2530 self._loadindex()
2534 self._loadindex()
2531
2535
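A usage sketch, assuming ``repo`` exists and ``fl`` is a censorable
filelog-style revlog containing ``badnode``:

    with repo.transaction(b'censor') as tr:
        fl.censorrevision(tr, badnode, tombstone=b'removed: legal request')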
2532 def verifyintegrity(self, state):
2536 def verifyintegrity(self, state):
2533 """Verifies the integrity of the revlog.
2537 """Verifies the integrity of the revlog.
2534
2538
2535 Yields ``revlogproblem`` instances describing problems that are
2539 Yields ``revlogproblem`` instances describing problems that are
2536 found.
2540 found.
2537 """
2541 """
2538 dd, di = self.checksize()
2542 dd, di = self.checksize()
2539 if dd:
2543 if dd:
2540 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2544 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2541 if di:
2545 if di:
2542 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2546 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2543
2547
2544 version = self.version & 0xFFFF
2548 version = self.version & 0xFFFF
2545
2549
2546 # The verifier tells us what version revlog we should be.
2550 # The verifier tells us what version revlog we should be.
2547 if version != state['expectedversion']:
2551 if version != state['expectedversion']:
2548 yield revlogproblem(
2552 yield revlogproblem(
2549 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2553 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2550 (self.indexfile, version, state['expectedversion']))
2554 (self.indexfile, version, state['expectedversion']))
2551
2555
2552 state['skipread'] = set()
2556 state['skipread'] = set()
2553
2557
2554 for rev in self:
2558 for rev in self:
2555 node = self.node(rev)
2559 node = self.node(rev)
2556
2560
2557 # Verify contents. 4 cases to care about:
2561 # Verify contents. 4 cases to care about:
2558 #
2562 #
2559 # common: the most common case
2563 # common: the most common case
2560 # rename: with a rename
2564 # rename: with a rename
2561 # meta: file content starts with b'\1\n', the metadata
2565 # meta: file content starts with b'\1\n', the metadata
2562 # header defined in filelog.py, but without a rename
2566 # header defined in filelog.py, but without a rename
2563 # ext: content stored externally
2567 # ext: content stored externally
2564 #
2568 #
2565 # More formally, their differences are shown below:
2569 # More formally, their differences are shown below:
2566 #
2570 #
2567 # | common | rename | meta | ext
2571 # | common | rename | meta | ext
2568 # -------------------------------------------------------
2572 # -------------------------------------------------------
2569 # flags() | 0 | 0 | 0 | not 0
2573 # flags() | 0 | 0 | 0 | not 0
2570 # renamed() | False | True | False | ?
2574 # renamed() | False | True | False | ?
2571 # rawtext[0:2]=='\1\n'| False | True | True | ?
2575 # rawtext[0:2]=='\1\n'| False | True | True | ?
2572 #
2576 #
2573 # "rawtext" means the raw text stored in revlog data, which
2577 # "rawtext" means the raw text stored in revlog data, which
2574 # could be retrieved by "rawdata(rev)". "text"
2578 # could be retrieved by "rawdata(rev)". "text"
2575 # mentioned below is "revision(rev)".
2579 # mentioned below is "revision(rev)".
2576 #
2580 #
2577 # There are 3 different lengths stored physically:
2581 # There are 3 different lengths stored physically:
2578 # 1. L1: rawsize, stored in revlog index
2582 # 1. L1: rawsize, stored in revlog index
2579 # 2. L2: len(rawtext), stored in revlog data
2583 # 2. L2: len(rawtext), stored in revlog data
2580 # 3. L3: len(text), stored in revlog data if flags==0, or
2584 # 3. L3: len(text), stored in revlog data if flags==0, or
2581 # possibly somewhere else if flags!=0
2585 # possibly somewhere else if flags!=0
2582 #
2586 #
2583 # L1 should be equal to L2. L3 could be different from them.
2587 # L1 should be equal to L2. L3 could be different from them.
2584 # "text" may or may not affect commit hash depending on flag
2588 # "text" may or may not affect commit hash depending on flag
2585 # processors (see flagutil.addflagprocessor).
2589 # processors (see flagutil.addflagprocessor).
2586 #
2590 #
2587 # | common | rename | meta | ext
2591 # | common | rename | meta | ext
2588 # -------------------------------------------------
2592 # -------------------------------------------------
2589 # rawsize() | L1 | L1 | L1 | L1
2593 # rawsize() | L1 | L1 | L1 | L1
2590 # size() | L1 | L2-LM | L1(*) | L1 (?)
2594 # size() | L1 | L2-LM | L1(*) | L1 (?)
2591 # len(rawtext) | L2 | L2 | L2 | L2
2595 # len(rawtext) | L2 | L2 | L2 | L2
2592 # len(text) | L2 | L2 | L2 | L3
2596 # len(text) | L2 | L2 | L2 | L3
2593 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2597 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2594 #
2598 #
2595 # LM: length of metadata, depending on rawtext
2599 # LM: length of metadata, depending on rawtext
2596 # (*): not ideal, see comment in filelog.size
2600 # (*): not ideal, see comment in filelog.size
2597 # (?): could be "- len(meta)" if the resolved content has
2601 # (?): could be "- len(meta)" if the resolved content has
2598 # rename metadata
2602 # rename metadata
2599 #
2603 #
2600 # Checks needed to be done:
2604 # Checks needed to be done:
2601 # 1. length check: L1 == L2, in all cases.
2605 # 1. length check: L1 == L2, in all cases.
2602 # 2. hash check: depending on flag processor, we may need to
2606 # 2. hash check: depending on flag processor, we may need to
2603 # use either "text" (external), or "rawtext" (in revlog).
2607 # use either "text" (external), or "rawtext" (in revlog).
2604
2608
2605 try:
2609 try:
2606 skipflags = state.get('skipflags', 0)
2610 skipflags = state.get('skipflags', 0)
2607 if skipflags:
2611 if skipflags:
2608 skipflags &= self.flags(rev)
2612 skipflags &= self.flags(rev)
2609
2613
2610 if skipflags:
2614 if skipflags:
2611 state['skipread'].add(node)
2615 state['skipread'].add(node)
2612 else:
2616 else:
2613 # Side-effect: read content and verify hash.
2617 # Side-effect: read content and verify hash.
2614 self.revision(node)
2618 self.revision(node)
2615
2619
2616 l1 = self.rawsize(rev)
2620 l1 = self.rawsize(rev)
2617 l2 = len(self.rawdata(node))
2621 l2 = len(self.rawdata(node))
2618
2622
2619 if l1 != l2:
2623 if l1 != l2:
2620 yield revlogproblem(
2624 yield revlogproblem(
2621 error=_('unpacked size is %d, %d expected') % (l2, l1),
2625 error=_('unpacked size is %d, %d expected') % (l2, l1),
2622 node=node)
2626 node=node)
2623
2627
2624 except error.CensoredNodeError:
2628 except error.CensoredNodeError:
2625 if state['erroroncensored']:
2629 if state['erroroncensored']:
2626 yield revlogproblem(error=_('censored file data'),
2630 yield revlogproblem(error=_('censored file data'),
2627 node=node)
2631 node=node)
2628 state['skipread'].add(node)
2632 state['skipread'].add(node)
2629 except Exception as e:
2633 except Exception as e:
2630 yield revlogproblem(
2634 yield revlogproblem(
2631 error=_('unpacking %s: %s') % (short(node),
2635 error=_('unpacking %s: %s') % (short(node),
2632 stringutil.forcebytestr(e)),
2636 stringutil.forcebytestr(e)),
2633 node=node)
2637 node=node)
2634 state['skipread'].add(node)
2638 state['skipread'].add(node)
2635
2639
2636 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2640 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2637 revisionscount=False, trackedsize=False,
2641 revisionscount=False, trackedsize=False,
2638 storedsize=False):
2642 storedsize=False):
2639 d = {}
2643 d = {}
2640
2644
2641 if exclusivefiles:
2645 if exclusivefiles:
2642 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2646 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2643 if not self._inline:
2647 if not self._inline:
2644 d['exclusivefiles'].append((self.opener, self.datafile))
2648 d['exclusivefiles'].append((self.opener, self.datafile))
2645
2649
2646 if sharedfiles:
2650 if sharedfiles:
2647 d['sharedfiles'] = []
2651 d['sharedfiles'] = []
2648
2652
2649 if revisionscount:
2653 if revisionscount:
2650 d['revisionscount'] = len(self)
2654 d['revisionscount'] = len(self)
2651
2655
2652 if trackedsize:
2656 if trackedsize:
2653 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2657 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2654
2658
2655 if storedsize:
2659 if storedsize:
2656 d['storedsize'] = sum(self.opener.stat(path).st_size
2660 d['storedsize'] = sum(self.opener.stat(path).st_size
2657 for path in self.files())
2661 for path in self.files())
2658
2662
2659 return d
2663 return d
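A final sketch of querying the storage stats assembled above (``rl``
assumed):

    info = rl.storageinfo(revisionscount=True, trackedsize=True,
                          storedsize=True)
    # e.g. {'revisionscount': 42, 'trackedsize': 123456, 'storedsize': 65432}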