sidedata: make sure we don't use the flag if there are no sidedata...
marmoute
r43307:7902001a default
@@ -1,2678 +1,2679 b''
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVIDX_SIDEDATA,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanket usage of all the names to prevent pyflakes complaints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_SIDEDATA
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')
rustancestor = policy.importrust(r'ancestor')
rustdagop = policy.importrust(r'dagop')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False, {}

def ellipsiswriteprocessor(rl, text, sidedata):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
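
# Editorial aside (not part of this changeset): offset_type() packs a byte
# offset and a 16-bit flag field into one integer, and getoffset()/gettype()
# recover the two halves. A minimal doctest-style sketch:
#
#   >>> v = offset_type(4096, 0)
#   >>> getoffset(v), gettype(v)
#   (4096, 0)
#
# A flag outside flagutil.REVIDX_KNOWN_FLAGS makes offset_type() raise
# ValueError.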

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
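
# Editorial sanity check (illustrative): ">4l20s20s20s" is four 4-byte
# big-endian signed ints plus three 20-byte strings, so every v0 index
# entry occupies 4 * 4 + 3 * 20 = 76 bytes:
#
#   >>> indexformatv0.size
#   76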

class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None
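
    # Editorial note (not in the original changeset): the in-memory entries
    # built by parseindex() follow the revlogv1 tuple layout:
    #   (offset_flags, compressed_len, uncompressed_len, base_rev,
    #    link_rev, p1_rev, p2_rev, nodeid)
    # v0 never stored an uncompressed length, hence the -1 in slot 2.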

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
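
# Editorial size check (illustrative): ">Qiiiiii20s12x" is one 8-byte
# unsigned field, six 4-byte ints, a 20-byte hash and 12 pad bytes; the
# "32 bytes: nodeid" above is the 20-byte hash zero-padded to 32:
#
#   >>> indexformatng.size
#   64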

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p
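
    # Editorial note (not in the original changeset): rev 0 always starts
    # at offset 0, so the leading bytes of the first index entry carry no
    # information; packentry() reuses its first 4 bytes to store the revlog
    # version header that _loadindex() reads back with
    # versionformat_unpack(indexdata[:4]).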

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.
    """

    _flagserrorclass = error.RevlogError

    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False,
                 upperboundcomp=None):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = self.opener.options

        if 'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif 'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if 'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif 'revlogv0' in self.opener.options:
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if 'chunkcachesize' in opts:
            self._chunkcachesize = opts['chunkcachesize']
        if 'maxchainlen' in opts:
            self._maxchainlen = opts['maxchainlen']
        if 'deltabothparents' in opts:
            self._deltabothparents = opts['deltabothparents']
        self._lazydelta = bool(opts.get('lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
        if 'compengine' in opts:
            self._compengine = opts['compengine']
        if 'zlib.level' in opts:
            self._compengineopts['zlib.level'] = opts['zlib.level']
        if 'zstd.level' in opts:
            self._compengineopts['zstd.level'] = opts['zstd.level']
        if 'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts['maxdeltachainspan']
        if self._mmaplargeindex and 'mmapindexthreshold' in opts:
            mmapindexthreshold = opts['mmapindexthreshold']
        self.hassidedata = bool(opts.get('side-data', False))
        if self.hassidedata:
            self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
        self._sparserevlog = bool(opts.get('sparse-revlog', False))
        withsparseread = bool(opts.get('with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if 'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts['sparse-read-density-threshold']
        if 'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts['sparse-read-min-gap-size']
        if opts.get('enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)
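
        # Editorial aside (illustrative): "x & (x - 1)" clears the lowest
        # set bit, so it is zero exactly when x is a power of two:
        #
        #   >>> 65536 & 65535
        #   0
        #   >>> 65537 & 65536
        #   65536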

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                        self.opener.fstat(f).st_size >= mmapindexthreshold):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF
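
        # Editorial example (illustrative): for the common v1 header
        # 0x00030001 the low 16 bits give fmt == REVLOGV1 (1) and the
        # high bits carry FLAG_INLINE_DATA | FLAG_GENERALDELTA:
        #
        #   >>> hex(0x00030001 & ~0xFFFF)
        #   '0x30000'
        #   >>> 0x00030001 & 0xFFFF
        #   1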

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            # If we are using the native C version, you are in a fun case
            # where self.index, self.nodemap and self._nodecache are the
            # same object.
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
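
    # Editorial aside (illustrative): these two accessors undo the
    # module-level offset_type() packing; the same index field yields both
    # the data-file offset and the 16 flag bits:
    #
    #   >>> getoffset(offset_type(4096, 0)) == 4096
    #   True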

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))
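
    # Editorial note (not in the original changeset): the XOR above drops
    # the ellipsis bit from the known-flags mask, so the fast path is taken
    # when the only flag set, if any, is REVIDX_ELLIPSIS, which is known
    # not to change the content.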

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
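
    # Editorial walk-through (illustrative): with generaldelta each step
    # follows the stored base pointer e[3], so a chain may skip revisions
    # (e.g. 7 -> 4 -> 2); without generaldelta every delta applies against
    # the previous revision, so the chain is the contiguous run down to the
    # base. Either way the collected revs are reversed into ascending order
    # before being returned.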

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        elif util.safehasattr(parsers, 'rustlazyancestors'):
            lazyancestors = ancestor.rustlazyancestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
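
    # Editorial worked example (illustrative): in a linear history
    # 0 -> 1 -> 2 -> 3, findcommonmissing([node(1)], [node(3)]) returns a
    # lazy set covering ::1 (nullrev, 0 and 1) and the nodes of the missing
    # revs 2 and 3, sorted by revision number.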
874
874
875 def incrementalmissingrevs(self, common=None):
875 def incrementalmissingrevs(self, common=None):
876 """Return an object that can be used to incrementally compute the
876 """Return an object that can be used to incrementally compute the
877 revision numbers of the ancestors of arbitrary sets that are not
877 revision numbers of the ancestors of arbitrary sets that are not
878 ancestors of common. This is an ancestor.incrementalmissingancestors
878 ancestors of common. This is an ancestor.incrementalmissingancestors
879 object.
879 object.
880
880
881 'common' is a list of revision numbers. If common is not supplied, uses
881 'common' is a list of revision numbers. If common is not supplied, uses
882 nullrev.
882 nullrev.
883 """
883 """
884 if common is None:
884 if common is None:
885 common = [nullrev]
885 common = [nullrev]
886
886
887 if rustancestor is not None:
887 if rustancestor is not None:
888 return rustancestor.MissingAncestors(self.index, common)
888 return rustancestor.MissingAncestors(self.index, common)
889 return ancestor.incrementalmissingancestors(self.parentrevs, common)
889 return ancestor.incrementalmissingancestors(self.parentrevs, common)
890
890
891 def findmissingrevs(self, common=None, heads=None):
891 def findmissingrevs(self, common=None, heads=None):
892 """Return the revision numbers of the ancestors of heads that
892 """Return the revision numbers of the ancestors of heads that
893 are not ancestors of common.
893 are not ancestors of common.
894
894
895 More specifically, return a list of revision numbers corresponding to
895 More specifically, return a list of revision numbers corresponding to
896 nodes N such that every N satisfies the following constraints:
896 nodes N such that every N satisfies the following constraints:
897
897
898 1. N is an ancestor of some node in 'heads'
898 1. N is an ancestor of some node in 'heads'
899 2. N is not an ancestor of any node in 'common'
899 2. N is not an ancestor of any node in 'common'
900
900
901 The list is sorted by revision number, meaning it is
901 The list is sorted by revision number, meaning it is
902 topologically sorted.
902 topologically sorted.
903
903
904 'heads' and 'common' are both lists of revision numbers. If heads is
904 'heads' and 'common' are both lists of revision numbers. If heads is
905 not supplied, uses all of the revlog's heads. If common is not
905 not supplied, uses all of the revlog's heads. If common is not
906 supplied, uses nullid."""
906 supplied, uses nullid."""
907 if common is None:
907 if common is None:
908 common = [nullrev]
908 common = [nullrev]
909 if heads is None:
909 if heads is None:
910 heads = self.headrevs()
910 heads = self.headrevs()
911
911
912 inc = self.incrementalmissingrevs(common=common)
912 inc = self.incrementalmissingrevs(common=common)
913 return inc.missingancestors(heads)
913 return inc.missingancestors(heads)
914
914
915 def findmissing(self, common=None, heads=None):
915 def findmissing(self, common=None, heads=None):
916 """Return the ancestors of heads that are not ancestors of common.
916 """Return the ancestors of heads that are not ancestors of common.
917
917
918 More specifically, return a list of nodes N such that every N
918 More specifically, return a list of nodes N such that every N
919 satisfies the following constraints:
919 satisfies the following constraints:
920
920
921 1. N is an ancestor of some node in 'heads'
921 1. N is an ancestor of some node in 'heads'
922 2. N is not an ancestor of any node in 'common'
922 2. N is not an ancestor of any node in 'common'
923
923
924 The list is sorted by revision number, meaning it is
924 The list is sorted by revision number, meaning it is
925 topologically sorted.
925 topologically sorted.
926
926
927 'heads' and 'common' are both lists of node IDs. If heads is
927 'heads' and 'common' are both lists of node IDs. If heads is
928 not supplied, uses all of the revlog's heads. If common is not
928 not supplied, uses all of the revlog's heads. If common is not
929 supplied, uses nullid."""
929 supplied, uses nullid."""
930 if common is None:
930 if common is None:
931 common = [nullid]
931 common = [nullid]
932 if heads is None:
932 if heads is None:
933 heads = self.heads()
933 heads = self.heads()
934
934
935 common = [self.rev(n) for n in common]
935 common = [self.rev(n) for n in common]
936 heads = [self.rev(n) for n in heads]
936 heads = [self.rev(n) for n in heads]
937
937
938 inc = self.incrementalmissingrevs(common=common)
938 inc = self.incrementalmissingrevs(common=common)
939 return [self.node(r) for r in inc.missingancestors(heads)]
939 return [self.node(r) for r in inc.missingancestors(heads)]
940
940
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

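    # Hedged sketch (hypothetical names): nodesbetween() answers "which nodes
    # lie on a path from any of `roots` to any of `heads`?" and also reports
    # which of the given endpoints were actually reachable.
    #
    #     nodes, outroots, outheads = rlog.nodesbetween([base], [tip])
    #     # `nodes` is topologically sorted; `base in outroots` and
    #     # `tip in outheads` hold whenever a path exists between them.
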
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

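    # Hedged usage sketch (hypothetical `rlog` and `base`):
    #
    #     all_heads = rlog.heads()               # every childless node
    #     branch_heads = rlog.heads(start=base)  # heads descending from base
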
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

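    # Note that children() scans every revision after `node`, so each call is
    # O(len(revlog)); fine for occasional queries, costly in a tight loop.
    # Hedged usage sketch (hypothetical names):
    #
    #     kids = rlog.children(node)  # [] when `node` is a head
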
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

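    # Hedged sketch of the fast paths above: nullrev is an ancestor of
    # everything, every revision is its own ancestor, and since revision
    # numbers are topologically ordered, a > b can never be an ancestor of b.
    # Hypothetical calls:
    #
    #     rlog.isancestor(base_node, tip_node)  # node-based variant
    #     rlog.isancestorrev(0, len(rlog) - 1)  # rev-number variant
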
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::<roots> and <roots>::<heads>))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(minroot, heads, roots,
                                              includepath)
        except AttributeError:
            return dagop._reachablerootspure(self.parentrevs,
                                             minroot, roots, heads, includepath)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

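    # Hedged sketch: ancestor() is the usual way to pick a merge base. When
    # several candidate ancestors tie, the minimum node is returned purely to
    # keep the choice deterministic. Hypothetical usage:
    #
    #     base = rlog.ancestor(node_a, node_b)  # nullid if nothing is shared
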
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

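    # Hedged usage sketch; the identifier values below are hypothetical:
    #
    #     rlog.lookup(0)         # revision number
    #     rlog.lookup(b'0')      # str(revision number)
    #     rlog.lookup(b'1de3a')  # unambiguous hex nodeid prefix
    #     # raises error.LookupError when nothing matches
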
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

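    # Hedged sketch: shortest() is the inverse of prefix lookup. It grows the
    # hex form of `node` one digit at a time until _partialmatch() stops
    # reporting ambiguity and the prefix can no longer be confused with wdir.
    # Hypothetical usage:
    #
    #     prefix = rlog.shortest(node, minlength=4)
    #     assert rlog.lookup(prefix) == node
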
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d

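    # Worked example of the window arithmetic above, assuming a power-of-two
    # _chunkcachesize of 65536: a request for 100 bytes at offset 70000
    # rounds the start down and the end up to cache-size boundaries.
    #
    #     realoffset = 70000 & ~(65536 - 1)                      # -> 65536
    #     reallength = (((70000 + 100 + 65536) & ~(65536 - 1))
    #                   - 65536)                                 # -> 65536
    #
    # One aligned 64 KiB read covers the request, and the caller's slice is
    # handed back through util.buffer without copying.
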
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1),
                              self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = ('_processflag(...) use the specialized variant')
        util.nouideprecwarn(msg, '5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == 'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = ('revlog.revision(..., raw=True) is deprecated, '
                   'use revlog.rawdata(...)')
            util.nouideprecwarn(msg, '5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

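    # Hedged usage sketch (hypothetical `rlog` and `node`): revision() returns
    # the fully processed text, rawdata() the bytes as stored (delta chain
    # applied, flag processors not reversed), and sidedata() the extra
    # mapping stored alongside a revision.
    #
    #     text = rlog.revision(node)   # accepts a node or a rev number
    #     stored = rlog.rawdata(node)  # what changegroups should carry
    #     extra = rlog.sidedata(node)  # {} when no sidedata is present
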
    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return "", {}

        # The text as stored inside the revlog. Might be the revision or might
        # need to be processed to retrieve the revision.
        rawtext = None

        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext, {}
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, {}

        sidedata = {}
        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            try:
                r = flagutil.processflagsread(self, rawtext, flags)
            except error.SidedataHashError as exc:
                msg = _("integrity check failed on %s:%s sidedata key %d")
                msg %= (self.indexfile, pycompat.bytestr(rev),
                        exc.sidedatakey)
                raise error.RevlogError(msg)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

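    # Hedged sketch of the subclassing hook mentioned above (hypothetical
    # subclass and hash function, not part of this module):
    #
    #     class myrevlog(revlog):
    #         def hash(self, text, p1, p2):
    #             # swap in an experimental content hash
    #             return myhashfunction(text, p1, p2)
    #
    # checkhash() below calls self.hash(), so overriding it here keeps
    # integrity checking consistent with writing.
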
1756 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1756 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1757 """Check node hash integrity.
1757 """Check node hash integrity.
1758
1758
1759 Available as a function so that subclasses can extend hash mismatch
1759 Available as a function so that subclasses can extend hash mismatch
1760 behaviors as needed.
1760 behaviors as needed.
1761 """
1761 """
1762 try:
1762 try:
1763 if p1 is None and p2 is None:
1763 if p1 is None and p2 is None:
1764 p1, p2 = self.parents(node)
1764 p1, p2 = self.parents(node)
1765 if node != self.hash(text, p1, p2):
1765 if node != self.hash(text, p1, p2):
1766 # Clear the revision cache on hash failure. The revision cache
1766 # Clear the revision cache on hash failure. The revision cache
1767 # only stores the raw revision and clearing the cache does have
1767 # only stores the raw revision and clearing the cache does have
1768 # the side-effect that we won't have a cache hit when the raw
1768 # the side-effect that we won't have a cache hit when the raw
1769 # revision data is accessed. But this case should be rare and
1769 # revision data is accessed. But this case should be rare and
1770 # it is extra work to teach the cache about the hash
1770 # it is extra work to teach the cache about the hash
1771 # verification state.
1771 # verification state.
1772 if self._revisioncache and self._revisioncache[0] == node:
1772 if self._revisioncache and self._revisioncache[0] == node:
1773 self._revisioncache = None
1773 self._revisioncache = None
1774
1774
1775 revornode = rev
1775 revornode = rev
1776 if revornode is None:
1776 if revornode is None:
1777 revornode = templatefilters.short(hex(node))
1777 revornode = templatefilters.short(hex(node))
1778 raise error.RevlogError(_("integrity check failed on %s:%s")
1778 raise error.RevlogError(_("integrity check failed on %s:%s")
1779 % (self.indexfile, pycompat.bytestr(revornode)))
1779 % (self.indexfile, pycompat.bytestr(revornode)))
1780 except error.RevlogError:
1780 except error.RevlogError:
1781 if self._censorable and storageutil.iscensoredtext(text):
1781 if self._censorable and storageutil.iscensoredtext(text):
1782 raise error.CensoredNodeError(self.indexfile, node, text)
1782 raise error.CensoredNodeError(self.indexfile, node, text)
1783 raise
1783 raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover
            # data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()
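
    # Background for the conversion above: an inline revlog interleaves
    # revision data with index entries in the .i file, avoiding a separate
    # file for small revlogs. Once the stored data outgrows _maxinline, the
    # loops above copy every revision's data segment into a standalone .d
    # file and rewrite the index without FLAG_INLINE_DATA.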

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None,
                    sidedata=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses
            might use a different hashing method (and override checkhash()
            in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if sidedata is None:
            sidedata = {}
            flags = flags & ~REVIDX_SIDEDATA
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _("trying to add sidedata to a revlog that does not "
                  "support them")
            )
        else:
            flags |= REVIDX_SIDEDATA

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags,
                                                           sidedata=sidedata)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)
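
    # A minimal usage sketch for addrevision(), assuming `rl` is an open
    # revlog and `tr` an active transaction (all names hypothetical):
    #
    #   node = rl.addrevision(b'file content', tr, linkrev, p1node, nullid)
    #   assert node in rl.nodemap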

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)
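
    # For orientation, the one-byte chunk headers handled above (grounded in
    # compress()/decompress() in this file):
    #
    #   'u'  - uncompressed data follows the header byte, which is stripped
    #   '\0' - chunk stored as-is (empty, or raw text that begins with NUL)
    #   'x'  - zlib; any other byte is resolved through util.compengines
    #
    # A round-trip sketch, assuming `rl` is a revlog instance (decompress()
    # may return a buffer view rather than bytes):
    #
    #   header, rest = rl.compress(rawtext)
    #   assert bytes(rl.decompress(header + rest)) == rawtext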

    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node
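
    # For reference, the index entry appended above packs eight fields:
    #
    #   e = (offset_type(offset, flags),  # data offset combined with flag bits
    #        deltainfo.deltalen,          # length of the stored chunk
    #        textlen,                     # uncompressed rawtext length
    #        deltainfo.base,              # revision the delta is based on
    #        link,                        # linkrev into the changelog
    #        p1r, p2r,                    # parent revision numbers
    #        node)                        # binary node id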

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)
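
    # A minimal sketch of the append-mode pitfall worked around above,
    # assuming a handle previously opened 'a+' and seeked backwards to read:
    #
    #   fh.seek(0)               # read from the start...
    #   header = fh.read(16)
    #   fh.seek(0, os.SEEK_END)  # ...so reposition explicitly before
    #   fh.write(entry)          # appending, rather than trusting 'a+'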

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should be in our log; the rest are
        against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

        if dfh:
            dfh.close()
        ifh.close()

        return nodes
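
    # Each element of ``deltas`` consumed above is a 7-tuple, unpacked at the
    # top of the loop; a caller-side sketch with hypothetical values:
    #
    #   deltas = [(node, p1, p2, linknode, deltabase, delta, flags)]
    #   added = rl.addgroup(deltas, linkmapper, tr)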

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
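
    # Reading checksize() results: (0, 0) is a healthy revlog; positive values
    # are trailing bytes beyond what the index accounts for (e.g. (0, 24)
    # would mean 24 stray bytes at the end of the index file), while a
    # negative `dd` means the data file ends before the offsets recorded in
    # the index.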

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        if (not self._storedeltachains and
            deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. If not set, the destination revlog's current setting is
        kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(tr, destrevlog, addrevisioncb, deltareuse,
                        forcedeltabothparents)

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
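
    # A minimal usage sketch for clone(); `opener`, `tr`, `src` and the
    # target file name are hypothetical, and DELTAREUSENEVER forces every
    # delta to be recomputed:
    #
    #   dst = revlog(opener, src.indexfile + b'.new')
    #   src.clone(tr, dst, deltareuse=revlog.DELTAREUSENEVER)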

    def _clone(self, tr, destrevlog, addrevisioncb, deltareuse,
               forcedeltabothparents):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xffff
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self.revision(rev)
                destrevlog.addrevision(text, tr, linkrev, p1, p2,
                                       cachedelta=cachedelta,
                                       node=node, flags=flags,
                                       deltacomputer=deltacomputer)
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                        checkambig=False)
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                try:
                    destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                            p2, flags, cachedelta, ifh, dfh,
                                            deltacomputer=deltacomputer)
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please '
                                             'report this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  ------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)
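
    # Callers typically drain the generator above and report problems as they
    # go; a sketch, assuming `rl`, `ui` and a prepared `state` come from the
    # verify machinery:
    #
    #   for problem in rl.verifyintegrity(state):
    #       if problem.error:
    #           ui.warn(problem.error + b'\n')
    #       elif problem.warning:
    #           ui.warn(problem.warning + b'\n')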

    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
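
    # Example of opting into storageinfo() details (each keyword adds a key
    # to the returned dict; `rl` and the values shown are hypothetical):
    #
    #   info = rl.storageinfo(revisionscount=True, trackedsize=True)
    #   # -> {'revisionscount': 42, 'trackedsize': 123456}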